aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2022-07-12 10:01:46 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2022-07-12 10:01:46 +0000
commite8471d976df1820c39999a32beefc9d4d8b200fb (patch)
tree27fb842ec40ad6eb9374fa046203584dc81d9c84
parentdc417b838162dd338aaa2adb775c46ea0ae0b345 (diff)
parentecb3673040687444c8e6a573b54a3affc4e3a963 (diff)
downloadicing-android13-mainline-go-mediaprovider-release.tar.gz
Snap for 8820681 from ecb3673040687444c8e6a573b54a3affc4e3a963 to mainline-go-mediaprovider-releaseaml_go_mpr_330912000android13-mainline-go-mediaprovider-release
Change-Id: I0c2524e6de8ddf957c18438c0b827242ce1c5147
-rw-r--r--CMakeLists.txt1
-rw-r--r--icing/file/destructible-directory.h74
-rw-r--r--icing/file/destructible-directory_test.cc118
-rw-r--r--icing/file/file-backed-proto-log.h4
-rw-r--r--icing/file/file-backed-vector_test.cc22
-rw-r--r--icing/file/memory-mapped-file.cc3
-rw-r--r--icing/file/portable-file-backed-proto-log_benchmark.cc4
-rw-r--r--icing/icing-search-engine.cc49
-rw-r--r--icing/icing-search-engine.h4
-rw-r--r--icing/icing-search-engine_benchmark.cc26
-rw-r--r--icing/icing-search-engine_flush_benchmark.cc4
-rw-r--r--icing/icing-search-engine_test.cc76
-rw-r--r--icing/index/index-processor.cc7
-rw-r--r--icing/index/index-processor_benchmark.cc4
-rw-r--r--icing/index/index.h10
-rw-r--r--icing/index/index_test.cc62
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter.cc22
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.cc8
-rw-r--r--icing/index/iterator/doc-hit-info-iterator_benchmark.cc4
-rw-r--r--icing/index/lite/lite-index.cc25
-rw-r--r--icing/index/lite/lite-index.h6
-rw-r--r--icing/index/main/flash-index-storage.cc3
-rw-r--r--icing/index/main/flash-index-storage.h2
-rw-r--r--icing/index/main/main-index.cc24
-rw-r--r--icing/index/main/main-index.h9
-rw-r--r--icing/jni/icing-search-engine-jni.cc111
-rw-r--r--icing/jni/scoped-primitive-array-critical.h86
-rw-r--r--icing/jni/scoped-utf-chars.h82
-rw-r--r--icing/legacy/index/icing-dynamic-trie.cc93
-rw-r--r--icing/legacy/index/icing-dynamic-trie.h17
-rw-r--r--icing/legacy/index/icing-dynamic-trie_test.cc223
-rw-r--r--icing/query/query-processor_benchmark.cc4
-rw-r--r--icing/query/query-processor_test.cc15
-rw-r--r--icing/query/suggestion-processor.cc2
-rw-r--r--icing/query/suggestion-processor_test.cc12
-rw-r--r--icing/result/page-result.h46
-rw-r--r--icing/result/projection-tree.h10
-rw-r--r--icing/result/result-retriever-v2.cc175
-rw-r--r--icing/result/result-retriever-v2.h108
-rw-r--r--icing/result/result-retriever-v2_group-result-limiter-test.cc639
-rw-r--r--icing/result/result-retriever-v2_projection-test.cc1281
-rw-r--r--icing/result/result-retriever-v2_snippet-test.cc573
-rw-r--r--icing/result/result-retriever-v2_test.cc641
-rw-r--r--icing/result/result-state-manager.cc42
-rw-r--r--icing/result/result-state-manager.h29
-rw-r--r--icing/result/result-state-manager_test.cc194
-rw-r--r--icing/result/result-state-v2.cc94
-rw-r--r--icing/result/result-state-v2.h125
-rw-r--r--icing/result/result-state-v2_test.cc443
-rw-r--r--icing/result/result-state.cc10
-rw-r--r--icing/schema/schema-store.cc62
-rw-r--r--icing/schema/schema-store.h3
-rw-r--r--icing/schema/schema-store_test.cc40
-rw-r--r--icing/schema/section-manager_test.cc28
-rw-r--r--icing/scoring/bm25f-calculator.cc8
-rw-r--r--icing/scoring/priority-queue-scored-document-hits-ranker.cc55
-rw-r--r--icing/scoring/priority-queue-scored-document-hits-ranker.h72
-rw-r--r--icing/scoring/priority-queue-scored-document-hits-ranker_test.cc239
-rw-r--r--icing/scoring/ranker.cc58
-rw-r--r--icing/scoring/ranker.h12
-rw-r--r--icing/scoring/ranker_benchmark.cc4
-rw-r--r--icing/scoring/score-and-rank_benchmark.cc4
-rw-r--r--icing/scoring/scored-document-hits-ranker.h53
-rw-r--r--icing/store/document-log-creator.cc1
-rw-r--r--icing/store/document-store.cc214
-rw-r--r--icing/store/document-store.h54
-rw-r--r--icing/store/document-store_benchmark.cc7
-rw-r--r--icing/store/document-store_test.cc219
-rw-r--r--icing/store/dynamic-trie-key-mapper.h299
-rw-r--r--icing/store/dynamic-trie-key-mapper_test.cc (renamed from icing/store/key-mapper_test.cc)98
-rw-r--r--icing/store/key-mapper.h244
-rw-r--r--icing/store/namespace-checker-impl.h14
-rw-r--r--icing/testing/common-matchers.h4
-rw-r--r--icing/tokenization/combined-tokenizer_test.cc30
-rw-r--r--icing/tokenization/language-segmenter_benchmark.cc4
-rw-r--r--icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc1
-rw-r--r--icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc1
-rw-r--r--icing/transform/icu/icu-normalizer_benchmark.cc4
-rw-r--r--icing/transform/map/map-normalizer_benchmark.cc4
-rw-r--r--icing/util/document-validator_test.cc4
-rw-r--r--icing/util/fingerprint-util.cc48
-rw-r--r--icing/util/fingerprint-util.h47
-rw-r--r--icing/util/fingerprint-util_test.cc75
-rw-r--r--icing/util/logging.cc124
-rw-r--r--icing/util/logging.h124
-rw-r--r--icing/util/logging_raw.cc102
-rw-r--r--icing/util/logging_raw.h34
-rw-r--r--icing/util/logging_test.cc158
-rw-r--r--java/src/com/google/android/icing/IcingSearchEngine.java77
-rw-r--r--java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java111
-rw-r--r--proto/icing/proto/debug.proto90
-rw-r--r--synced_AOSP_CL_number.txt2
92 files changed, 7533 insertions, 925 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c8e439..48a63d4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,6 +18,7 @@ project(icing)
add_definitions("-DICING_REVERSE_JNI_SEGMENTATION=1")
set(VERSION_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/icing/jni.lds")
+set(CMAKE_CXX_STANDARD 17)
set(CMAKE_SHARED_LINKER_FLAGS
"${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,--version-script=${VERSION_SCRIPT}")
diff --git a/icing/file/destructible-directory.h b/icing/file/destructible-directory.h
new file mode 100644
index 0000000..9a8bd4b
--- /dev/null
+++ b/icing/file/destructible-directory.h
@@ -0,0 +1,74 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_DESTRUCTIBLE_DIRECTORY_H_
+#define ICING_FILE_DESTRUCTIBLE_DIRECTORY_H_
+
+#include "icing/file/filesystem.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+// A convenient RAII class which will recursively create the directory at the
+// specified file path and delete it upon destruction.
+class DestructibleDirectory {
+ public:
+ explicit DestructibleDirectory(const Filesystem* filesystem, std::string dir)
+ : filesystem_(filesystem), dir_(std::move(dir)) {
+ is_valid_ = filesystem_->CreateDirectoryRecursively(dir_.c_str());
+ }
+
+ DestructibleDirectory(const DestructibleDirectory&) = delete;
+ DestructibleDirectory& operator=(const DestructibleDirectory&) = delete;
+
+ DestructibleDirectory(DestructibleDirectory&& rhs)
+ : filesystem_(nullptr), is_valid_(false) {
+ Swap(rhs);
+ }
+
+ DestructibleDirectory& operator=(DestructibleDirectory&& rhs) {
+ Swap(rhs);
+ return *this;
+ }
+
+ ~DestructibleDirectory() {
+ if (filesystem_ != nullptr &&
+ !filesystem_->DeleteDirectoryRecursively(dir_.c_str())) {
+ // Swallow deletion failures as there's nothing actionable to do about
+ // them.
+ ICING_LOG(WARNING) << "Unable to delete temporary directory: " << dir_;
+ }
+ }
+
+ const std::string& dir() const { return dir_; }
+
+ bool is_valid() const { return is_valid_; }
+
+ private:
+ void Swap(DestructibleDirectory& other) {
+ std::swap(filesystem_, other.filesystem_);
+ std::swap(dir_, other.dir_);
+ std::swap(is_valid_, other.is_valid_);
+ }
+
+ const Filesystem* filesystem_;
+ std::string dir_;
+ bool is_valid_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_DESTRUCTIBLE_DIRECTORY_H_
diff --git a/icing/file/destructible-directory_test.cc b/icing/file/destructible-directory_test.cc
new file mode 100644
index 0000000..c62db3b
--- /dev/null
+++ b/icing/file/destructible-directory_test.cc
@@ -0,0 +1,118 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/destructible-directory.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+
+TEST(DestructibleFileTest, DeletesDirectoryProperly) {
+ Filesystem filesystem;
+ std::string dir_path = GetTestTempDir() + "/dir1";
+ std::string file_path = dir_path + "/file1";
+
+ {
+ // 1. Create a file in the directory.
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(dir_path.c_str()));
+ ScopedFd sfd(filesystem.OpenForWrite(file_path.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ int i = 127;
+ ASSERT_TRUE(filesystem.Write(sfd.get(), &i, sizeof(i)));
+ }
+
+ {
+ // 2. Open the directory with a DestructibleDirectory
+ DestructibleDirectory destructible(&filesystem, dir_path);
+ EXPECT_TRUE(destructible.is_valid());
+ EXPECT_THAT(destructible.dir(), Eq(dir_path));
+ }
+
+ // 3. Ensure that the file and directory don't exist.
+ EXPECT_FALSE(filesystem.FileExists(file_path.c_str()));
+ EXPECT_FALSE(filesystem.DirectoryExists(dir_path.c_str()));
+}
+
+TEST(DestructibleFileTest, MoveAssignDeletesFileProperly) {
+ Filesystem filesystem;
+ std::string filepath1 = GetTestTempDir() + "/dir1";
+ std::string filepath2 = GetTestTempDir() + "/dir2";
+
+ // 1. Create dir1
+ DestructibleDirectory destructible1(&filesystem, filepath1);
+ ASSERT_TRUE(destructible1.is_valid());
+ ASSERT_TRUE(filesystem.DirectoryExists(filepath1.c_str()));
+
+ {
+ // 2. Create dir2
+ DestructibleDirectory destructible2(&filesystem, filepath2);
+ ASSERT_TRUE(destructible2.is_valid());
+
+ // Move assign destructible2 into destructible1
+ destructible1 = std::move(destructible2);
+ }
+
+ // 3. dir1 shouldn't exist because it was destroyed when destructible1 was
+ // move assigned to.
+ EXPECT_FALSE(filesystem.DirectoryExists(filepath1.c_str()));
+
+ // 4. dir2 should still exist because it moved into destructible1 from
+ // destructible2.
+ EXPECT_TRUE(filesystem.DirectoryExists(filepath2.c_str()));
+}
+
+TEST(DestructibleFileTest, MoveConstructionDeletesFileProperly) {
+ Filesystem filesystem;
+ std::string filepath1 = GetTestTempDir() + "/dir1";
+
+ // 1. Create destructible1, it'll be reconstructed soon anyways.
+ std::unique_ptr<DestructibleDirectory> destructible1;
+ {
+ // 2. Create file1
+ DestructibleDirectory destructible2(&filesystem, filepath1);
+ ASSERT_TRUE(destructible2.is_valid());
+
+ // Move construct destructible1 from destructible2
+ destructible1 =
+ std::make_unique<DestructibleDirectory>(std::move(destructible2));
+ }
+
+ // 3. dir1 should still exist because it moved into destructible1 from
+ // destructible2.
+ EXPECT_TRUE(destructible1->is_valid());
+ EXPECT_TRUE(filesystem.DirectoryExists(filepath1.c_str()));
+
+ {
+ // 4. Move construct destructible3 from destructible1
+ DestructibleDirectory destructible3(std::move(*destructible1));
+ EXPECT_TRUE(destructible3.is_valid());
+ }
+
+ // 5. dir1 shouldn't exist because it was destroyed when destructible3 was
+ // destroyed.
+ EXPECT_FALSE(filesystem.DirectoryExists(filepath1.c_str()));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 686b4fb..ad7fae9 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -455,8 +455,8 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem,
absl_ports::StrCat("Error truncating file: ", file_path));
}
- ICING_LOG(INFO) << "Truncated '" << file_path << "' to size "
- << last_known_good;
+ ICING_LOG(WARNING) << "Truncated '" << file_path << "' to size "
+ << last_known_good;
}
CreateResult create_result = {
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index ed94fa5..2f60c6b 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -23,16 +23,16 @@
#include <string_view>
#include <vector>
-#include "knowledge/cerebra/sense/text_classifier/lib3/utils/base/status.h"
-#include "testing/base/public/gmock.h"
-#include "testing/base/public/gunit.h"
-#include "third_party/icing/file/filesystem.h"
-#include "third_party/icing/file/memory-mapped-file.h"
-#include "third_party/icing/file/mock-filesystem.h"
-#include "third_party/icing/testing/common-matchers.h"
-#include "third_party/icing/testing/tmp-directory.h"
-#include "third_party/icing/util/crc32.h"
-#include "third_party/icing/util/logging.h"
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
using ::testing::Eq;
using ::testing::IsTrue;
@@ -662,7 +662,7 @@ TEST_F(FileBackedVectorTest, RemapFailureStillValidInstance) {
// 2. The next Set call should cause a resize and a remap. Make that remap
// fail.
int num_calls = 0;
- auto open_lambda = [this, &num_calls](const char* file_name){
+ auto open_lambda = [this, &num_calls](const char* file_name) {
if (++num_calls == 2) {
return -1;
}
diff --git a/icing/file/memory-mapped-file.cc b/icing/file/memory-mapped-file.cc
index 9ff3adb..fc13a79 100644
--- a/icing/file/memory-mapped-file.cc
+++ b/icing/file/memory-mapped-file.cc
@@ -73,8 +73,6 @@ libtextclassifier3::Status MemoryMappedFile::Remap(size_t file_offset,
if (mmap_size == 0) {
// First unmap any previously mmapped region.
Unmap();
-
- // Nothing more to do.
return libtextclassifier3::Status::OK;
}
@@ -122,6 +120,7 @@ libtextclassifier3::Status MemoryMappedFile::Remap(size_t file_offset,
mmap_flags, fd.get(), aligned_offset);
if (mmap_result == MAP_FAILED) {
+ mmap_result = nullptr;
return absl_ports::InternalError(absl_ports::StrCat(
"Failed to mmap region due to error: ", strerror(errno)));
}
diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc
index 80a8011..d7ea4bb 100644
--- a/icing/file/portable-file-backed-proto-log_benchmark.cc
+++ b/icing/file/portable-file-backed-proto-log_benchmark.cc
@@ -33,7 +33,7 @@
// icing/file:portable-file-backed-proto-log_benchmark
//
// $ blaze-bin/icing/file/portable-file-backed-proto-log_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
//
// To build and run on an Android device (must be connected and rooted):
@@ -48,7 +48,7 @@
// /data/local/tmp/
//
// $ adb shell /data/local/tmp/portable-file-backed-proto-log-benchmark
-// --benchmarks=all
+// --benchmark_filter=all
namespace icing {
namespace lib {
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 952ba21..e390f0f 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -529,7 +529,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
}
result_state_manager_ = std::make_unique<ResultStateManager>(
- performance_configuration_.max_num_total_hits, *document_store_);
+ performance_configuration_.max_num_total_hits, *document_store_,
+ clock_.get());
return status;
}
@@ -1374,6 +1375,46 @@ StorageInfoResultProto IcingSearchEngine::GetStorageInfo() {
return result;
}
+DebugInfoResultProto IcingSearchEngine::GetDebugInfo(
+ DebugInfoVerbosity::Code verbosity) {
+ DebugInfoResultProto debug_info;
+ StatusProto* result_status = debug_info.mutable_status();
+ absl_ports::shared_lock l(&mutex_);
+ if (!initialized_) {
+ debug_info.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
+ debug_info.mutable_status()->set_message(
+ "IcingSearchEngine has not been initialized!");
+ return debug_info;
+ }
+
+ // Index
+ *debug_info.mutable_debug_info()->mutable_index_info() =
+ index_->GetDebugInfo(verbosity);
+
+ // Document Store
+ libtextclassifier3::StatusOr<DocumentDebugInfoProto> document_debug_info =
+ document_store_->GetDebugInfo(verbosity);
+ if (!document_debug_info.ok()) {
+ TransformStatus(document_debug_info.status(), result_status);
+ return debug_info;
+ }
+ *debug_info.mutable_debug_info()->mutable_document_info() =
+ std::move(document_debug_info).ValueOrDie();
+
+ // Schema Store
+ libtextclassifier3::StatusOr<SchemaDebugInfoProto> schema_debug_info =
+ schema_store_->GetDebugInfo();
+ if (!schema_debug_info.ok()) {
+ TransformStatus(schema_debug_info.status(), result_status);
+ return debug_info;
+ }
+ *debug_info.mutable_debug_info()->mutable_schema_info() =
+ std::move(schema_debug_info).ValueOrDie();
+
+ result_status->set_code(StatusProto::OK);
+ return debug_info;
+}
+
libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk(
PersistType::Code persist_type) {
if (persist_type == PersistType::LITE) {
@@ -1695,7 +1736,8 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore(
}
document_store_ = std::move(create_result_or.ValueOrDie().document_store);
result_state_manager_ = std::make_unique<ResultStateManager>(
- performance_configuration_.max_num_total_hits, *document_store_);
+ performance_configuration_.max_num_total_hits, *document_store_,
+ clock_.get());
// Potential data loss
// TODO(b/147373249): Find a way to detect true data loss error
@@ -1717,7 +1759,8 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore(
}
document_store_ = std::move(create_result_or.ValueOrDie().document_store);
result_state_manager_ = std::make_unique<ResultStateManager>(
- performance_configuration_.max_num_total_hits, *document_store_);
+ performance_configuration_.max_num_total_hits, *document_store_,
+ clock_.get());
// Deletes tmp directory
if (!filesystem_->DeleteDirectoryRecursively(
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index ff9c7fb..6a06fb9 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -403,6 +403,10 @@ class IcingSearchEngine {
// that field will be set to -1.
StorageInfoResultProto GetStorageInfo() ICING_LOCKS_EXCLUDED(mutex_);
+ // Get debug information for Icing.
+ DebugInfoResultProto GetDebugInfo(DebugInfoVerbosity::Code verbosity)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
// Clears all data from Icing and re-initializes. Clients DO NOT need to call
// Initialize again.
//
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index 5e610d5..6db66f6 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -51,7 +51,7 @@
// //icing:icing-search-engine_benchmark
//
// $ blaze-bin/icing/icing-search-engine_benchmark
-// --benchmarks=all --benchmark_memory_usage
+// --benchmark_filter=all --benchmark_memory_usage
//
// Run on an Android device:
// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
@@ -61,7 +61,8 @@
// $ adb push blaze-bin/icing/icing-search-engine_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/icing-search-engine_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/icing-search-engine_benchmark
+// --benchmark_filter=all
namespace icing {
namespace lib {
@@ -222,24 +223,19 @@ void BM_IndexLatency(benchmark::State& state) {
std::unique_ptr<IcingSearchEngine> icing =
std::make_unique<IcingSearchEngine>(options);
- ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
-
int num_docs = state.range(0);
std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
const std::vector<DocumentProto> random_docs =
GenerateRandomDocuments(&type_selector, num_docs, language);
- Timer timer;
- for (const DocumentProto& doc : random_docs) {
- ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ for (auto _ : state) {
+ state.PauseTiming();
+ ASSERT_THAT(icing->Reset().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+ state.ResumeTiming();
+ for (const DocumentProto& doc : random_docs) {
+ ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ }
}
- int64_t time_taken_ns = timer.GetElapsedNanoseconds();
- int64_t time_per_doc_ns = time_taken_ns / num_docs;
- std::cout << "Number of indexed documents:\t" << num_docs
- << "\t\tNumber of indexed sections:\t" << state.range(1)
- << "\t\tTime taken (ms):\t" << time_taken_ns / 1000000
- << "\t\tTime taken per doc (us):\t" << time_per_doc_ns / 1000
- << std::endl;
}
BENCHMARK(BM_IndexLatency)
// Arguments: num_indexed_documents, num_sections
diff --git a/icing/icing-search-engine_flush_benchmark.cc b/icing/icing-search-engine_flush_benchmark.cc
index de8f550..04e83fe 100644
--- a/icing/icing-search-engine_flush_benchmark.cc
+++ b/icing/icing-search-engine_flush_benchmark.cc
@@ -48,7 +48,7 @@
// //icing:icing-search-engine_flush_benchmark
//
// $ blaze-bin/icing/icing-search-engine_flush_benchmark
-// --benchmarks=all --benchmark_memory_usage
+// --benchmark_filter=all --benchmark_memory_usage
//
// Run on an Android device:
// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
@@ -59,7 +59,7 @@
// /data/local/tmp/
//
// $ adb shell /data/local/tmp/icing-search-engine_flush_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
namespace icing {
namespace lib {
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index 13e77b8..f922b98 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -3003,7 +3003,6 @@ TEST_F(IcingSearchEngineTest, OptimizationFailureUninitializesIcing) {
HasSubstr("document_dir")))
.WillByDefault(swap_lambda);
TestIcingSearchEngine icing(options, std::move(mock_filesystem),
- std::move(mock_filesystem),
std::make_unique<IcingFilesystem>(),
std::make_unique<FakeClock>(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -8680,6 +8679,81 @@ TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NonPositiveNumToReturn) {
ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
+TEST_F(IcingSearchEngineTest, GetDebugInfoVerbosityBasicSucceeds) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Create a document.
+ DocumentProto document = CreateMessageDocument("namespace", "email");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ DebugInfoResultProto result = icing.GetDebugInfo(DebugInfoVerbosity::BASIC);
+ EXPECT_THAT(result.status(), ProtoIsOk());
+
+ // Some sanity checks
+ DebugInfoProto debug_info = result.debug_info();
+ EXPECT_THAT(
+ debug_info.document_info().document_storage_info().num_alive_documents(),
+ Eq(1));
+ EXPECT_THAT(debug_info.document_info().corpus_info(),
+ IsEmpty()); // because verbosity=BASIC
+ EXPECT_THAT(debug_info.schema_info().crc(), Gt(0));
+}
+
+TEST_F(IcingSearchEngineTest,
+ GetDebugInfoVerbosityDetailedSucceedsWithCorpusInfo) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Create 4 documents.
+ DocumentProto document1 = CreateMessageDocument("namespace1", "email/1");
+ DocumentProto document2 = CreateMessageDocument("namespace1", "email/2");
+ DocumentProto document3 = CreateMessageDocument("namespace2", "email/3");
+ DocumentProto document4 = CreateMessageDocument("namespace2", "email/4");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+
+ DebugInfoResultProto result =
+ icing.GetDebugInfo(DebugInfoVerbosity::DETAILED);
+ EXPECT_THAT(result.status(), ProtoIsOk());
+
+ // Some sanity checks
+ DebugInfoProto debug_info = result.debug_info();
+ EXPECT_THAT(
+ debug_info.document_info().document_storage_info().num_alive_documents(),
+ Eq(4));
+ EXPECT_THAT(debug_info.document_info().corpus_info(), SizeIs(2));
+ EXPECT_THAT(debug_info.schema_info().crc(), Gt(0));
+}
+
+TEST_F(IcingSearchEngineTest, GetDebugInfoUninitialized) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ DebugInfoResultProto result =
+ icing.GetDebugInfo(DebugInfoVerbosity::DETAILED);
+ EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+}
+
+TEST_F(IcingSearchEngineTest, GetDebugInfoNoSchemaNoDocumentsSucceeds) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ DebugInfoResultProto result =
+ icing.GetDebugInfo(DebugInfoVerbosity::DETAILED);
+ ASSERT_THAT(result.status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineTest, GetDebugInfoWithSchemaNoDocumentsSucceeds) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ DebugInfoResultProto result =
+ icing.GetDebugInfo(DebugInfoVerbosity::DETAILED);
+ ASSERT_THAT(result.status(), ProtoIsOk());
+}
+
#ifndef ICING_JNI_TEST
// We skip this test case when we're running in a jni_test since the data files
// will be stored in the android-instrumented storage location, rather than the
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index 207c033..edc7881 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -67,6 +67,11 @@ libtextclassifier3::Status IndexProcessor::IndexDocument(
uint32_t num_tokens = 0;
libtextclassifier3::Status status;
for (const TokenizedSection& section : tokenized_document.sections()) {
+ if (section.metadata.tokenizer ==
+ StringIndexingConfig::TokenizerType::NONE) {
+ ICING_LOG(WARNING)
+ << "Unexpected TokenizerType::NONE found when indexing document.";
+ }
// TODO(b/152934343): pass real namespace ids in
Index::Editor editor =
index_->Edit(document_id, section.metadata.id,
@@ -82,8 +87,6 @@ libtextclassifier3::Status IndexProcessor::IndexDocument(
status = editor.BufferTerm(token.data());
break;
case StringIndexingConfig::TokenizerType::NONE:
- ICING_LOG(WARNING)
- << "Unexpected TokenizerType::NONE found when indexing document.";
[[fallthrough]];
case StringIndexingConfig::TokenizerType::PLAIN:
std::string normalized_term = normalizer_.NormalizeTerm(token);
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index 1aad7d0..68c592c 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -39,7 +39,7 @@
// //icing/index:index-processor_benchmark
//
// $ blaze-bin/icing/index/index-processor_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// Make target //icing/tokenization:language-segmenter depend on
@@ -55,7 +55,7 @@
// $ adb push blaze-bin/icing/index/index-processor_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/index-processor_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/index-processor_benchmark --benchmark_filter=all
// --adb
// Flag to tell the benchmark that it'll be run on an Android device via adb,
diff --git a/icing/index/index.h b/icing/index/index.h
index 5c53349..f101a91 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -140,11 +140,11 @@ class Index {
}
// Returns debug information for the index in out.
- // verbosity <= 0, simplest debug information - just the lexicons and lite
- // index.
- // verbosity > 0, more detailed debug information including raw postings
- // lists.
- IndexDebugInfoProto GetDebugInfo(int verbosity) const {
+ // verbosity = BASIC, simplest debug information - just the lexicons and lite
+ // index.
+ // verbosity = DETAILED, more detailed debug information including raw
+ // postings lists.
+ IndexDebugInfoProto GetDebugInfo(DebugInfoVerbosity::Code verbosity) const {
IndexDebugInfoProto debug_info;
*debug_info.mutable_index_storage_info() = GetStorageInfo();
*debug_info.mutable_lite_index_info() =
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index 8355c01..2eb3b59 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -41,6 +41,7 @@
#include "icing/testing/random-string.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
namespace icing {
namespace lib {
@@ -58,6 +59,8 @@ using ::testing::NiceMock;
using ::testing::Not;
using ::testing::Return;
using ::testing::SizeIs;
+using ::testing::StrEq;
+using ::testing::StrNe;
using ::testing::Test;
using ::testing::UnorderedElementsAre;
@@ -76,10 +79,10 @@ class IndexTest : public Test {
icing_filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
}
- std::unique_ptr<Index> index_;
- std::string index_dir_;
- IcingFilesystem icing_filesystem_;
Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::string index_dir_;
+ std::unique_ptr<Index> index_;
};
constexpr DocumentId kDocumentId0 = 0;
@@ -1410,17 +1413,19 @@ TEST_F(IndexTest, GetDebugInfo) {
ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- IndexDebugInfoProto out0 = index_->GetDebugInfo(/*verbosity=*/0);
- EXPECT_FALSE(out0.main_index_info().has_flash_index_storage_info());
- EXPECT_THAT(out0.main_index_info().last_added_document_id(),
- Eq(kDocumentId1));
- EXPECT_THAT(out0.lite_index_info().curr_size(), Eq(2));
- EXPECT_THAT(out0.lite_index_info().last_added_document_id(),
- Eq(kDocumentId2));
+ IndexDebugInfoProto out0 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
+ ICING_LOG(DBG) << "main_index_info:\n" << out0.main_index_info();
+ ICING_LOG(DBG) << "lite_index_info:\n" << out0.lite_index_info();
+ EXPECT_THAT(out0.main_index_info(), Not(IsEmpty()));
+ EXPECT_THAT(out0.lite_index_info(), Not(IsEmpty()));
- IndexDebugInfoProto out1 = index_->GetDebugInfo(/*verbosity=*/1);
- EXPECT_THAT(out1.main_index_info().flash_index_storage_info(),
- Not(IsEmpty()));
+ IndexDebugInfoProto out1 = index_->GetDebugInfo(DebugInfoVerbosity::DETAILED);
+ ICING_LOG(DBG) << "main_index_info:\n" << out1.main_index_info();
+ ICING_LOG(DBG) << "lite_index_info:\n" << out1.lite_index_info();
+ EXPECT_THAT(out1.main_index_info(),
+ SizeIs(Gt(out0.main_index_info().size())));
+ EXPECT_THAT(out1.lite_index_info(),
+ SizeIs(Gt(out0.lite_index_info().size())));
// Add one more doc to the lite index. Debug strings should change.
edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
@@ -1429,26 +1434,25 @@ TEST_F(IndexTest, GetDebugInfo) {
ASSERT_THAT(edit.BufferTerm("far"), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- IndexDebugInfoProto out2 = index_->GetDebugInfo(/*verbosity=*/0);
- EXPECT_THAT(out2.lite_index_info().curr_size(), Eq(3));
- EXPECT_THAT(out2.lite_index_info().last_added_document_id(),
- Eq(kDocumentId3));
+ IndexDebugInfoProto out2 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
+ ICING_LOG(DBG) << "main_index_info:\n" << out2.main_index_info();
+ ICING_LOG(DBG) << "lite_index_info:\n" << out2.lite_index_info();
+ EXPECT_THAT(out2.main_index_info(), Not(IsEmpty()));
+ EXPECT_THAT(out2.lite_index_info(), Not(IsEmpty()));
+ EXPECT_THAT(out2.main_index_info(), StrEq(out0.main_index_info()));
+ EXPECT_THAT(out2.lite_index_info(), StrNe(out0.lite_index_info()));
- // Merge into the man index. Debuug strings should change again.
+ // Merge into the man index. Debug strings should change again.
ICING_ASSERT_OK(index_->Merge());
- IndexDebugInfoProto out3 = index_->GetDebugInfo(/*verbosity=*/0);
+ IndexDebugInfoProto out3 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
EXPECT_TRUE(out3.has_index_storage_info());
- EXPECT_THAT(out3.main_index_info().lexicon_info(), Not(IsEmpty()));
- EXPECT_THAT(out3.main_index_info().last_added_document_id(),
- Eq(kDocumentId3));
- EXPECT_THAT(out3.lite_index_info().curr_size(), Eq(0));
- EXPECT_THAT(out3.lite_index_info().hit_buffer_size(), Gt(0));
- EXPECT_THAT(out3.lite_index_info().last_added_document_id(),
- Eq(kInvalidDocumentId));
- EXPECT_THAT(out3.lite_index_info().searchable_end(), Eq(0));
- EXPECT_THAT(out3.lite_index_info().index_crc(), Gt(0));
- EXPECT_THAT(out3.lite_index_info().lexicon_info(), Not(IsEmpty()));
+ ICING_LOG(DBG) << "main_index_info:\n" << out3.main_index_info();
+ ICING_LOG(DBG) << "lite_index_info:\n" << out3.lite_index_info();
+ EXPECT_THAT(out3.main_index_info(), Not(IsEmpty()));
+ EXPECT_THAT(out3.lite_index_info(), Not(IsEmpty()));
+ EXPECT_THAT(out3.main_index_info(), StrNe(out2.main_index_info()));
+ EXPECT_THAT(out3.lite_index_info(), StrNe(out2.lite_index_info()));
}
TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc
index 933f9b5..2e8ba23 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc
@@ -66,25 +66,19 @@ DocHitInfoIteratorFilter::DocHitInfoIteratorFilter(
libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() {
while (delegate_->Advance().ok()) {
- if (!document_store_.DoesDocumentExist(
- delegate_->doc_hit_info().document_id())) {
- // Document doesn't exist, keep searching. This handles deletions and
- // expired documents.
- continue;
- }
-
// Try to get the DocumentFilterData
- auto document_filter_data_or = document_store_.GetDocumentFilterData(
- delegate_->doc_hit_info().document_id());
- if (!document_filter_data_or.ok()) {
+ auto document_filter_data_optional =
+ document_store_.GetAliveDocumentFilterData(
+ delegate_->doc_hit_info().document_id());
+ if (!document_filter_data_optional) {
// Didn't find the DocumentFilterData in the filter cache. This could be
- // because the DocumentId isn't valid or the filter cache is in some
- // invalid state. This is bad, but not the query's responsibility to fix,
- // so just skip this result for now.
+ // because the Document doesn't exist or the DocumentId isn't valid or the
+ // filter cache is in some invalid state. This is bad, but not the query's
+ // responsibility to fix, so just skip this result for now.
continue;
}
// We should be guaranteed that this exists now.
- DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie();
+ DocumentFilterData data = document_filter_data_optional.value();
if (!options_.namespaces.empty() &&
target_namespace_ids_.count(data.namespace_id()) == 0) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index 034c8cb..9d33e2c 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -51,15 +51,15 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
SectionIdMask section_id_mask =
delegate_->doc_hit_info().hit_section_ids_mask();
- auto data_or = document_store_.GetDocumentFilterData(document_id);
- if (!data_or.ok()) {
+ auto data_optional =
+ document_store_.GetAliveDocumentFilterData(document_id);
+ if (!data_optional) {
// Ran into some error retrieving information on this hit, skip
continue;
}
// Guaranteed that the DocumentFilterData exists at this point
- DocumentFilterData data = std::move(data_or).ValueOrDie();
- SchemaTypeId schema_type_id = data.schema_type_id();
+ SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
// A hit can be in multiple sections at once, need to check that at least
// one of the confirmed section ids match the name of the target section
diff --git a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
index f975989..993c3b8 100644
--- a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
+++ b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
@@ -35,7 +35,7 @@ namespace {
//
// $
// blaze-bin/icing/index/iterator/doc-hit-info-iterator_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// $ blaze build --config=android_arm64 -c opt --dynamic_mode=off
@@ -47,7 +47,7 @@ namespace {
// /data/local/tmp/
//
// $ adb shell /data/local/tmp/doc-hit-info-iterator_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
// Functor to be used with std::generate to create a container of DocHitInfos.
// DocHitInfos are generated starting at docid starting_docid and continuing at
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index a5c6baf..fc40225 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -391,15 +391,22 @@ bool LiteIndex::is_full() const {
lexicon_.min_free_fraction() < (1.0 - kTrieFullFraction));
}
-IndexDebugInfoProto::LiteIndexDebugInfoProto LiteIndex::GetDebugInfo(
- int verbosity) {
- IndexDebugInfoProto::LiteIndexDebugInfoProto res;
- res.set_curr_size(header_->cur_size());
- res.set_hit_buffer_size(options_.hit_buffer_size);
- res.set_last_added_document_id(header_->last_added_docid());
- res.set_searchable_end(header_->searchable_end());
- res.set_index_crc(ComputeChecksum().Get());
- lexicon_.GetDebugInfo(verbosity, res.mutable_lexicon_info());
+std::string LiteIndex::GetDebugInfo(DebugInfoVerbosity::Code verbosity) {
+ std::string res;
+ std::string lexicon_info;
+ lexicon_.GetDebugInfo(verbosity, &lexicon_info);
+ IcingStringUtil::SStringAppendF(
+ &res, 0,
+ "curr_size: %u\n"
+ "hit_buffer_size: %u\n"
+ "last_added_document_id %u\n"
+ "searchable_end: %u\n"
+ "index_crc: %u\n"
+ "\n"
+ "lite_lexicon_info:\n%s\n",
+ header_->cur_size(), options_.hit_buffer_size,
+ header_->last_added_docid(), header_->searchable_end(),
+ ComputeChecksum().Get(), lexicon_info.c_str());
return res;
}
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index 378fc94..42d69f8 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -240,9 +240,9 @@ class LiteIndex {
const IcingDynamicTrie& lexicon() const { return lexicon_; }
// Returns debug information for the index in out.
- // verbosity <= 0, simplest debug information - size of lexicon, hit buffer
- // verbosity > 0, more detailed debug information from the lexicon.
- IndexDebugInfoProto::LiteIndexDebugInfoProto GetDebugInfo(int verbosity);
+ // verbosity = BASIC, simplest debug information - size of lexicon, hit buffer
+ // verbosity = DETAILED, more detailed debug information from the lexicon.
+ std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity);
// Returns the byte size of all the elements held in the index. This excludes
// the size of any internal metadata of the index, e.g. the index's header.
diff --git a/icing/index/main/flash-index-storage.cc b/icing/index/main/flash-index-storage.cc
index 3c52375..dabff28 100644
--- a/icing/index/main/flash-index-storage.cc
+++ b/icing/index/main/flash-index-storage.cc
@@ -503,7 +503,8 @@ void FlashIndexStorage::FlushInMemoryFreeList() {
}
}
-void FlashIndexStorage::GetDebugInfo(int verbosity, std::string* out) const {
+void FlashIndexStorage::GetDebugInfo(DebugInfoVerbosity::Code verbosity,
+ std::string* out) const {
// Dump and check integrity of the index block free lists.
out->append("Free lists:\n");
for (size_t i = 0; i < header_block_->header()->num_index_block_infos; ++i) {
diff --git a/icing/index/main/flash-index-storage.h b/icing/index/main/flash-index-storage.h
index 6c6fbb8..fceb26f 100644
--- a/icing/index/main/flash-index-storage.h
+++ b/icing/index/main/flash-index-storage.h
@@ -160,7 +160,7 @@ class FlashIndexStorage {
libtextclassifier3::Status Reset();
// TODO(b/222349894) Convert the string output to a protocol buffer instead.
- void GetDebugInfo(int verbosity, std::string* out) const;
+ void GetDebugInfo(DebugInfoVerbosity::Code verbosity, std::string* out) const;
private:
FlashIndexStorage(const std::string& index_filename,
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
index 2d6007b..158c287 100644
--- a/icing/index/main/main-index.cc
+++ b/icing/index/main/main-index.cc
@@ -16,6 +16,7 @@
#include <cstdint>
#include <cstring>
#include <memory>
+#include <string>
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
@@ -607,21 +608,28 @@ libtextclassifier3::Status MainIndex::AddPrefixBackfillHits(
return libtextclassifier3::Status::OK;
}
-IndexDebugInfoProto::MainIndexDebugInfoProto MainIndex::GetDebugInfo(
- int verbosity) const {
- IndexDebugInfoProto::MainIndexDebugInfoProto res;
+std::string MainIndex::GetDebugInfo(DebugInfoVerbosity::Code verbosity) const {
+ std::string res;
// Lexicon.
- main_lexicon_->GetDebugInfo(verbosity, res.mutable_lexicon_info());
+ std::string lexicon_info;
+ main_lexicon_->GetDebugInfo(verbosity, &lexicon_info);
- res.set_last_added_document_id(last_added_document_id());
+ IcingStringUtil::SStringAppendF(&res, 0,
+ "last_added_document_id: %u\n"
+ "\n"
+ "main_lexicon_info:\n%s\n",
+ last_added_document_id(),
+ lexicon_info.c_str());
- if (verbosity <= 0) {
+ if (verbosity == DebugInfoVerbosity::BASIC) {
return res;
}
- flash_index_storage_->GetDebugInfo(verbosity,
- res.mutable_flash_index_storage_info());
+ std::string flash_index_storage_info;
+ flash_index_storage_->GetDebugInfo(verbosity, &flash_index_storage_info);
+ IcingStringUtil::SStringAppendF(&res, 0, "flash_index_storage_info:\n%s\n",
+ flash_index_storage_info.c_str());
return res;
}
diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h
index abb0418..d6f7d5f 100644
--- a/icing/index/main/main-index.h
+++ b/icing/index/main/main-index.h
@@ -183,11 +183,10 @@ class MainIndex {
IndexStorageInfoProto storage_info) const;
// Returns debug information for the main index in out.
- // verbosity <= 0, simplest debug information - just the lexicon
- // verbosity > 0, more detailed debug information including raw postings
- // lists.
- IndexDebugInfoProto::MainIndexDebugInfoProto GetDebugInfo(
- int verbosity) const;
+ // verbosity = BASIC, simplest debug information - just the lexicon
+ // verbosity = DETAILED, more detailed debug information including raw
+ // postings lists.
+ std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity) const;
private:
libtextclassifier3::Status Init(const std::string& index_directory,
diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc
index bcc35e6..17bb059 100644
--- a/icing/jni/icing-search-engine-jni.cc
+++ b/icing/jni/icing-search-engine-jni.cc
@@ -15,8 +15,11 @@
#include <jni.h>
#include <string>
+#include <utility>
#include "icing/jni/jni-cache.h"
+#include "icing/jni/scoped-primitive-array-critical.h"
+#include "icing/jni/scoped-utf-chars.h"
#include <google/protobuf/message_lite.h>
#include "icing/absl_ports/status_imports.h"
#include "icing/icing-search-engine.h"
@@ -29,6 +32,7 @@
#include "icing/proto/search.pb.h"
#include "icing/proto/storage.pb.h"
#include "icing/proto/usage.pb.h"
+#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
namespace {
@@ -39,13 +43,8 @@ const char kNativePointerField[] = "nativePointer";
bool ParseProtoFromJniByteArray(JNIEnv* env, jbyteArray bytes,
google::protobuf::MessageLite* protobuf) {
- int bytes_size = env->GetArrayLength(bytes);
- uint8_t* bytes_ptr = static_cast<uint8_t*>(
- env->GetPrimitiveArrayCritical(bytes, /*isCopy=*/nullptr));
- bool parsed = protobuf->ParseFromArray(bytes_ptr, bytes_size);
- env->ReleasePrimitiveArrayCritical(bytes, bytes_ptr, /*mode=*/0);
-
- return parsed;
+ icing::lib::ScopedPrimitiveArrayCritical<uint8_t> scoped_array(env, bytes);
+ return protobuf->ParseFromArray(scoped_array.data(), scoped_array.size());
}
jbyteArray SerializeProtoToJniByteArray(
@@ -57,10 +56,8 @@ jbyteArray SerializeProtoToJniByteArray(
return nullptr;
}
- uint8_t* ret_buf = static_cast<uint8_t*>(
- env->GetPrimitiveArrayCritical(ret, /*isCopy=*/nullptr));
- protobuf.SerializeWithCachedSizesToArray(ret_buf);
- env->ReleasePrimitiveArrayCritical(ret, ret_buf, 0);
+ icing::lib::ScopedPrimitiveArrayCritical<uint8_t> scoped_array(env, ret);
+ protobuf.SerializeWithCachedSizesToArray(scoped_array.data());
return ret;
}
@@ -162,11 +159,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetSchemaType(
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
- const char* native_schema_type =
- env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr);
+ icing::lib::ScopedUtfChars scoped_schema_type_chars(env, schema_type);
icing::lib::GetSchemaTypeResultProto get_schema_type_result_proto =
- icing->GetSchemaType(native_schema_type);
- env->ReleaseStringUTFChars(schema_type, native_schema_type);
+ icing->GetSchemaType(scoped_schema_type_chars.c_str());
return SerializeProtoToJniByteArray(env, get_schema_type_result_proto);
}
@@ -193,20 +188,19 @@ JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGet(
JNIEnv* env, jclass clazz, jobject object, jstring name_space, jstring uri,
jbyteArray result_spec_bytes) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
+
icing::lib::GetResultSpecProto get_result_spec;
if (!ParseProtoFromJniByteArray(env, result_spec_bytes, &get_result_spec)) {
ICING_LOG(ERROR) << "Failed to parse GetResultSpecProto in nativeGet";
return nullptr;
}
- icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(env, object);
- const char* native_name_space =
- env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
- const char* native_uri = env->GetStringUTFChars(uri, /*isCopy=*/nullptr);
+ icing::lib::ScopedUtfChars scoped_name_space_chars(env, name_space);
+ icing::lib::ScopedUtfChars scoped_uri_chars(env, uri);
icing::lib::GetResultProto get_result_proto =
- icing->Get(native_name_space, native_uri, get_result_spec);
- env->ReleaseStringUTFChars(uri, native_uri);
- env->ReleaseStringUTFChars(name_space, native_name_space);
+ icing->Get(scoped_name_space_chars.c_str(), scoped_uri_chars.c_str(),
+ get_result_spec);
return SerializeProtoToJniByteArray(env, get_result_proto);
}
@@ -303,13 +297,10 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDelete(
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
- const char* native_name_space =
- env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
- const char* native_uri = env->GetStringUTFChars(uri, /*isCopy=*/nullptr);
+ icing::lib::ScopedUtfChars scoped_name_space_chars(env, name_space);
+ icing::lib::ScopedUtfChars scoped_uri_chars(env, uri);
icing::lib::DeleteResultProto delete_result_proto =
- icing->Delete(native_name_space, native_uri);
- env->ReleaseStringUTFChars(uri, native_uri);
- env->ReleaseStringUTFChars(name_space, native_name_space);
+ icing->Delete(scoped_name_space_chars.c_str(), scoped_uri_chars.c_str());
return SerializeProtoToJniByteArray(env, delete_result_proto);
}
@@ -320,11 +311,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByNamespace(
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
- const char* native_name_space =
- env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
+ icing::lib::ScopedUtfChars scoped_name_space_chars(env, name_space);
icing::lib::DeleteByNamespaceResultProto delete_by_namespace_result_proto =
- icing->DeleteByNamespace(native_name_space);
- env->ReleaseStringUTFChars(name_space, native_name_space);
+ icing->DeleteByNamespace(scoped_name_space_chars.c_str());
return SerializeProtoToJniByteArray(env, delete_by_namespace_result_proto);
}
@@ -335,18 +324,17 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType(
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
- const char* native_schema_type =
- env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr);
+ icing::lib::ScopedUtfChars scoped_schema_type_chars(env, schema_type);
icing::lib::DeleteBySchemaTypeResultProto delete_by_schema_type_result_proto =
- icing->DeleteBySchemaType(native_schema_type);
- env->ReleaseStringUTFChars(schema_type, native_schema_type);
+ icing->DeleteBySchemaType(scoped_schema_type_chars.c_str());
return SerializeProtoToJniByteArray(env, delete_by_schema_type_result_proto);
}
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes) {
+ JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes,
+ jboolean return_deleted_document_info) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -356,7 +344,7 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery(
return nullptr;
}
icing::lib::DeleteByQueryResultProto delete_result_proto =
- icing->DeleteByQuery(search_spec_proto);
+ icing->DeleteByQuery(search_spec_proto, return_deleted_document_info);
return SerializeProtoToJniByteArray(env, delete_result_proto);
}
@@ -445,4 +433,49 @@ Java_com_google_android_icing_IcingSearchEngine_nativeSearchSuggestions(
return SerializeProtoToJniByteArray(env, suggestionResponse);
}
+JNIEXPORT jbyteArray JNICALL
+Java_com_google_android_icing_IcingSearchEngine_nativeGetDebugInfo(
+ JNIEnv* env, jclass clazz, jobject object, jint verbosity) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
+
+ if (!icing::lib::DebugInfoVerbosity::Code_IsValid(verbosity)) {
+ ICING_LOG(ERROR) << "Invalid value for Debug Info verbosity: " << verbosity;
+ return nullptr;
+ }
+
+ icing::lib::DebugInfoResultProto debug_info_result_proto =
+ icing->GetDebugInfo(
+ static_cast<icing::lib::DebugInfoVerbosity::Code>(verbosity));
+
+ return SerializeProtoToJniByteArray(env, debug_info_result_proto);
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_google_android_icing_IcingSearchEngine_nativeShouldLog(
+ JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) {
+ if (!icing::lib::LogSeverity::Code_IsValid(severity)) {
+ ICING_LOG(ERROR) << "Invalid value for logging severity: " << severity;
+ return false;
+ }
+ return icing::lib::ShouldLog(
+ static_cast<icing::lib::LogSeverity::Code>(severity), verbosity);
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_google_android_icing_IcingSearchEngine_nativeSetLoggingLevel(
+ JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) {
+ if (!icing::lib::LogSeverity::Code_IsValid(severity)) {
+ ICING_LOG(ERROR) << "Invalid value for logging severity: " << severity;
+ return false;
+ }
+ return icing::lib::SetLoggingLevel(
+ static_cast<icing::lib::LogSeverity::Code>(severity), verbosity);
+}
+
+JNIEXPORT jstring JNICALL
+Java_com_google_android_icing_IcingSearchEngine_nativeGetLoggingTag(
+ JNIEnv* env, jclass clazz) {
+ return env->NewStringUTF(icing::lib::kIcingLoggingTag);
+}
} // extern "C"
diff --git a/icing/jni/scoped-primitive-array-critical.h b/icing/jni/scoped-primitive-array-critical.h
new file mode 100644
index 0000000..062c145
--- /dev/null
+++ b/icing/jni/scoped-primitive-array-critical.h
@@ -0,0 +1,86 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JNI_SCOPED_PRIMITIVE_ARRAY_CRITICAL_H_
+#define ICING_JNI_SCOPED_PRIMITIVE_ARRAY_CRITICAL_H_
+
+#include <jni.h>
+
+#include <utility>
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class ScopedPrimitiveArrayCritical {
+ public:
+ ScopedPrimitiveArrayCritical(JNIEnv* env, jarray array)
+ : env_(env), array_(array) {
+ if (array_ == nullptr) {
+ array_critical_ = nullptr;
+ array_critical_size_ = 0;
+ } else {
+ array_critical_size_ = env->GetArrayLength(array);
+ array_critical_ = static_cast<T*>(
+ env->GetPrimitiveArrayCritical(array, /*isCopy=*/nullptr));
+ }
+ }
+
+ ScopedPrimitiveArrayCritical(ScopedPrimitiveArrayCritical&& rhs)
+ : env_(nullptr),
+ array_(nullptr),
+ array_critical_(nullptr),
+ array_critical_size_(0) {
+ Swap(rhs);
+ }
+
+ ScopedPrimitiveArrayCritical(const ScopedPrimitiveArrayCritical&) = delete;
+
+ ScopedPrimitiveArrayCritical& operator=(ScopedPrimitiveArrayCritical&& rhs) {
+ Swap(rhs);
+ return *this;
+ }
+
+ ScopedPrimitiveArrayCritical& operator=(const ScopedPrimitiveArrayCritical&) =
+ delete;
+
+ ~ScopedPrimitiveArrayCritical() {
+ if (array_critical_ != nullptr && array_ != nullptr) {
+ env_->ReleasePrimitiveArrayCritical(array_, array_critical_, /*mode=*/0);
+ }
+ }
+
+ T* data() { return array_critical_; }
+ const T* data() const { return array_critical_; }
+
+ size_t size() const { return array_critical_size_; }
+
+ private:
+ void Swap(ScopedPrimitiveArrayCritical& other) {
+ std::swap(env_, other.env_);
+ std::swap(array_, other.array_);
+ std::swap(array_critical_, other.array_critical_);
+ std::swap(array_critical_size_, other.array_critical_size_);
+ }
+
+ JNIEnv* env_;
+ jarray array_;
+ T* array_critical_;
+ size_t array_critical_size_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JNI_SCOPED_PRIMITIVE_ARRAY_CRITICAL_H_
diff --git a/icing/jni/scoped-utf-chars.h b/icing/jni/scoped-utf-chars.h
new file mode 100644
index 0000000..2dafcc1
--- /dev/null
+++ b/icing/jni/scoped-utf-chars.h
@@ -0,0 +1,82 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef ICING_JNI_SCOPED_UTF_CHARS_H_
+#define ICING_JNI_SCOPED_UTF_CHARS_H_
+
+#include <jni.h>
+
+#include <cstddef>
+#include <cstring>
+#include <utility>
+
+namespace icing {
+namespace lib {
+
+// An RAII class to manage access and allocation of a Java string's UTF chars.
+class ScopedUtfChars {
+ public:
+ ScopedUtfChars(JNIEnv* env, jstring s) : env_(env), string_(s) {
+ if (s == nullptr) {
+ utf_chars_ = nullptr;
+ size_ = 0;
+ } else {
+ utf_chars_ = env->GetStringUTFChars(s, /*isCopy=*/nullptr);
+ size_ = strlen(utf_chars_);
+ }
+ }
+
+ ScopedUtfChars(ScopedUtfChars&& rhs)
+ : env_(nullptr), string_(nullptr), utf_chars_(nullptr) {
+ Swap(rhs);
+ }
+
+ ScopedUtfChars(const ScopedUtfChars&) = delete;
+
+ ScopedUtfChars& operator=(ScopedUtfChars&& rhs) {
+ Swap(rhs);
+ return *this;
+ }
+
+ ScopedUtfChars& operator=(const ScopedUtfChars&) = delete;
+
+ ~ScopedUtfChars() {
+ if (utf_chars_ != nullptr) {
+ env_->ReleaseStringUTFChars(string_, utf_chars_);
+ }
+ }
+
+ const char* c_str() const { return utf_chars_; }
+
+ size_t size() const { return size_; }
+
+ private:
+ void Swap(ScopedUtfChars& other) {
+ std::swap(env_, other.env_);
+ std::swap(string_, other.string_);
+ std::swap(utf_chars_, other.utf_chars_);
+ std::swap(size_, other.size_);
+ }
+
+ JNIEnv* env_;
+ jstring string_;
+ const char* utf_chars_;
+ size_t size_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JNI_SCOPED_UTF_CHARS_H_
diff --git a/icing/legacy/index/icing-dynamic-trie.cc b/icing/legacy/index/icing-dynamic-trie.cc
index 77876c4..4428599 100644
--- a/icing/legacy/index/icing-dynamic-trie.cc
+++ b/icing/legacy/index/icing-dynamic-trie.cc
@@ -101,15 +101,9 @@ namespace {
constexpr uint32_t kInvalidNodeIndex = (1U << 24) - 1;
constexpr uint32_t kInvalidNextIndex = ~0U;
-// Returns the number of valid nexts in the array.
-int GetValidNextsSize(IcingDynamicTrie::Next *next_array_start,
- int next_array_length) {
- int valid_nexts_length = 0;
- for (; valid_nexts_length < next_array_length &&
- next_array_start[valid_nexts_length].node_index() != kInvalidNodeIndex;
- ++valid_nexts_length) {
- }
- return valid_nexts_length;
+void ResetMutableNext(IcingDynamicTrie::Next &mutable_next) {
+ mutable_next.set_val(0xff);
+ mutable_next.set_node_index(kInvalidNodeIndex);
}
} // namespace
@@ -769,8 +763,7 @@ IcingDynamicTrie::IcingDynamicTrieStorage::AllocNextArray(int size) {
// Fill with char 0xff so we are sorted properly.
for (int i = 0; i < aligned_size; i++) {
- ret[i].set_val(0xff);
- ret[i].set_node_index(kInvalidNodeIndex);
+ ResetMutableNext(ret[i]);
}
return ret;
}
@@ -1550,9 +1543,7 @@ bool IcingDynamicTrie::ResetNext(uint32_t next_index) {
if (mutable_next == nullptr) {
return false;
}
-
- mutable_next->set_val(0);
- mutable_next->set_node_index(kInvalidNodeIndex);
+ ResetMutableNext(*mutable_next);
return true;
}
@@ -1570,7 +1561,7 @@ bool IcingDynamicTrie::SortNextArray(const Node *node) {
return false;
}
- std::sort(next_array_start, next_array_start + next_array_buffer_size - 1);
+ std::sort(next_array_start, next_array_start + next_array_buffer_size);
return true;
}
@@ -2116,22 +2107,33 @@ const IcingDynamicTrie::Next *IcingDynamicTrie::GetNextByChar(
return found;
}
+int IcingDynamicTrie::GetValidNextsSize(
+ IcingDynamicTrie::Next *next_array_start, int next_array_length) const {
+ // Only searching for key char 0xff is not sufficient, as 0xff can be a valid
+ // character. We must also specify kInvalidNodeIndex as the target node index
+ // when searching the next array.
+ return LowerBound(next_array_start, next_array_start + next_array_length,
+ /*key_char=*/0xff, /*node_index=*/kInvalidNodeIndex) -
+ next_array_start;
+}
+
const IcingDynamicTrie::Next *IcingDynamicTrie::LowerBound(
- const Next *start, const Next *end, uint8_t key_char) const {
+ const Next *start, const Next *end, uint8_t key_char,
+ uint32_t node_index) const {
// Above this value will use binary search instead of linear
// search. 16 was chosen from running some benchmarks with
// different values.
static const uint32_t kBinarySearchCutoff = 16;
+ Next key_next(key_char, node_index);
if (end - start >= kBinarySearchCutoff) {
// Binary search.
- Next key_next(key_char, 0);
return lower_bound(start, end, key_next);
} else {
// Linear search.
const Next *found;
for (found = start; found < end; found++) {
- if (found->val() >= key_char) {
+ if (!(*found < key_next)) {
// Should have gotten match.
break;
}
@@ -2275,6 +2277,40 @@ std::vector<int> IcingDynamicTrie::FindBranchingPrefixLengths(const char *key,
return prefix_lengths;
}
+bool IcingDynamicTrie::IsBranchingTerm(const char *key) const {
+ if (!is_initialized()) {
+ ICING_LOG(FATAL) << "DynamicTrie not initialized";
+ }
+
+ if (storage_->empty()) {
+ return false;
+ }
+
+ uint32_t best_node_index;
+ int key_offset;
+ FindBestNode(key, &best_node_index, &key_offset, /*prefix=*/true);
+ const Node *cur_node = storage_->GetNode(best_node_index);
+
+ if (cur_node->is_leaf()) {
+ return false;
+ }
+
+ // key is not present in the trie.
+ if (key[key_offset] != '\0') {
+ return false;
+ }
+
+ // Found key as an intermediate node, but key is not a valid term stored in
+ // the trie.
+ if (GetNextByChar(cur_node, '\0') == nullptr) {
+ return false;
+ }
+
+ // The intermediate node for key must have more than two children for key to
+ // be a branching term, one of which represents the leaf node for key itself.
+ return cur_node->log2_num_children() > 1;
+}
+
void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const {
Stats stats;
CollectStats(&stats);
@@ -2500,7 +2536,26 @@ bool IcingDynamicTrie::Delete(const std::string_view key) {
for (uint32_t next_index : nexts_to_reset) {
ResetNext(next_index);
}
- SortNextArray(last_multichild_node);
+
+ if (last_multichild_node != nullptr) {
+ SortNextArray(last_multichild_node);
+ uint32_t next_array_buffer_size =
+ 1u << last_multichild_node->log2_num_children();
+ Next *next_array_start = this->storage_->GetMutableNextArray(
+ last_multichild_node->next_index(), next_array_buffer_size);
+ uint32_t num_children =
+ GetValidNextsSize(next_array_start, next_array_buffer_size);
+ // Shrink the next array if we can.
+ if (num_children == next_array_buffer_size / 2) {
+ Node *mutable_node = storage_->GetMutableNode(
+ storage_->GetNodeIndex(last_multichild_node));
+ mutable_node->set_log2_num_children(mutable_node->log2_num_children() -
+ 1);
+ // Add the unused second half of the next array to the free list.
+ storage_->FreeNextArray(next_array_start + next_array_buffer_size / 2,
+ mutable_node->log2_num_children());
+ }
+ }
return true;
}
diff --git a/icing/legacy/index/icing-dynamic-trie.h b/icing/legacy/index/icing-dynamic-trie.h
index 013b926..ec8b31a 100644
--- a/icing/legacy/index/icing-dynamic-trie.h
+++ b/icing/legacy/index/icing-dynamic-trie.h
@@ -400,6 +400,16 @@ class IcingDynamicTrie : public IIcingStorage {
// itself. If utf8 is true, does not cut key mid-utf8.
std::vector<int> FindBranchingPrefixLengths(const char *key, bool utf8) const;
+ // Check if key is a branching term.
+ //
+ // key is a branching term, if and only if there exists terms s1 and s2 in the
+ // trie such that key is the maximum common prefix of s1 and s2, but s1 and s2
+ // are not prefixes of each other.
+ //
+ // The function assumes that key is already present in the trie. Otherwise,
+ // false will be returned.
+ bool IsBranchingTerm(const char *key) const;
+
void GetDebugInfo(int verbosity, std::string *out) const override;
double min_free_fraction() const;
@@ -612,8 +622,11 @@ class IcingDynamicTrie : public IIcingStorage {
// Helpers for Find and Insert.
const Next *GetNextByChar(const Node *node, uint8_t key_char) const;
- const Next *LowerBound(const Next *start, const Next *end,
- uint8_t key_char) const;
+ const Next *LowerBound(const Next *start, const Next *end, uint8_t key_char,
+ uint32_t node_index = 0) const;
+ // Returns the number of valid nexts in the array.
+ int GetValidNextsSize(IcingDynamicTrie::Next *next_array_start,
+ int next_array_length) const;
void FindBestNode(const char *key, uint32_t *best_node_index, int *key_offset,
bool prefix, bool utf8 = false) const;
diff --git a/icing/legacy/index/icing-dynamic-trie_test.cc b/icing/legacy/index/icing-dynamic-trie_test.cc
index 193765b..b69ee64 100644
--- a/icing/legacy/index/icing-dynamic-trie_test.cc
+++ b/icing/legacy/index/icing-dynamic-trie_test.cc
@@ -20,6 +20,7 @@
#include <memory>
#include <string>
#include <unordered_map>
+#include <unordered_set>
#include <vector>
#include "icing/text_classifier/lib3/utils/hash/farmhash.h"
@@ -27,15 +28,18 @@
#include "gtest/gtest.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/testing/random-string.h"
#include "icing/testing/tmp-directory.h"
-
-using testing::ElementsAre;
+#include "icing/util/logging.h"
namespace icing {
namespace lib {
namespace {
+using testing::ContainerEq;
+using testing::ElementsAre;
+
constexpr std::string_view kKeys[] = {
"", "ab", "ac", "abd", "bac", "bb", "bacd", "abbb", "abcdefg",
};
@@ -962,6 +966,102 @@ TEST_F(IcingDynamicTrieTest, DeletingNonExistingKeyShouldReturnTrue) {
EXPECT_TRUE(trie.Find("bed", &value));
}
+TEST_F(IcingDynamicTrieTest, DeletionResortsFullNextArray) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+ // 'f' -> [ 'a', 'j', 'o', 'u' ]
+ ASSERT_TRUE(trie.Insert("foul", &value));
+ ASSERT_TRUE(trie.Insert("far", &value));
+ ASSERT_TRUE(trie.Insert("fudge", &value));
+ ASSERT_TRUE(trie.Insert("fjord", &value));
+
+ // Delete the third child
+ EXPECT_TRUE(trie.Delete("foul"));
+
+ std::vector<std::string> remaining;
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.push_back(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ElementsAre("far", "fjord", "fudge"));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionResortsPartiallyFilledNextArray) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+ // 'f' -> [ 'a', 'o', 'u', 0xFF ]
+ ASSERT_TRUE(trie.Insert("foul", &value));
+ ASSERT_TRUE(trie.Insert("far", &value));
+ ASSERT_TRUE(trie.Insert("fudge", &value));
+
+ // Delete the second child
+ EXPECT_TRUE(trie.Delete("foul"));
+
+ std::vector<std::string> remaining;
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.push_back(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ElementsAre("far", "fudge"));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionLoadTest) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ std::default_random_engine random;
+ ICING_LOG(ERROR) << "Seed: " << std::default_random_engine::default_seed;
+ std::vector<std::string> terms;
+ uint32_t value;
+ // Randomly generate 2048 terms.
+ for (int i = 0; i < 2048; ++i) {
+ terms.push_back(RandomString("abcdefg", 5, &random));
+ ASSERT_TRUE(trie.Insert(terms.back().c_str(), &value));
+ }
+
+ // Randomly delete 1024 terms.
+ std::unordered_set<std::string> exp_remaining(terms.begin(), terms.end());
+ std::shuffle(terms.begin(), terms.end(), random);
+ for (int i = 0; i < 1024; ++i) {
+ exp_remaining.erase(terms[i]);
+ ASSERT_TRUE(trie.Delete(terms[i].c_str()));
+ }
+
+ // Check that the iterator still works, and the remaining terms are correct.
+ std::unordered_set<std::string> remaining;
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.insert(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ContainerEq(exp_remaining));
+
+ // Check that we can still insert terms after delete.
+ for (int i = 0; i < 2048; ++i) {
+ std::string term = RandomString("abcdefg", 5, &random);
+ ASSERT_TRUE(trie.Insert(term.c_str(), &value));
+ exp_remaining.insert(term);
+ }
+ remaining.clear();
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.insert(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ContainerEq(exp_remaining));
+}
+
} // namespace
// The tests below are accessing private methods and fields of IcingDynamicTrie
@@ -1133,5 +1233,124 @@ TEST_F(IcingDynamicTrieTest, BitmapsClosedWhenInitFails) {
ASSERT_EQ(0, trie.property_bitmaps_.size());
}
+TEST_F(IcingDynamicTrieTest, IsBranchingTerm) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+
+ ASSERT_TRUE(trie.Insert("", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+
+ ASSERT_TRUE(trie.Insert("ab", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+
+ ASSERT_TRUE(trie.Insert("ac", &value));
+ // "" is a prefix of "ab" and "ac", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+
+ ASSERT_TRUE(trie.Insert("ba", &value));
+ // "" now branches to "ba"
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+
+ ASSERT_TRUE(trie.Insert("a", &value));
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ // "a" branches to "ab" and "ac"
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+
+ ASSERT_TRUE(trie.Insert("abc", &value));
+ ASSERT_TRUE(trie.Insert("acd", &value));
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ // "ab" is a prefix of "abc", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ // "ac" is a prefix of "acd", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("acd"));
+
+ ASSERT_TRUE(trie.Insert("abcd", &value));
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ // "ab" is a prefix of "abc" and "abcd", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+ // "abc" is a prefix of "abcd", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("acd"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abcd"));
+
+ ASSERT_TRUE(trie.Insert("abd", &value));
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ // "ab" branches to "abc" and "abd"
+ EXPECT_TRUE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("acd"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abcd"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abd"));
+}
+
+TEST_F(IcingDynamicTrieTest, IsBranchingTermShouldWorkForNonExistingTerms) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+
+ ASSERT_TRUE(trie.Insert("aa", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+
+ ASSERT_TRUE(trie.Insert("", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+
+ ASSERT_TRUE(trie.Insert("ab", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+
+ ASSERT_TRUE(trie.Insert("ac", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+
+ ASSERT_TRUE(trie.Insert("ad", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+
+ ASSERT_TRUE(trie.Insert("abcd", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_TRUE(trie.Insert("abce", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_TRUE(trie.Insert("abcf", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_TRUE(trie.Insert("abc_suffix", &value));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc_s"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc_su"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc_suffi"));
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index e48fe78..b505ac5 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -37,7 +37,7 @@
// //icing/query:query-processor_benchmark
//
// $ blaze-bin/icing/query/query-processor_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// Make target //icing/tokenization:language-segmenter depend on
@@ -53,7 +53,7 @@
// $ adb push blaze-bin/icing/query/query-processor_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/query-processor_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/query-processor_benchmark --benchmark_filter=all
// --adb
// Flag to tell the benchmark that it'll be run on an Android device via adb,
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index eaa0efc..a725213 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -127,22 +127,23 @@ class QueryProcessorTest : public Test {
schema_store_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
-
Filesystem filesystem_;
const std::string test_dir_;
const std::string store_dir_;
const std::string schema_store_dir_;
+
+ private:
+ IcingFilesystem icing_filesystem_;
+ const std::string index_dir_;
+
+ protected:
std::unique_ptr<Index> index_;
std::unique_ptr<LanguageSegmenter> language_segmenter_;
std::unique_ptr<Normalizer> normalizer_;
- std::unique_ptr<SchemaStore> schema_store_;
- std::unique_ptr<DocumentStore> document_store_;
FakeClock fake_clock_;
std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
-
- private:
- IcingFilesystem icing_filesystem_;
- const std::string index_dir_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
};
TEST_F(QueryProcessorTest, CreationWithNullPointerShouldFail) {
diff --git a/icing/query/suggestion-processor.cc b/icing/query/suggestion-processor.cc
index cfa53f6..b1a5a9e 100644
--- a/icing/query/suggestion-processor.cc
+++ b/icing/query/suggestion-processor.cc
@@ -93,4 +93,4 @@ SuggestionProcessor::SuggestionProcessor(
normalizer_(*normalizer) {}
} // namespace lib
-} // namespace icing
+} // namespace icing \ No newline at end of file
diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc
index ba4c90a..b3012e9 100644
--- a/icing/query/suggestion-processor_test.cc
+++ b/icing/query/suggestion-processor_test.cc
@@ -99,16 +99,18 @@ class SuggestionProcessorTest : public Test {
Filesystem filesystem_;
const std::string test_dir_;
const std::string store_dir_;
+
+ private:
+ IcingFilesystem icing_filesystem_;
+ const std::string index_dir_;
+
+ protected:
std::unique_ptr<Index> index_;
std::unique_ptr<LanguageSegmenter> language_segmenter_;
std::unique_ptr<Normalizer> normalizer_;
+ FakeClock fake_clock_;
std::unique_ptr<SchemaStore> schema_store_;
std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
- FakeClock fake_clock_;
-
- private:
- IcingFilesystem icing_filesystem_;
- const std::string index_dir_;
};
constexpr DocumentId kDocumentId0 = 0;
diff --git a/icing/result/page-result.h b/icing/result/page-result.h
new file mode 100644
index 0000000..6645593
--- /dev/null
+++ b/icing/result/page-result.h
@@ -0,0 +1,46 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_PAGE_RESULT_H_
+#define ICING_RESULT_PAGE_RESULT_H_
+
+#include <vector>
+
+#include "icing/proto/search.pb.h"
+
+namespace icing {
+namespace lib {
+
+// Contains information of the search result of one page.
+struct PageResult {
+ PageResult(std::vector<SearchResultProto::ResultProto> results_in,
+ int num_results_with_snippets_in, int requested_page_size_in)
+ : results(std::move(results_in)),
+ num_results_with_snippets(num_results_with_snippets_in),
+ requested_page_size(requested_page_size_in) {}
+
+ // Results of one page
+ std::vector<SearchResultProto::ResultProto> results;
+
+ // Number of results with snippets.
+ int num_results_with_snippets;
+
+ // The page size for this query. This should always be >= results.size().
+ int requested_page_size;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_PAGE_RESULT_H_
diff --git a/icing/result/projection-tree.h b/icing/result/projection-tree.h
index b2e5ffc..8e38aaf 100644
--- a/icing/result/projection-tree.h
+++ b/icing/result/projection-tree.h
@@ -18,7 +18,6 @@
#include <string_view>
#include <vector>
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/search.pb.h"
namespace icing {
@@ -31,14 +30,23 @@ class ProjectionTree {
struct Node {
explicit Node(std::string_view name = "") : name(name) {}
+ // TODO: change string_view to string
std::string_view name;
std::vector<Node> children;
+
+ bool operator==(const Node& other) const {
+ return name == other.name && children == other.children;
+ }
};
explicit ProjectionTree(const TypePropertyMask& type_field_mask);
const Node& root() const { return root_; }
+ bool operator==(const ProjectionTree& other) const {
+ return root_ == other.root_;
+ }
+
private:
// Add a child node with property_name to current_children and returns a
// pointer to the child node.
diff --git a/icing/result/result-retriever-v2.cc b/icing/result/result-retriever-v2.cc
new file mode 100644
index 0000000..195f641
--- /dev/null
+++ b/icing/result/result-retriever-v2.cc
@@ -0,0 +1,175 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-retriever-v2.h"
+
+#include <memory>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/page-result.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/projector.h"
+#include "icing/result/snippet-context.h"
+#include "icing/result/snippet-retriever.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-id.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+bool GroupResultLimiterV2::ShouldBeRemoved(
+ const ScoredDocumentHit& scored_document_hit,
+ const std::unordered_map<NamespaceId, int>& namespace_group_id_map,
+ const DocumentStore& document_store,
+ std::vector<int>& group_result_limits) const {
+ auto document_filter_data_optional =
+ document_store.GetAliveDocumentFilterData(
+ scored_document_hit.document_id());
+ if (!document_filter_data_optional) {
+ // The document doesn't exist.
+ return true;
+ }
+ NamespaceId namespace_id =
+ document_filter_data_optional.value().namespace_id();
+ auto iter = namespace_group_id_map.find(namespace_id);
+ if (iter == namespace_group_id_map.end()) {
+ // If a namespace id isn't found in namespace_group_id_map, then there are
+ // no limits placed on results from this namespace.
+ return false;
+ }
+ int& count = group_result_limits.at(iter->second);
+ if (count <= 0) {
+ return true;
+ }
+ --count;
+ return false;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<ResultRetrieverV2>>
+ResultRetrieverV2::Create(
+ const DocumentStore* doc_store, const SchemaStore* schema_store,
+ const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
+ std::unique_ptr<const GroupResultLimiterV2> group_result_limiter) {
+ ICING_RETURN_ERROR_IF_NULL(doc_store);
+ ICING_RETURN_ERROR_IF_NULL(schema_store);
+ ICING_RETURN_ERROR_IF_NULL(language_segmenter);
+ ICING_RETURN_ERROR_IF_NULL(normalizer);
+ ICING_RETURN_ERROR_IF_NULL(group_result_limiter);
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<SnippetRetriever> snippet_retriever,
+ SnippetRetriever::Create(schema_store, language_segmenter, normalizer));
+
+ return std::unique_ptr<ResultRetrieverV2>(
+ new ResultRetrieverV2(doc_store, std::move(snippet_retriever),
+ std::move(group_result_limiter)));
+}
+
+std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage(
+ ResultStateV2& result_state) const {
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ // For calculating page
+ int original_scored_document_hits_ranker_size =
+ result_state.scored_document_hits_ranker->size();
+ int num_results_with_snippets = 0;
+
+ const SnippetContext& snippet_context = result_state.snippet_context();
+ const std::unordered_map<std::string, ProjectionTree>& projection_tree_map =
+ result_state.projection_tree_map();
+ auto wildcard_projection_tree_itr = projection_tree_map.find(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+
+ // Calculates how many snippets to return for this page.
+ int remaining_num_to_snippet =
+ snippet_context.snippet_spec.num_to_snippet() - result_state.num_returned;
+ if (remaining_num_to_snippet < 0) {
+ remaining_num_to_snippet = 0;
+ }
+
+ // Retrieve info
+ std::vector<SearchResultProto::ResultProto> results;
+ while (results.size() < result_state.num_per_page() &&
+ !result_state.scored_document_hits_ranker->empty()) {
+ ScoredDocumentHit next_best_document_hit =
+ result_state.scored_document_hits_ranker->PopNext();
+ if (group_result_limiter_->ShouldBeRemoved(
+ next_best_document_hit, result_state.namespace_group_id_map(),
+ doc_store_, result_state.group_result_limits)) {
+ continue;
+ }
+
+ libtextclassifier3::StatusOr<DocumentProto> document_or =
+ doc_store_.Get(next_best_document_hit.document_id());
+ if (!document_or.ok()) {
+ // Skip the document if getting errors.
+ ICING_LOG(WARNING) << "Fail to fetch document from document store: "
+ << document_or.status().error_message();
+ continue;
+ }
+
+ DocumentProto document = std::move(document_or).ValueOrDie();
+ // Apply projection
+ auto itr = projection_tree_map.find(document.schema());
+ if (itr != projection_tree_map.end()) {
+ projector::Project(itr->second.root().children, &document);
+ } else if (wildcard_projection_tree_itr != projection_tree_map.end()) {
+ projector::Project(wildcard_projection_tree_itr->second.root().children,
+ &document);
+ }
+
+ SearchResultProto::ResultProto result;
+ // Add the snippet if requested.
+ if (snippet_context.snippet_spec.num_matches_per_property() > 0 &&
+ remaining_num_to_snippet > results.size()) {
+ SnippetProto snippet_proto = snippet_retriever_->RetrieveSnippet(
+ snippet_context.query_terms, snippet_context.match_type,
+ snippet_context.snippet_spec, document,
+ next_best_document_hit.hit_section_id_mask());
+ *result.mutable_snippet() = std::move(snippet_proto);
+ ++num_results_with_snippets;
+ }
+
+ // Add the document, itself.
+ *result.mutable_document() = std::move(document);
+ result.set_score(next_best_document_hit.score());
+ results.push_back(std::move(result));
+ }
+
+ // Update numbers in ResultState
+ result_state.num_returned += results.size();
+ result_state.IncrementNumTotalHits(
+ result_state.scored_document_hits_ranker->size() -
+ original_scored_document_hits_ranker_size);
+
+ bool has_more_results = !result_state.scored_document_hits_ranker->empty();
+
+ return std::make_pair(
+ PageResult(std::move(results), num_results_with_snippets,
+ result_state.num_per_page()),
+ has_more_results);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever-v2.h b/icing/result/result-retriever-v2.h
new file mode 100644
index 0000000..b481cfc
--- /dev/null
+++ b/icing/result/result-retriever-v2.h
@@ -0,0 +1,108 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_RETRIEVER_V2_H_
+#define ICING_RESULT_RETRIEVER_V2_H_
+
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/result/page-result.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/result/snippet-retriever.h"
+#include "icing/schema/schema-store.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-id.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+
+namespace icing {
+namespace lib {
+
+class GroupResultLimiterV2 {
+ public:
+ GroupResultLimiterV2() {}
+
+ virtual ~GroupResultLimiterV2() = default;
+
+ // Returns true if the scored_document_hit should be removed.
+ virtual bool ShouldBeRemoved(
+ const ScoredDocumentHit& scored_document_hit,
+ const std::unordered_map<NamespaceId, int>& namespace_group_id_map,
+ const DocumentStore& document_store,
+ std::vector<int>& group_result_limits) const;
+};
+
+class ResultRetrieverV2 {
+ public:
+ // Factory function to create a ResultRetrieverV2 which does not take
+ // ownership of any input components, and all pointers must refer to valid
+ // objects that outlive the created ResultRetrieverV2 instance.
+ //
+ // Returns:
+ // A ResultRetrieverV2 on success
+ // FAILED_PRECONDITION on any null pointer input
+ static libtextclassifier3::StatusOr<std::unique_ptr<ResultRetrieverV2>>
+ Create(const DocumentStore* doc_store, const SchemaStore* schema_store,
+ const LanguageSegmenter* language_segmenter,
+ const Normalizer* normalizer,
+ std::unique_ptr<const GroupResultLimiterV2> group_result_limiter =
+ std::make_unique<const GroupResultLimiterV2>());
+
+ // Retrieves results (pairs of DocumentProtos and SnippetProtos) with the
+ // given ResultState which holds document and snippet information. It pulls
+ // out the next top rank documents from ResultState, retrieves the documents
+ // from storage, updates ResultState, and finally wraps the result + other
+ // information into PageResult. The expected number of documents to return is
+ // min(num_per_page, the number of all scored document hits) inside
+ // ResultState.
+ //
+ // The number of snippets to return is based on the total number of snippets
+ // needed and number of snippets that have already been returned previously
+ // for the same query. The order of results returned will be sorted by
+ // scored_document_hit_comparator inside ResultState.
+ //
+ // An additional boolean value will be returned, indicating if ResultState has
+ // remaining documents to be retrieved next round.
+ //
+ // All errors will be ignored. It will keep retrieving the next document and
+ // valid documents will be included in PageResult.
+ //
+ // Returns:
+ // std::pair<PageResult, bool>
+ std::pair<PageResult, bool> RetrieveNextPage(
+ ResultStateV2& result_state) const;
+
+ private:
+ explicit ResultRetrieverV2(
+ const DocumentStore* doc_store,
+ std::unique_ptr<SnippetRetriever> snippet_retriever,
+ std::unique_ptr<const GroupResultLimiterV2> group_result_limiter)
+ : doc_store_(*doc_store),
+ snippet_retriever_(std::move(snippet_retriever)),
+ group_result_limiter_(std::move(group_result_limiter)) {}
+
+ const DocumentStore& doc_store_;
+ std::unique_ptr<SnippetRetriever> snippet_retriever_;
+ const std::unique_ptr<const GroupResultLimiterV2> group_result_limiter_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_RETRIEVER_V2_H_
diff --git a/icing/result/result-retriever-v2_group-result-limiter-test.cc b/icing/result/result-retriever-v2_group-result-limiter-test.cc
new file mode 100644
index 0000000..e4bfe09
--- /dev/null
+++ b/icing/result/result-retriever-v2_group-result-limiter-test.cc
@@ -0,0 +1,639 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/page-result.h"
+#include "icing/result/result-retriever-v2.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Pair;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+using ::testing::UnorderedElementsAre;
+
+class ResultRetrieverV2GroupResultLimiterTest : public testing::Test {
+ protected:
+ ResultRetrieverV2GroupResultLimiterTest()
+ : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/10000));
+
+ SchemaProto schema;
+ schema.add_types()->set_schema_type("Document");
+ ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<DocumentStore> document_store_;
+ FakeClock fake_clock_;
+};
+
+// TODO(sungyc): Refactor helper functions below (builder classes or common test
+// utility).
+
+SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(match_type);
+ return search_spec;
+}
+
+ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
+ : ScoringSpecProto::Order::ASC);
+ return scoring_spec;
+}
+
+ResultSpecProto CreateResultSpec(int num_per_page) {
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
+
+// Verifies that a ResultGrouping with max_results = 1 caps "namespace" to its
+// single top-scored document, and that a hit dropped by the limiter does not
+// cause has_more_results to be reported.
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingShouldLimitResults) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ // Only document scores matter for this test, so no section hits are set.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())};
+
+ // Create a ResultSpec that limits "namespace" to a single result.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Only the top ranked document in "namespace" (document2), should be
+ // returned.
+ auto [page_result, has_more_results] =
+ result_retriever->RetrieveNextPage(result_state);
+ ASSERT_THAT(page_result.results, SizeIs(1));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
+ // Document1 has not been returned due to GroupResultLimiter, but since it was
+ // "filtered out", there should be no more results.
+ EXPECT_FALSE(has_more_results);
+}
+
+// Verifies that limiting "namespace1" to a single result leaves an unlisted
+// namespace ("namespace2") entirely unconstrained.
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingDoesNotLimitOtherNamespaceResults) {
+ // Creates 4 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Document")
+ .SetScore(3)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3));
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Document")
+ .SetScore(4)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store_->Put(document4));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score()),
+ ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()),
+ ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score())};
+
+ // Create a ResultSpec that limits "namespace1" to a single result, but
+ // doesn't limit "namespace2".
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace1");
+
+ // Creates a ResultState with 4 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // All documents in "namespace2" should be returned.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document4));
+ EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document3));
+ EXPECT_THAT(page_result.results.at(2).document(), EqualsProto(document2));
+}
+
+// Verifies that a namespace listed in a ResultGrouping but absent from the
+// document store is ignored: the limit still applies to the existing
+// namespace in the same grouping.
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingNonexistentNamespaceShouldBeIgnored) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())};
+
+ // Create a ResultSpec that limits "namespace"+"nonExistentNamespace" to a
+ // single result.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace");
+ result_grouping->add_namespaces("nonexistentNamespace");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Only the top ranked document in "namespace" (document2), should be
+ // returned. The presence of "nonexistentNamespace" in the same result
+ // grouping should have no effect.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(1));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
+}
+
+// Verifies independent limits across two groupings: "namespace1" is capped at
+// one result, while "namespace2" and "namespace3" share a combined cap of two
+// results.
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingMultiNamespaceGrouping) {
+ // Creates 6 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4 < document5 <
+ // document6
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Document")
+ .SetScore(3)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3));
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Document")
+ .SetScore(4)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store_->Put(document4));
+
+ DocumentProto document5 = DocumentBuilder()
+ .SetKey("namespace3", "uri/5")
+ .SetSchema("Document")
+ .SetScore(5)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ document_store_->Put(document5));
+
+ DocumentProto document6 = DocumentBuilder()
+ .SetKey("namespace3", "uri/6")
+ .SetSchema("Document")
+ .SetScore(6)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6,
+ document_store_->Put(document6));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score()),
+ ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()),
+ ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score()),
+ ScoredDocumentHit(document_id5, kSectionIdMaskNone, document5.score()),
+ ScoredDocumentHit(document_id6, kSectionIdMaskNone, document6.score())};
+
+ // Create a ResultSpec that limits "namespace1" to a single result and limits
+ // "namespace2"+"namespace3" to a total of two results.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(2);
+ result_grouping->add_namespaces("namespace2");
+ result_grouping->add_namespaces("namespace3");
+
+ // Creates a ResultState with 6 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Only the top-ranked result in "namespace1" (document2) should be returned.
+ // Only the top-ranked results across "namespace2" and "namespace3"
+ // (document6, document5) should be returned.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document6));
+ EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document5));
+ EXPECT_THAT(page_result.results.at(2).document(), EqualsProto(document2));
+}
+
+// Verifies that a grouping naming only a nonexistent namespace has no effect:
+// results from real namespaces are returned without any limit.
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingOnlyNonexistentNamespaces) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())};
+
+ // Create a ResultSpec that limits "nonexistentNamespace" to a single result,
+ // but doesn't limit "namespace".
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("nonexistentNamespace");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // All documents in "namespace" should be returned. The presence of
+ // "nonexistentNamespace" should have no effect.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
+ EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document1));
+}
+
+// Verifies the internal bookkeeping of ResultStateV2 (size of the scored-hit
+// ranker, num_returned, namespace_group_id_map, group_result_limits) across
+// two retrieved pages while the GroupResultLimiter filters hits.
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ShouldUpdateResultStateCorrectlyWithGroupResultLimiter) {
+ // Creates 5 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4 < document5
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace2", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri/3")
+ .SetSchema("Document")
+ .SetScore(3)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3));
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Document")
+ .SetScore(4)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store_->Put(document4));
+
+ DocumentProto document5 = DocumentBuilder()
+ .SetKey("namespace2", "uri/5")
+ .SetSchema("Document")
+ .SetScore(5)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ document_store_->Put(document5));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score()),
+ ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()),
+ ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score()),
+ ScoredDocumentHit(document_id5, kSectionIdMaskNone, document5.score())};
+
+ // Create a ResultSpec that limits "namespace1" to 3 results and "namespace2"
+ // to a single result.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(3);
+ result_grouping->add_namespaces("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace2");
+
+ // Get namespace ids.
+ ICING_ASSERT_OK_AND_ASSIGN(NamespaceId namespace_id1,
+ document_store_->GetNamespaceId("namespace1"));
+ ICING_ASSERT_OK_AND_ASSIGN(NamespaceId namespace_id2,
+ document_store_->GetNamespaceId("namespace2"));
+
+ // Creates a ResultState with 5 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ *document_store_);
+ {
+ // ResultStateV2's members are guarded by its mutex, so inspect them under
+ // a shared (reader) lock.
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ ASSERT_THAT(
+ result_state.namespace_group_id_map(),
+ UnorderedElementsAre(Pair(namespace_id1, 0), Pair(namespace_id2, 1)));
+ ASSERT_THAT(result_state.group_result_limits, ElementsAre(3, 1));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // document5, document4, document1 belong to namespace2 (with max_results =
+ // 1).
+ // document3, document2 belong to namespace1 (with max_results = 3).
+ // Since num_per_page is 2, we expect to get document5 and document3 in the
+ // first page.
+ auto [page_result1, has_more_results1] =
+ result_retriever->RetrieveNextPage(result_state);
+ ASSERT_THAT(page_result1.results, SizeIs(2));
+ ASSERT_THAT(page_result1.results.at(0).document(), EqualsProto(document5));
+ ASSERT_THAT(page_result1.results.at(1).document(), EqualsProto(document3));
+ ASSERT_TRUE(has_more_results1);
+ {
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ // Should remove document5, document4 and document3 from
+ // scored_document_hits. It removes more than num_per_page documents because
+ // document4 is filtered out by GroupResultLimiter and ResultRetriever has
+ // to fetch the next one until returning num_per_page documents or no
+ // remaining documents in scored_document_hits.
+ ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
+ document1.score());
+ ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
+ document2.score());
+ EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(2)));
+
+ // Even though we removed 3 document hits from scored_document_hits this
+ // round, num_returned should still be 2, since document4 was "filtered out"
+ // and should not be counted into num_returned.
+ EXPECT_THAT(result_state.num_returned, Eq(2));
+ // namespace_group_id_map should be unchanged.
+ EXPECT_THAT(
+ result_state.namespace_group_id_map(),
+ UnorderedElementsAre(Pair(namespace_id1, 0), Pair(namespace_id2, 1)));
+ // GroupResultLimiter should decrement the # in group_result_limits.
+ EXPECT_THAT(result_state.group_result_limits, ElementsAre(2, 0));
+ }
+
+ // Although there are document2 and document1 left, since namespace2 has
+ // reached its max results, document1 should be excluded from the second page.
+ auto [page_result2, has_more_results2] =
+ result_retriever->RetrieveNextPage(result_state);
+ ASSERT_THAT(page_result2.results, SizeIs(1));
+ ASSERT_THAT(page_result2.results.at(0).document(), EqualsProto(document2));
+ ASSERT_FALSE(has_more_results2);
+ {
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ // Should remove document2 and document1 from scored_document_hits.
+ EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(IsEmpty()));
+ // Even though we removed 2 document hits from scored_document_hits this
+ // round, num_returned should only be incremented by 1 (and thus become 3),
+ // since document1 was "filtered out" and should not be counted into
+ // num_returned.
+ EXPECT_THAT(result_state.num_returned, Eq(3));
+ // namespace_group_id_map should be unchanged.
+ EXPECT_THAT(
+ result_state.namespace_group_id_map(),
+ UnorderedElementsAre(Pair(namespace_id1, 0), Pair(namespace_id2, 1)));
+ // GroupResultLimiter should decrement the # in group_result_limits.
+ EXPECT_THAT(result_state.group_result_limits, ElementsAre(1, 0));
+ }
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever-v2_projection-test.cc b/icing/result/result-retriever-v2_projection-test.cc
new file mode 100644
index 0000000..bdd1715
--- /dev/null
+++ b/icing/result/result-retriever-v2_projection-test.cc
@@ -0,0 +1,1281 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/page-result.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/result-retriever-v2.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::SizeIs;
+
+// Short aliases for the proto enum values used when building the test schema.
+constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto::Cardinality::OPTIONAL;
+
+constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN =
+ StringIndexingConfig::TokenizerType::PLAIN;
+
+constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY;
+constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX;
+
+// Test fixture for ResultRetrieverV2 projection tests. Sets up a temporary
+// directory holding a schema store (with an Email/Person schema), a document
+// store, a language segmenter, and a normalizer.
+class ResultRetrieverV2ProjectionTest : public testing::Test {
+ protected:
+ ResultRetrieverV2ProjectionTest() : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ // Initializes ICU data (when the tokenizer needs it), the language
+ // segmenter, schema store, normalizer, the Email/Person schema, and the
+ // document store, in that order.
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/10000));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ // Returns the SectionId assigned to `property` within schema `type`, or
+ // kInvalidSectionId if the type or property is not found. Scans section ids
+ // in order until GetSectionMetadata stops returning metadata.
+ SectionId GetSectionId(const std::string& type, const std::string& property) {
+ auto type_id_or = schema_store_->GetSchemaTypeId(type);
+ if (!type_id_or.ok()) {
+ return kInvalidSectionId;
+ }
+ SchemaTypeId type_id = type_id_or.ValueOrDie();
+ for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) {
+ auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id);
+ if (!metadata_or.ok()) {
+ break;
+ }
+ const SectionMetadata* metadata = metadata_or.ValueOrDie();
+ if (metadata->path == property) {
+ return metadata->id;
+ }
+ }
+ return kInvalidSectionId;
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<DocumentStore> document_store_;
+ FakeClock fake_clock_;
+};
+
+// TODO(sungyc): Refactor helper functions below (builder classes or common test
+// utility).
+
+// Builds a SectionIdMask with the bit for each id in section_ids set.
+SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
+  SectionIdMask mask = 0;
+  for (size_t i = 0; i < section_ids.size(); ++i) {
+    mask |= (1u << section_ids[i]);
+  }
+  return mask;
+}
+
+// Builds a SearchSpecProto configured with the given term match type.
+SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
+  SearchSpecProto spec;
+  spec.set_term_match_type(match_type);
+  return spec;
+}
+
+// Builds a ScoringSpecProto whose result ordering matches the requested
+// direction: DESC when is_descending_order is true, ASC otherwise.
+ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
+  ScoringSpecProto scoring_spec;
+  if (is_descending_order) {
+    scoring_spec.set_order_by(ScoringSpecProto::Order::DESC);
+  } else {
+    scoring_spec.set_order_by(ScoringSpecProto::Order::ASC);
+  }
+  return scoring_spec;
+}
+
+// Builds a ResultSpecProto that requests num_per_page results per page.
+ResultSpecProto CreateResultSpec(int num_per_page) {
+  ResultSpecProto spec;
+  spec.set_num_per_page(num_per_page);
+  return spec;
+}
+
+// Verifies that projecting the top-level leaf property "name" strips every
+// other property from the returned Email documents.
+// NOTE(review): renamed from ProjectionTopLevelLeadNodeFieldPath — "Lead" was
+// a typo for "Leaf" (cf. ProjectionNestedLeafNodeFieldPath below).
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTopLevelLeafNodeFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results only contain the 'name' property.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+// Verifies that projecting the nested leaf path "sender.name" keeps only the
+// sender's name inside the nested Person document, dropping all other
+// properties at both levels.
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionNestedLeafNodeFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("sender.name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results only contain the 'sender.name'
+ // property.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionIntermediateNodeFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("sender");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results only contain the 'sender'
+ // property and all of the subproperties of 'sender'.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleNestedFieldPaths) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("sender.name");
+ type_property_mask->add_paths("sender.emailAddress");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results only contain the 'sender.name' and
+  // 'sender.emailAddress' properties.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionEmptyFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results contain *no* properties.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionInvalidFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("nonExistentProperty");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results contain *no* properties.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionValidAndInvalidFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("name");
+ type_property_mask->add_paths("nonExistentProperty");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results only contain the 'name' property.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesNoWildcards) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Email results only contain the 'name'
+ // property and the returned Person results have all of their properties.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesWildcard) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* wildcard_type_property_mask =
+ result_spec.add_type_property_masks();
+ wildcard_type_property_mask->set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Email results only contain the 'name'
+ // property and the returned Person results only contain the 'name' property.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest,
+ ProjectionMultipleTypesWildcardWithOneOverride) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("body");
+ TypePropertyMask* wildcard_type_property_mask =
+ result_spec.add_type_property_masks();
+ wildcard_type_property_mask->set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Email results only contain the 'body'
+ // property and the returned Person results only contain the 'name' property.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest,
+ ProjectionSingleTypesWildcardAndOverride) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .AddStringProperty("emailAddress", "mr.body123@gmail.com")
+ .Build())
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("sender.name");
+ TypePropertyMask* wildcard_type_property_mask =
+ result_spec.add_type_property_masks();
+ wildcard_type_property_mask->set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Email results only contain the 'sender.name'
+ // property and the returned Person results only contain the 'name' property.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest,
+ ProjectionSingleTypesWildcardAndOverrideNestedProperty) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .AddStringProperty("emailAddress", "mr.body123@gmail.com")
+ .Build())
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("sender.name");
+ TypePropertyMask* wildcard_type_property_mask =
+ result_spec.add_type_property_masks();
+ wildcard_type_property_mask->set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask->add_paths("sender");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Email results only contain the 'sender.name'
+ // property and the returned Person results contain no properties.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever-v2_snippet-test.cc b/icing/result/result-retriever-v2_snippet-test.cc
new file mode 100644
index 0000000..afb31cf
--- /dev/null
+++ b/icing/result/result-retriever-v2_snippet-test.cc
@@ -0,0 +1,573 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <limits>
+#include <memory>
+#include <string_view>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/page-result.h"
+#include "icing/result/result-retriever-v2.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/snippet-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto::Cardinality::OPTIONAL;
+
+constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN =
+ StringIndexingConfig::TokenizerType::PLAIN;
+
+constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY;
+constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX;
+
+class ResultRetrieverV2SnippetTest : public testing::Test {
+ protected:
+ ResultRetrieverV2SnippetTest() : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/10000));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ SectionId GetSectionId(const std::string& type, const std::string& property) {
+ auto type_id_or = schema_store_->GetSchemaTypeId(type);
+ if (!type_id_or.ok()) {
+ return kInvalidSectionId;
+ }
+ SchemaTypeId type_id = type_id_or.ValueOrDie();
+ for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) {
+ auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id);
+ if (!metadata_or.ok()) {
+ break;
+ }
+ const SectionMetadata* metadata = metadata_or.ValueOrDie();
+ if (metadata->path == property) {
+ return metadata->id;
+ }
+ }
+ return kInvalidSectionId;
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<DocumentStore> document_store_;
+ FakeClock fake_clock_;
+};
+
+// TODO(sungyc): Refactor helper functions below (builder classes or common test
+// utility).
+
+ResultSpecProto::SnippetSpecProto CreateSnippetSpec() {
+ ResultSpecProto::SnippetSpecProto snippet_spec;
+ snippet_spec.set_num_to_snippet(std::numeric_limits<int>::max());
+ snippet_spec.set_num_matches_per_property(std::numeric_limits<int>::max());
+ snippet_spec.set_max_window_utf32_length(1024);
+ return snippet_spec;
+}
+
+DocumentProto CreateDocument(int id) {
+ return DocumentBuilder()
+ .SetKey("icing", "Email/" + std::to_string(id))
+ .SetSchema("Email")
+ .AddStringProperty("name", "subject foo " + std::to_string(id))
+ .AddStringProperty("body", "body bar " + std::to_string(id))
+ .SetCreationTimestampMs(1574365086666 + id)
+ .Build();
+}
+
+SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
+ SectionIdMask mask = 0;
+ for (SectionId section_id : section_ids) {
+ mask |= (1u << section_id);
+ }
+ return mask;
+}
+
+SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(match_type);
+ return search_spec;
+}
+
+ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
+ : ScoringSpecProto::Order::ASC);
+ return scoring_spec;
+}
+
+ResultSpecProto CreateResultSpec(int num_per_page) {
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
+
+TEST_F(ResultRetrieverV2SnippetTest,
+ DefaultSnippetSpecShouldDisableSnippeting) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(CreateDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/3), *document_store_);
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).snippet(),
+ EqualsProto(SnippetProto::default_instance()));
+ EXPECT_THAT(page_result.results.at(1).snippet(),
+ EqualsProto(SnippetProto::default_instance()));
+ EXPECT_THAT(page_result.results.at(2).snippet(),
+ EqualsProto(SnippetProto::default_instance()));
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(0));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(CreateDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = CreateSnippetSpec();
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/{{"", {"foo", "bar"}}},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(3));
+
+ const DocumentProto& result_document_one =
+ page_result.results.at(0).document();
+ const SnippetProto& result_snippet_one = page_result.results.at(0).snippet();
+ EXPECT_THAT(result_document_one, EqualsProto(CreateDocument(/*id=*/1)));
+ EXPECT_THAT(result_snippet_one.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_one, result_snippet_one.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)),
+ ElementsAre("body bar 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("name"));
+ content = GetString(&result_document_one,
+ result_snippet_one.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_one.entries(1)),
+ ElementsAre("subject foo 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet_one.entries(1)),
+ ElementsAre("foo"));
+
+ const DocumentProto& result_document_two =
+ page_result.results.at(1).document();
+ const SnippetProto& result_snippet_two = page_result.results.at(1).snippet();
+ EXPECT_THAT(result_document_two, EqualsProto(CreateDocument(/*id=*/2)));
+ EXPECT_THAT(result_snippet_two.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_two,
+ result_snippet_two.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)),
+ ElementsAre("body bar 2"));
+ EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("name"));
+ content = GetString(&result_document_two,
+ result_snippet_two.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_two.entries(1)),
+ ElementsAre("subject foo 2"));
+ EXPECT_THAT(GetMatches(content, result_snippet_two.entries(1)),
+ ElementsAre("foo"));
+
+ const DocumentProto& result_document_three =
+ page_result.results.at(2).document();
+ const SnippetProto& result_snippet_three =
+ page_result.results.at(2).snippet();
+ EXPECT_THAT(result_document_three, EqualsProto(CreateDocument(/*id=*/3)));
+ EXPECT_THAT(result_snippet_three.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_three.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_three,
+ result_snippet_three.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_three.entries(0)),
+ ElementsAre("body bar 3"));
+ EXPECT_THAT(GetMatches(content, result_snippet_three.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("name"));
+ content = GetString(&result_document_three,
+ result_snippet_three.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_three.entries(1)),
+ ElementsAre("subject foo 3"));
+ EXPECT_THAT(GetMatches(content, result_snippet_three.entries(1)),
+ ElementsAre("foo"));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(CreateDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
+ snippet_spec.set_num_to_snippet(1);
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = std::move(snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/{{"", {"foo", "bar"}}},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(1));
+
+ const DocumentProto& result_document = page_result.results.at(0).document();
+ const SnippetProto& result_snippet = page_result.results.at(0).snippet();
+ EXPECT_THAT(result_document, EqualsProto(CreateDocument(/*id=*/1)));
+ EXPECT_THAT(result_snippet.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&result_document, result_snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet.entries(0)),
+ ElementsAre("body bar 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("name"));
+ content =
+ GetString(&result_document, result_snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet.entries(1)),
+ ElementsAre("subject foo 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet.entries(1)),
+ ElementsAre("foo"));
+
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(CreateDocument(/*id=*/2)));
+ EXPECT_THAT(page_result.results.at(1).snippet(),
+ EqualsProto(SnippetProto::default_instance()));
+
+ EXPECT_THAT(page_result.results.at(2).document(),
+ EqualsProto(CreateDocument(/*id=*/3)));
+ EXPECT_THAT(page_result.results.at(2).snippet(),
+ EqualsProto(SnippetProto::default_instance()));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(CreateDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
+ snippet_spec.set_num_to_snippet(5);
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = std::move(snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/{{"", {"foo", "bar"}}},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ // num_to_snippet = 5, num_previously_returned_in = 0,
+ // We can return 5 - 0 = 5 snippets at most. We're able to return all 3
+ // snippets here.
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(2).snippet().entries(), Not(IsEmpty()));
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(3));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(CreateDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
+ snippet_spec.set_num_to_snippet(5);
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = std::move(snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/{{"", {"foo", "bar"}}},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+ {
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ // Set (previously) num_returned = 3 docs
+ result_state.num_returned = 3;
+ }
+
+ // num_to_snippet = 5, (previously) num_returned = 3,
+ // We can return 5 - 3 = 2 snippets.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(2).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(2));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, ShouldNotSnippetAnyResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(CreateDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
+ snippet_spec.set_num_to_snippet(5);
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = std::move(snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*query_terms=*/{{"", {"foo", "bar"}}},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ *document_store_);
+ {
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ // Set (previously) num_returned = 6 docs
+ result_state.num_returned = 6;
+ }
+
+ // num_to_snippet = 5, (previously) num_returned = 6,
+ // We can't return any snippets for this page.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.results.at(2).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(0));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever-v2_test.cc b/icing/result/result-retriever-v2_test.cc
new file mode 100644
index 0000000..f23a88a
--- /dev/null
+++ b/icing/result/result-retriever-v2_test.cc
@@ -0,0 +1,641 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-retriever-v2.h"
+
+#include <atomic>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/page-result.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::DoDefault;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Pointee;
+using ::testing::Return;
+using ::testing::SizeIs;
+using NamespaceIdMap = std::unordered_map<NamespaceId, int>;
+
+constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto::Cardinality::OPTIONAL;
+
+constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN =
+ StringIndexingConfig::TokenizerType::PLAIN;
+
+constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY;
+constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX;
+
+// Mock the behavior of GroupResultLimiter::ShouldBeRemoved.
+class MockGroupResultLimiter : public GroupResultLimiterV2 {
+ public:
+ MockGroupResultLimiter() : GroupResultLimiterV2() {
+ ON_CALL(*this, ShouldBeRemoved).WillByDefault(Return(false));
+ }
+
+ MOCK_METHOD(bool, ShouldBeRemoved,
+ (const ScoredDocumentHit&, const NamespaceIdMap&,
+ const DocumentStore&, std::vector<int>&),
+ (const, override));
+};
+
+class ResultRetrieverV2Test : public ::testing::Test {
+ protected:
+ ResultRetrieverV2Test() : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/10000));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ num_total_hits_ = 0;
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ SectionId GetSectionId(const std::string& type, const std::string& property) {
+ auto type_id_or = schema_store_->GetSchemaTypeId(type);
+ if (!type_id_or.ok()) {
+ return kInvalidSectionId;
+ }
+ SchemaTypeId type_id = type_id_or.ValueOrDie();
+ for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) {
+ auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id);
+ if (!metadata_or.ok()) {
+ break;
+ }
+ const SectionMetadata* metadata = metadata_or.ValueOrDie();
+ if (metadata->path == property) {
+ return metadata->id;
+ }
+ }
+ return kInvalidSectionId;
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::atomic<int> num_total_hits_;
+ FakeClock fake_clock_;
+};
+
+// TODO(sungyc): Refactor helper functions below (builder classes or common test
+// utility).
+
+DocumentProto CreateDocument(int id) {
+ return DocumentBuilder()
+ .SetKey("icing", "Email/" + std::to_string(id))
+ .SetSchema("Email")
+ .AddStringProperty("name", "subject foo " + std::to_string(id))
+ .AddStringProperty("body", "body bar " + std::to_string(id))
+ .SetCreationTimestampMs(1574365086666 + id)
+ .Build();
+}
+
+SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
+ SectionIdMask mask = 0;
+ for (SectionId section_id : section_ids) {
+ mask |= (1u << section_id);
+ }
+ return mask;
+}
+
+SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(match_type);
+ return search_spec;
+}
+
+ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
+ : ScoringSpecProto::Order::ASC);
+ return scoring_spec;
+}
+
+ResultSpecProto CreateResultSpec(int num_per_page) {
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
+
+TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(
+ ResultRetrieverV2::Create(/*doc_store=*/nullptr, schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ EXPECT_THAT(
+ ResultRetrieverV2::Create(doc_store.get(), /*schema_store=*/nullptr,
+ language_segmenter_.get(), normalizer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ /*language_segmenter=*/nullptr,
+ normalizer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(),
+ /*normalizer=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ doc_store->Put(CreateDocument(/*id=*/3)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ doc_store->Put(CreateDocument(/*id=*/4)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ doc_store->Put(CreateDocument(/*id=*/5)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/19},
+ {document_id2, hit_section_id_mask, /*score=*/12},
+ {document_id3, hit_section_id_mask, /*score=*/8},
+ {document_id4, hit_section_id_mask, /*score=*/3},
+ {document_id5, hit_section_id_mask, /*score=*/1}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = CreateDocument(/*id=*/1);
+ result1.set_score(19);
+ SearchResultProto::ResultProto result2;
+ *result2.mutable_document() = CreateDocument(/*id=*/2);
+ result2.set_score(12);
+ SearchResultProto::ResultProto result3;
+ *result3.mutable_document() = CreateDocument(/*id=*/3);
+ result3.set_score(8);
+ SearchResultProto::ResultProto result4;
+ *result4.mutable_document() = CreateDocument(/*id=*/4);
+ result4.set_score(3);
+ SearchResultProto::ResultProto result5;
+ *result5.mutable_document() = CreateDocument(/*id=*/5);
+ result5.set_score(1);
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/2), *doc_store);
+
+ // First page, 2 results
+ auto [page_result1, has_more_results1] =
+ result_retriever->RetrieveNextPage(result_state);
+ EXPECT_THAT(page_result1.results,
+ ElementsAre(EqualsProto(result1), EqualsProto(result2)));
+ // num_results_with_snippets is 0 when there is no snippet.
+ EXPECT_THAT(page_result1.num_results_with_snippets, Eq(0));
+ // Requested page size is same as num_per_page.
+ EXPECT_THAT(page_result1.requested_page_size, Eq(2));
+ // Has more results.
+ EXPECT_TRUE(has_more_results1);
+
+ // Second page, 2 results
+ auto [page_result2, has_more_results2] =
+ result_retriever->RetrieveNextPage(result_state);
+ EXPECT_THAT(page_result2.results,
+ ElementsAre(EqualsProto(result3), EqualsProto(result4)));
+ // num_results_with_snippets is 0 when there is no snippet.
+ EXPECT_THAT(page_result2.num_results_with_snippets, Eq(0));
+ // Requested page size is same as num_per_page.
+ EXPECT_THAT(page_result2.requested_page_size, Eq(2));
+ // Has more results.
+ EXPECT_TRUE(has_more_results2);
+
+ // Third page, 1 result
+ auto [page_result3, has_more_results3] =
+ result_retriever->RetrieveNextPage(result_state);
+ EXPECT_THAT(page_result3.results, ElementsAre(EqualsProto(result5)));
+ // num_results_with_snippets is 0 when there is no snippet.
+ EXPECT_THAT(page_result3.num_results_with_snippets, Eq(0));
+ // Requested page size is same as num_per_page.
+ EXPECT_THAT(page_result3.requested_page_size, Eq(2));
+ // No more results.
+ EXPECT_FALSE(has_more_results3);
+}
+
+TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+
+ DocumentId invalid_document_id = -1;
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/12},
+ {document_id2, hit_section_id_mask, /*score=*/4},
+ {invalid_document_id, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get(),
+ std::make_unique<MockGroupResultLimiter>()));
+
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = CreateDocument(/*id=*/1);
+ result1.set_score(12);
+ SearchResultProto::ResultProto result2;
+ *result2.mutable_document() = CreateDocument(/*id=*/2);
+ result2.set_score(4);
+
+ ResultStateV2 result_state1(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/3), *doc_store);
+ PageResult page_result1 =
+ result_retriever->RetrieveNextPage(result_state1).first;
+ EXPECT_THAT(page_result1.results,
+ ElementsAre(EqualsProto(result1), EqualsProto(result2)));
+
+ DocumentId non_existing_document_id = 4;
+ scored_document_hits = {
+ {non_existing_document_id, hit_section_id_mask, /*score=*/15},
+ {document_id1, hit_section_id_mask, /*score=*/12},
+ {document_id2, hit_section_id_mask, /*score=*/4}};
+ ResultStateV2 result_state2(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/3), *doc_store);
+ PageResult page_result2 =
+ result_retriever->RetrieveNextPage(result_state2).first;
+ EXPECT_THAT(page_result2.results,
+ ElementsAre(EqualsProto(result1), EqualsProto(result2)));
+}
+
+TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) {
+ MockFilesystem mock_filesystem;
+ EXPECT_CALL(mock_filesystem,
+ PRead(A<int>(), A<void*>(), A<size_t>(), A<off_t>()))
+ .WillOnce(Return(false))
+ .WillRepeatedly(DoDefault());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get(),
+ std::make_unique<MockGroupResultLimiter>()));
+
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = CreateDocument(/*id=*/1);
+ result1.set_score(0);
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/2), *doc_store);
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ // We mocked mock_filesystem to return an internal error when retrieving doc2,
+ // so doc2 should be skipped and doc1 should still be returned.
+ EXPECT_THAT(page_result.results, ElementsAre(EqualsProto(result1)));
+}
+
+TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ doc_store->Put(CreateDocument(/*id=*/3)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ doc_store->Put(CreateDocument(/*id=*/4)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ doc_store->Put(CreateDocument(/*id=*/5)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0},
+ {document_id4, hit_section_id_mask, /*score=*/0},
+ {document_id5, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/2), *doc_store);
+
+ // First page, 2 results
+ PageResult page_result1 =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result1.results, SizeIs(2));
+ {
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ // num_returned = size of first page
+ EXPECT_THAT(result_state.num_returned, Eq(2));
+ // Should remove the 2 returned docs from scored_document_hits and only
+ // contain the remaining 3.
+ EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(3)));
+ }
+
+ // Second page, 2 results
+ PageResult page_result2 =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result2.results, SizeIs(2));
+ {
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ // num_returned = size of first and second pages
+ EXPECT_THAT(result_state.num_returned, Eq(4));
+ // Should remove the 2 returned docs from scored_document_hits and only
+ // contain the remaining 1.
+ EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(1)));
+ }
+
+ // Third page, 1 result
+ PageResult page_result3 =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result3.results, SizeIs(1));
+ {
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ // num_returned = size of first, second and third pages
+ EXPECT_THAT(result_state.num_returned, Eq(5));
+ // Should remove the 1 returned doc from scored_document_hits and become
+ // empty.
+ EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(IsEmpty()));
+ }
+}
+
+TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+ std::vector<ScoredDocumentHit> scored_document_hits1 = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+ std::shared_ptr<ResultStateV2> result_state1 =
+ std::make_shared<ResultStateV2>(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits1),
+ /*is_descending=*/true),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/1), *doc_store);
+ {
+ absl_ports::unique_lock l(&result_state1->mutex);
+
+ result_state1->RegisterNumTotalHits(&num_total_hits_);
+ ASSERT_THAT(num_total_hits_, Eq(2));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ doc_store->Put(CreateDocument(/*id=*/3)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ doc_store->Put(CreateDocument(/*id=*/4)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ doc_store->Put(CreateDocument(/*id=*/5)));
+ std::vector<ScoredDocumentHit> scored_document_hits2 = {
+ {document_id3, hit_section_id_mask, /*score=*/0},
+ {document_id4, hit_section_id_mask, /*score=*/0},
+ {document_id5, hit_section_id_mask, /*score=*/0}};
+ std::shared_ptr<ResultStateV2> result_state2 =
+ std::make_shared<ResultStateV2>(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits2),
+ /*is_descending=*/true),
+ /*query_terms=*/SectionRestrictQueryTermsMap{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/2), *doc_store);
+ {
+ absl_ports::unique_lock l(&result_state2->mutex);
+
+ result_state2->RegisterNumTotalHits(&num_total_hits_);
+ ASSERT_THAT(num_total_hits_, Eq(5));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Should get 1 doc in the first page of result_state1, and num_total_hits
+ // should be decremented by 1.
+ PageResult page_result1 =
+ result_retriever->RetrieveNextPage(*result_state1).first;
+ ASSERT_THAT(page_result1.results, SizeIs(1));
+ EXPECT_THAT(num_total_hits_, Eq(4));
+
+ // Should get 2 docs in the first page of result_state2, and num_total_hits
+ // should be decremented by 2.
+ PageResult page_result2 =
+ result_retriever->RetrieveNextPage(*result_state2).first;
+ ASSERT_THAT(page_result2.results, SizeIs(2));
+ EXPECT_THAT(num_total_hits_, Eq(2));
+
+ // Should get 1 doc in the second page of result_state2 (although num_per_page
+ // is 2, there is only 1 doc left), and num_total_hits should be decremented
+ // by 1.
+ PageResult page_result3 =
+ result_retriever->RetrieveNextPage(*result_state2).first;
+ ASSERT_THAT(page_result3.results, SizeIs(1));
+ EXPECT_THAT(num_total_hits_, Eq(1));
+
+ // Destruct result_state1. There is 1 doc left, so num_total_hits should be
+ // decremented by 1 when destructing it.
+ result_state1.reset();
+ EXPECT_THAT(num_total_hits_, Eq(0));
+
+ // Destruct result_state2. There is 0 doc left, so num_total_hits should be
+ // unchanged when destructing it.
+  result_state2.reset();
+ EXPECT_THAT(num_total_hits_, Eq(0));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc
index d606e79..1057f9b 100644
--- a/icing/result/result-state-manager.cc
+++ b/icing/result/result-state-manager.cc
@@ -23,11 +23,13 @@ namespace icing {
namespace lib {
ResultStateManager::ResultStateManager(int max_total_hits,
- const DocumentStore& document_store)
+ const DocumentStore& document_store,
+ const Clock* clock)
: document_store_(document_store),
max_total_hits_(max_total_hits),
num_total_hits_(0),
- random_generator_(GetSteadyTimeNanoseconds()) {}
+ random_generator_(GetSteadyTimeNanoseconds()),
+ clock_(*clock) {}
libtextclassifier3::StatusOr<PageResultState>
ResultStateManager::RankAndPaginate(ResultState result_state) {
@@ -75,7 +77,8 @@ uint64_t ResultStateManager::Add(ResultState result_state) {
num_total_hits_ += result_state.num_remaining();
result_state_map_.emplace(new_token, std::move(result_state));
// Tracks the insertion order
- token_queue_.push(new_token);
+ token_queue_.push(
+ std::make_pair(new_token, clock_.GetSystemTimeMilliseconds()));
return new_token;
}
@@ -134,10 +137,16 @@ void ResultStateManager::InvalidateAllResultStates() {
InternalInvalidateAllResultStates();
}
+void ResultStateManager::InvalidateExpiredResultStates(
+ int64_t result_state_ttl) {
+ absl_ports::unique_lock l(&mutex_);
+ InternalInvalidateExpiredResultStates(result_state_ttl);
+}
+
void ResultStateManager::InternalInvalidateAllResultStates() {
result_state_map_.clear();
invalidated_token_set_.clear();
- token_queue_ = std::queue<uint64_t>();
+ token_queue_ = std::queue<std::pair<uint64_t, int64_t>>();
num_total_hits_ = 0;
}
@@ -170,16 +179,16 @@ void ResultStateManager::RemoveStatesIfNeeded(const ResultState& result_state) {
// 2. Remove any tokens that were previously invalidated.
while (!token_queue_.empty() &&
- invalidated_token_set_.find(token_queue_.front()) !=
+ invalidated_token_set_.find(token_queue_.front().first) !=
invalidated_token_set_.end()) {
- invalidated_token_set_.erase(token_queue_.front());
+ invalidated_token_set_.erase(token_queue_.front().first);
token_queue_.pop();
}
// 3. If we're over budget, remove states from oldest to newest until we fit
// into our budget.
while (result_state.num_remaining() + num_total_hits_ > max_total_hits_) {
- InternalInvalidateResultState(token_queue_.front());
+ InternalInvalidateResultState(token_queue_.front().first);
token_queue_.pop();
}
invalidated_token_set_.clear();
@@ -198,5 +207,24 @@ void ResultStateManager::InternalInvalidateResultState(uint64_t token) {
}
}
+void ResultStateManager::InternalInvalidateExpiredResultStates(
+ int64_t result_state_ttl) {
+ int64_t current_time = clock_.GetSystemTimeMilliseconds();
+ while (!token_queue_.empty() &&
+ current_time - token_queue_.front().second >= result_state_ttl) {
+ auto itr = result_state_map_.find(token_queue_.front().first);
+ if (itr != result_state_map_.end()) {
+ num_total_hits_ -= itr->second.num_remaining();
+ result_state_map_.erase(itr);
+ } else {
+ // Since result_state_map_ and invalidated_token_set_ are mutually
+ // exclusive, we remove the token from invalidated_token_set_ only if it
+ // isn't present in result_state_map_.
+ invalidated_token_set_.erase(token_queue_.front().first);
+ }
+ token_queue_.pop();
+ }
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/result/result-state-manager.h b/icing/result/result-state-manager.h
index c04217f..745b0ec 100644
--- a/icing/result/result-state-manager.h
+++ b/icing/result/result-state-manager.h
@@ -26,6 +26,7 @@
#include "icing/proto/search.pb.h"
#include "icing/result/page-result-state.h"
#include "icing/result/result-state.h"
+#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -34,11 +35,16 @@ namespace lib {
// SearchResultProto.next_page_token.
inline constexpr uint64_t kInvalidNextPageToken = 0;
+// 1 hr as the default ttl for a ResultState after being pushed into
+// token_queue_.
+inline constexpr int64_t kDefaultResultStateTtlInMs = 1LL * 60 * 60 * 1000;
+
// Used to store and manage ResultState.
class ResultStateManager {
public:
explicit ResultStateManager(int max_total_hits,
- const DocumentStore& document_store);
+ const DocumentStore& document_store,
+ const Clock* clock);
ResultStateManager(const ResultStateManager&) = delete;
ResultStateManager& operator=(const ResultStateManager&) = delete;
@@ -75,6 +81,12 @@ class ResultStateManager {
// Invalidates all result states / tokens currently in ResultStateManager.
void InvalidateAllResultStates() ICING_LOCKS_EXCLUDED(mutex_);
+ // Invalidates expired result states / tokens currently in ResultStateManager
+ // that were created before current_time - result_state_ttl.
+ void InvalidateExpiredResultStates(
+ int64_t result_state_ttl = kDefaultResultStateTtlInMs)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
private:
absl_ports::shared_mutex mutex_;
@@ -94,8 +106,9 @@ class ResultStateManager {
std::unordered_map<uint64_t, ResultState> result_state_map_
ICING_GUARDED_BY(mutex_);
- // A queue used to track the insertion order of tokens
- std::queue<uint64_t> token_queue_ ICING_GUARDED_BY(mutex_);
+ // A queue used to track the insertion order of tokens with pushed timestamps.
+ std::queue<std::pair<uint64_t, int64_t>> token_queue_
+ ICING_GUARDED_BY(mutex_);
// A set to temporarily store the invalidated tokens before they're finally
// removed from token_queue_. We store the invalidated tokens to ensure the
@@ -105,6 +118,8 @@ class ResultStateManager {
// A random 64-bit number generator
std::mt19937_64 random_generator_ ICING_GUARDED_BY(mutex_);
+ const Clock& clock_; // Does not own.
+
// Puts a new result state into the internal storage and returns a next-page
// token associated with it. The token is guaranteed to be unique among all
// currently valid tokens. When the maximum number of result states is
@@ -126,12 +141,18 @@ class ResultStateManager {
void InternalInvalidateResultState(uint64_t token)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Internal method to invalidates all result states / tokens currently in
+ // Internal method to invalidate all result states / tokens currently in
// ResultStateManager. We need this separate method so that other public
// methods don't need to call InvalidateAllResultStates(). Public methods
// calling each other may cause deadlock issues.
void InternalInvalidateAllResultStates()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Internal method to invalidate and remove expired result states / tokens
+ // currently in ResultStateManager that were created before
+ // current_time - result_state_ttl.
+ void InternalInvalidateExpiredResultStates(int64_t result_state_ttl)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
};
} // namespace lib
diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc
index 8a9005d..251a736 100644
--- a/icing/result/result-state-manager_test.cc
+++ b/icing/result/result-state-manager_test.cc
@@ -21,6 +21,7 @@
#include "icing/schema/schema-store.h"
#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/clock.h"
@@ -52,11 +53,13 @@ ScoredDocumentHit CreateScoredHit(DocumentId document_id) {
class ResultStateManagerTest : public testing::Test {
protected:
void SetUp() override {
+ clock_ = std::make_unique<FakeClock>();
+
schema_store_base_dir_ = GetTestTempDir() + "/schema_store";
filesystem_.CreateDirectoryRecursively(schema_store_base_dir_.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_,
- SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_));
+ schema_store_, SchemaStore::Create(&filesystem_, schema_store_base_dir_,
+ clock_.get()));
SchemaProto schema;
schema.add_types()->set_schema_type("Document");
ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
@@ -65,7 +68,7 @@ class ResultStateManagerTest : public testing::Test {
filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult result,
- DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_,
+ DocumentStore::Create(&filesystem_, doc_store_base_dir_, clock_.get(),
schema_store_.get()));
document_store_ = std::move(result.document_store);
}
@@ -73,6 +76,7 @@ class ResultStateManagerTest : public testing::Test {
void TearDown() override {
filesystem_.DeleteDirectoryRecursively(doc_store_base_dir_.c_str());
filesystem_.DeleteDirectoryRecursively(schema_store_base_dir_.c_str());
+ clock_.reset();
}
ResultState CreateResultState(
@@ -92,13 +96,16 @@ class ResultStateManagerTest : public testing::Test {
return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1);
}
+ FakeClock* clock() { return clock_.get(); }
+ const FakeClock* clock() const { return clock_.get(); }
+
const DocumentStore& document_store() const { return *document_store_; }
private:
Filesystem filesystem_;
+ std::unique_ptr<FakeClock> clock_;
std::string doc_store_base_dir_;
std::string schema_store_base_dir_;
- Clock clock_;
std::unique_ptr<DocumentStore> document_store_;
std::unique_ptr<SchemaStore> schema_store_;
};
@@ -111,7 +118,8 @@ TEST_F(ResultStateManagerTest, ShouldRankAndPaginateOnePage) {
/*num_per_page=*/10);
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
+ clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state,
result_state_manager.RankAndPaginate(std::move(original_result_state)));
@@ -136,7 +144,8 @@ TEST_F(ResultStateManagerTest, ShouldRankAndPaginateMultiplePages) {
/*num_per_page=*/2);
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
+ clock());
// First page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
@@ -173,7 +182,8 @@ TEST_F(ResultStateManagerTest, EmptyStateShouldReturnError) {
ResultState empty_result_state = CreateResultState({}, /*num_per_page=*/1);
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
+ clock());
EXPECT_THAT(
result_state_manager.RankAndPaginate(std::move(empty_result_state)),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
@@ -192,7 +202,8 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateOneToken) {
/*num_per_page=*/1);
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
+ clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -230,7 +241,8 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) {
/*num_per_page=*/1);
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
+ clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -251,6 +263,50 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
+TEST_F(ResultStateManagerTest, ShouldInvalidateOldTokens) {
+ ResultState result_state1 =
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/2)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({AddScoredDocument(/*document_id=*/3),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/5)},
+ /*num_per_page=*/1);
+
+ ResultStateManager result_state_manager(
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
+ clock());
+ // Set time as 1s and add state 1.
+ clock()->SetSystemTimeMilliseconds(1000);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ // Set time as 1hr2s and add state 2.
+ clock()->SetSystemTimeMilliseconds(kDefaultResultStateTtlInMs + 2000);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+
+ // Invalidates expired states with default ttl (1 hr). This should only
+ // invalidate state 1.
+ result_state_manager.InvalidateExpiredResultStates();
+
+  // page_result_state1.next_page_token shouldn't be found
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  // page_result_state2.next_page_token should be found
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state2,
+ result_state_manager.GetNextPage(page_result_state2.next_page_token));
+ EXPECT_THAT(page_result_state2.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/4))));
+}
+
TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) {
ResultState result_state1 =
CreateResultState({AddScoredDocument(/*document_id=*/0),
@@ -266,7 +322,7 @@ TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) {
/*num_per_page=*/1);
ResultStateManager result_state_manager(/*max_total_hits=*/2,
- document_store());
+ document_store(), clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -317,7 +373,7 @@ TEST_F(ResultStateManagerTest,
// Each result state has a page size of 1 and a result set of 2 hits. So each
// result will take up one hit of our three hit budget.
ResultStateManager result_state_manager(/*max_total_hits=*/3,
- document_store());
+ document_store(), clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -390,7 +446,7 @@ TEST_F(ResultStateManagerTest,
// Each result state has a page size of 1 and a result set of 2 hits. So each
// result will take up one hit of our three hit budget.
ResultStateManager result_state_manager(/*max_total_hits=*/3,
- document_store());
+ document_store(), clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -463,6 +519,99 @@ TEST_F(ResultStateManagerTest,
/*document_id=*/10))));
}
+TEST_F(ResultStateManagerTest,
+ InvalidatedOldResultStatesShouldDecreaseCurrentHitsCount) {
+ ResultState result_state1 =
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/3)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/5)},
+ /*num_per_page=*/1);
+ ResultState result_state3 =
+ CreateResultState({AddScoredDocument(/*document_id=*/6),
+ AddScoredDocument(/*document_id=*/7)},
+ /*num_per_page=*/1);
+ ResultState result_state4 =
+ CreateResultState({AddScoredDocument(/*document_id=*/8),
+ AddScoredDocument(/*document_id=*/9)},
+ /*num_per_page=*/1);
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // So state 1 ~ state 4 will take up 6 hits in total.
+ ResultStateManager result_state_manager(/*max_total_hits=*/6,
+ document_store(), clock());
+ // Set time as 1000ms and add state 1.
+ clock()->SetSystemTimeMilliseconds(1000);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ // Set time as 1001ms and add state 2.
+ clock()->SetSystemTimeMilliseconds(1001);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+ // Set time as 1002ms and add state 3.
+ clock()->SetSystemTimeMilliseconds(1002);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state3,
+ result_state_manager.RankAndPaginate(std::move(result_state3)));
+ // Set time as 1003ms and add state 4.
+ clock()->SetSystemTimeMilliseconds(1003);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state4,
+ result_state_manager.RankAndPaginate(std::move(result_state4)));
+
+ // Set time as kDefaultResultStateTtlInMs + 1001ms and invalidate expired
+ // states with default ttl (1 hr). This should invalidate state 1 and state 2.
+ clock()->SetSystemTimeMilliseconds(kDefaultResultStateTtlInMs + 1001);
+ result_state_manager.InvalidateExpiredResultStates();
+
+ // page_result_state1.next_page_token() shouldn't be found
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ // page_result_state2.next_page_token() shouldn't be found
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state2.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // If invalidating state 1 and state 2 correctly decremented the current hit
+ // count by 4 (to 2), then adding state 5 should still be within our budget
+ // and no other result states should be evicted.
+ ResultState result_state5 =
+ CreateResultState({AddScoredDocument(/*document_id=*/10),
+ AddScoredDocument(/*document_id=*/11),
+ AddScoredDocument(/*document_id=*/12),
+ AddScoredDocument(/*document_id=*/13),
+ AddScoredDocument(/*document_id=*/14)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state5,
+ result_state_manager.RankAndPaginate(std::move(result_state5)));
+
+ // page_result_state3.next_page_token() should be found since there is no
+ // eviction.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/6))));
+ // page_result_state4.next_page_token() should be found since there is no
+ // eviction.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state4,
+ result_state_manager.GetNextPage(page_result_state4.next_page_token));
+ EXPECT_THAT(page_result_state4.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/8))));
+}
+
TEST_F(
ResultStateManagerTest,
InvalidatedResultStateShouldDecreaseCurrentHitsCountByExactStateHitCount) {
@@ -484,7 +633,7 @@ TEST_F(
// Each result state has a page size of 1 and a result set of 2 hits. So each
// result will take up one hit of our three hit budget.
ResultStateManager result_state_manager(/*max_total_hits=*/3,
- document_store());
+ document_store(), clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -571,7 +720,7 @@ TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) {
// Each result state has a page size of 1 and a result set of 2 hits. So each
// result will take up one hit of our three hit budget.
ResultStateManager result_state_manager(/*max_total_hits=*/3,
- document_store());
+ document_store(), clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -648,7 +797,7 @@ TEST_F(ResultStateManagerTest,
// Each result state has a page size of 1 and a result set of 2 hits. So each
// result will take up one hit of our three hit budget.
ResultStateManager result_state_manager(/*max_total_hits=*/3,
- document_store());
+ document_store(), clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -736,7 +885,7 @@ TEST_F(ResultStateManagerTest,
// won't be cached (since it is returned immediately from RankAndPaginate).
// Each result state has a page size of 1. So 3 hits will remain cached.
ResultStateManager result_state_manager(/*max_total_hits=*/4,
- document_store());
+ document_store(), clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -807,7 +956,7 @@ TEST_F(ResultStateManagerTest,
TEST_F(ResultStateManagerTest,
AddingResultStateShouldEvictOverBudgetResultState) {
ResultStateManager result_state_manager(/*max_total_hits=*/4,
- document_store());
+ document_store(), clock());
// Add a result state that is larger than the entire budget. The entire result
// state will still be cached
ResultState result_state1 =
@@ -864,7 +1013,8 @@ TEST_F(ResultStateManagerTest, ShouldGetSnippetContext) {
document_store());
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
+ clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state,
result_state_manager.RankAndPaginate(std::move(original_result_state)));
@@ -899,7 +1049,8 @@ TEST_F(ResultStateManagerTest, ShouldGetDefaultSnippetContext) {
document_store());
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
+ clock());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state,
result_state_manager.RankAndPaginate(std::move(original_result_state)));
@@ -924,7 +1075,8 @@ TEST_F(ResultStateManagerTest, ShouldGetCorrectNumPreviouslyReturned) {
/*num_per_page=*/2);
ResultStateManager result_state_manager(
- /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(),
+ clock());
// First page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
@@ -970,7 +1122,7 @@ TEST_F(ResultStateManagerTest, ShouldStoreAllHits) {
/*num_per_page=*/2);
ResultStateManager result_state_manager(/*max_total_hits=*/4,
- document_store());
+ document_store(), clock());
// The 5 input scored document hits will not be truncated. The first page of
// two hits will be returned immediately and the other three hits will fit
diff --git a/icing/result/result-state-v2.cc b/icing/result/result-state-v2.cc
new file mode 100644
index 0000000..dde50e3
--- /dev/null
+++ b/icing/result/result-state-v2.cc
@@ -0,0 +1,94 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-state-v2.h"
+
+#include <atomic>
+#include <memory>
+
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/snippet-context.h"
+#include "icing/scoring/scored-document-hits-ranker.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+SnippetContext CreateSnippetContext(SectionRestrictQueryTermsMap query_terms,
+ const SearchSpecProto& search_spec,
+ const ResultSpecProto& result_spec) {
+ if (result_spec.snippet_spec().num_to_snippet() > 0 &&
+ result_spec.snippet_spec().num_matches_per_property() > 0) {
+ // Needs snippeting
+ return SnippetContext(std::move(query_terms), result_spec.snippet_spec(),
+ search_spec.term_match_type());
+ }
+ return SnippetContext(/*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::UNKNOWN);
+}
+} // namespace
+
+ResultStateV2::ResultStateV2(
+ std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker_in,
+ SectionRestrictQueryTermsMap query_terms,
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec, const DocumentStore& document_store)
+ : scored_document_hits_ranker(std::move(scored_document_hits_ranker_in)),
+ num_returned(0),
+ snippet_context_(CreateSnippetContext(std::move(query_terms), search_spec,
+ result_spec)),
+ num_per_page_(result_spec.num_per_page()),
+ num_total_hits_(nullptr) {
+ for (const TypePropertyMask& type_field_mask :
+ result_spec.type_property_masks()) {
+ projection_tree_map_.insert(
+ {type_field_mask.schema_type(), ProjectionTree(type_field_mask)});
+ }
+
+ for (const ResultSpecProto::ResultGrouping& result_grouping :
+ result_spec.result_groupings()) {
+ int group_id = group_result_limits.size();
+ group_result_limits.push_back(result_grouping.max_results());
+ for (const std::string& name_space : result_grouping.namespaces()) {
+ auto namespace_id_or = document_store.GetNamespaceId(name_space);
+ if (!namespace_id_or.ok()) {
+ continue;
+ }
+ namespace_group_id_map_.insert({namespace_id_or.ValueOrDie(), group_id});
+ }
+ }
+}
+
+ResultStateV2::~ResultStateV2() {
+ IncrementNumTotalHits(-1 * scored_document_hits_ranker->size());
+}
+
+void ResultStateV2::RegisterNumTotalHits(std::atomic<int>* num_total_hits) {
+ // Decrement the original num_total_hits_ before registering a new one.
+ IncrementNumTotalHits(-1 * scored_document_hits_ranker->size());
+ num_total_hits_ = num_total_hits;
+ IncrementNumTotalHits(scored_document_hits_ranker->size());
+}
+
+void ResultStateV2::IncrementNumTotalHits(int increment_by) {
+ if (num_total_hits_ != nullptr) {
+ *num_total_hits_ += increment_by;
+ }
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-state-v2.h b/icing/result/result-state-v2.h
new file mode 100644
index 0000000..fc56936
--- /dev/null
+++ b/icing/result/result-state-v2.h
@@ -0,0 +1,125 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_RESULT_STATE_V2_H_
+#define ICING_RESULT_RESULT_STATE_V2_H_
+
+#include <atomic>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/absl_ports/mutex.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/snippet-context.h"
+#include "icing/scoring/scored-document-hits-ranker.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-id.h"
+
+namespace icing {
+namespace lib {
+
+// Used to hold information needed across multiple pagination requests of the
+// same query. Stored in ResultStateManager.
+class ResultStateV2 {
+ public:
+ explicit ResultStateV2(
+ std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker_in,
+ SectionRestrictQueryTermsMap query_terms,
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec, const DocumentStore& document_store);
+
+ ~ResultStateV2();
+
+ // Register num_total_hits_ and add current scored_document_hits_ranker.size()
+ // to it. When re-registering, it will subtract
+ // scored_document_hits_ranker.size() from the original counter.
+ void RegisterNumTotalHits(std::atomic<int>* num_total_hits)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex);
+
+ // Increment the global counter num_total_hits_ by increment_by, if
+ // num_total_hits_ has been registered (is not nullptr).
+ // Note that providing a negative value for increment_by is a valid usage,
+ // which will actually decrement num_total_hits_.
+ //
+ // It has to be called when we change scored_document_hits_ranker.
+ void IncrementNumTotalHits(int increment_by)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex);
+
+ const SnippetContext& snippet_context() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return snippet_context_;
+ }
+
+ const std::unordered_map<std::string, ProjectionTree>& projection_tree_map()
+ const ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return projection_tree_map_;
+ }
+
+ const std::unordered_map<NamespaceId, int>& namespace_group_id_map() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return namespace_group_id_map_;
+ }
+
+ int num_per_page() const ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return num_per_page_;
+ }
+
+ absl_ports::shared_mutex mutex;
+
+ // When evaluating the next top K hits from scored_document_hits_ranker, some
+ // of them may be filtered out by group_result_limits and won't return to the
+ // client, so they shouldn't be counted into num_returned. Also the logic of
+ // group result limiting depends on retrieval, so it is impossible for
+ // ResultState itself to correctly modify these fields. Thus, we make them
+ // public, so users of this class can modify them directly.
+
+ // The scored document hits ranker.
+ std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker
+ ICING_GUARDED_BY(mutex);
+
+ // The count of remaining results to return for a group where group id is the
+ // index.
+ std::vector<int> group_result_limits ICING_GUARDED_BY(mutex);
+
+ // Number of results that have already been returned.
+ int num_returned ICING_GUARDED_BY(mutex);
+
+ private:
+ // Information needed for snippeting.
+ SnippetContext snippet_context_ ICING_GUARDED_BY(mutex);
+
+ // Information needed for projection.
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map_
+ ICING_GUARDED_BY(mutex);
+
+ // A map between namespace id and the id of the group that it appears in.
+ std::unordered_map<NamespaceId, int> namespace_group_id_map_
+ ICING_GUARDED_BY(mutex);
+
+ // Number of results to return in each page.
+ int num_per_page_ ICING_GUARDED_BY(mutex);
+
+ // Pointer to a global counter to sum up the size of
+ // scored_document_hits_ranker in all ResultStates.
+ // Does not own.
+ std::atomic<int>* num_total_hits_ ICING_GUARDED_BY(mutex);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_RESULT_STATE_V2_H_
diff --git a/icing/result/result-state-v2_test.cc b/icing/result/result-state-v2_test.cc
new file mode 100644
index 0000000..8e6b29a
--- /dev/null
+++ b/icing/result/result-state-v2_test.cc
@@ -0,0 +1,443 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-state-v2.h"
+
+#include <atomic>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "icing/absl_ports/mutex.h"
+#include "icing/file/filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/snippet-context.h"
+#include "icing/schema/schema-store.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/scoring/scored-document-hits-ranker.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/clock.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Pair;
+using ::testing::UnorderedElementsAre;
+
+SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(match_type);
+ return search_spec;
+}
+
+ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
+ : ScoringSpecProto::Order::ASC);
+ return scoring_spec;
+}
+
+ResultSpecProto CreateResultSpec(int num_per_page) {
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
+
+class ResultStateV2Test : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ schema_store_base_dir_ = GetTestTempDir() + "/schema_store";
+ filesystem_.CreateDirectoryRecursively(schema_store_base_dir_.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_));
+ SchemaProto schema;
+ schema.add_types()->set_schema_type("Document");
+ ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+
+ doc_store_base_dir_ = GetTestTempDir() + "/document_store";
+ filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult result,
+ DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_,
+ schema_store_.get()));
+ document_store_ = std::move(result.document_store);
+
+ num_total_hits_ = 0;
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(doc_store_base_dir_.c_str());
+ filesystem_.DeleteDirectoryRecursively(schema_store_base_dir_.c_str());
+ }
+
+ ScoredDocumentHit AddScoredDocument(DocumentId document_id) {
+ DocumentProto document;
+ document.set_namespace_("namespace");
+ document.set_uri(std::to_string(document_id));
+ document.set_schema("Document");
+ document_store_->Put(std::move(document));
+ return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1);
+ }
+
+ DocumentStore& document_store() { return *document_store_; }
+
+ std::atomic<int>& num_total_hits() { return num_total_hits_; }
+
+ const std::atomic<int>& num_total_hits() const { return num_total_hits_; }
+
+ private:
+ Filesystem filesystem_;
+ std::string doc_store_base_dir_;
+ std::string schema_store_base_dir_;
+ Clock clock_;
+ std::unique_ptr<DocumentStore> document_store_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::atomic<int> num_total_hits_;
+};
+
+TEST_F(ResultStateV2Test, ShouldReturnSnippetContextAccordingToSpecs) {
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(5);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5);
+
+ SectionRestrictQueryTermsMap query_terms_map;
+ query_terms_map.emplace("term1", std::unordered_set<std::string>());
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::vector<ScoredDocumentHit>(),
+ /*is_descending=*/true),
+ query_terms_map, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ document_store());
+
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ const SnippetContext snippet_context = result_state.snippet_context();
+
+ // Snippet context should be derived from the specs above.
+ EXPECT_TRUE(snippet_context.query_terms.find("term1") !=
+ snippet_context.query_terms.end());
+ EXPECT_THAT(snippet_context.snippet_spec,
+ EqualsProto(result_spec.snippet_spec()));
+ EXPECT_THAT(snippet_context.match_type, Eq(TermMatchType::EXACT_ONLY));
+
+ // The same copy can be fetched multiple times.
+ const SnippetContext snippet_context2 = result_state.snippet_context();
+ EXPECT_TRUE(snippet_context2.query_terms.find("term1") !=
+ snippet_context2.query_terms.end());
+ EXPECT_THAT(snippet_context2.snippet_spec,
+ EqualsProto(result_spec.snippet_spec()));
+ EXPECT_THAT(snippet_context2.match_type, Eq(TermMatchType::EXACT_ONLY));
+}
+
+TEST_F(ResultStateV2Test, NoSnippetingShouldReturnNull) {
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ // Setting num_to_snippet to 0 so that snippeting info won't be
+ // stored.
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(0);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5);
+
+ SectionRestrictQueryTermsMap query_terms_map;
+ query_terms_map.emplace("term1", std::unordered_set<std::string>());
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::vector<ScoredDocumentHit>(),
+ /*is_descending=*/true),
+ query_terms_map, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ document_store());
+
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ const SnippetContext snippet_context = result_state.snippet_context();
+ EXPECT_THAT(snippet_context.query_terms, IsEmpty());
+ EXPECT_THAT(
+ snippet_context.snippet_spec,
+ EqualsProto(ResultSpecProto::SnippetSpecProto::default_instance()));
+ EXPECT_THAT(snippet_context.match_type, TermMatchType::UNKNOWN);
+}
+
+TEST_F(ResultStateV2Test, ShouldConstructProjectionTreeMapAccordingToSpecs) {
+ // Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("sender.name");
+ email_type_property_mask->add_paths("sender.emailAddress");
+ TypePropertyMask* phone_type_property_mask =
+ result_spec.add_type_property_masks();
+ phone_type_property_mask->set_schema_type("Phone");
+ phone_type_property_mask->add_paths("caller");
+ TypePropertyMask* wildcard_type_property_mask =
+ result_spec.add_type_property_masks();
+ wildcard_type_property_mask->set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask->add_paths("wild.card");
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::vector<ScoredDocumentHit>(),
+ /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ document_store());
+
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ const std::unordered_map<std::string, ProjectionTree>& projection_tree_map =
+ result_state.projection_tree_map();
+ EXPECT_THAT(projection_tree_map,
+ UnorderedElementsAre(
+ Pair("Email", ProjectionTree(*email_type_property_mask)),
+ Pair("Phone", ProjectionTree(*phone_type_property_mask)),
+ Pair(std::string(ProjectionTree::kSchemaTypeWildcard),
+ ProjectionTree(*wildcard_type_property_mask))));
+}
+
+TEST_F(ResultStateV2Test,
+ ShouldConstructNamespaceGroupIdMapAndGroupResultLimitsAccordingToSpecs) {
+ // Create 3 docs under namespace1, namespace2, namespace3.
+ DocumentProto document1;
+ document1.set_namespace_("namespace1");
+ document1.set_uri("uri/1");
+ document1.set_schema("Document");
+ ICING_ASSERT_OK(document_store().Put(std::move(document1)));
+
+ DocumentProto document2;
+ document2.set_namespace_("namespace2");
+ document2.set_uri("uri/2");
+ document2.set_schema("Document");
+ ICING_ASSERT_OK(document_store().Put(std::move(document2)));
+
+ DocumentProto document3;
+ document3.set_namespace_("namespace3");
+ document3.set_uri("uri/3");
+ document3.set_schema("Document");
+ ICING_ASSERT_OK(document_store().Put(std::move(document3)));
+
+ // Create a ResultSpec that limits "namespace1" to 3 results and limits
+ // "namespace2"+"namespace3" to a total of 2 results. Also add
+ // "nonexistentNamespace1" and "nonexistentNamespace2" to test the behavior.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(3);
+ result_grouping->add_namespaces("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(5);
+ result_grouping->add_namespaces("nonexistentNamespace2");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(2);
+ result_grouping->add_namespaces("namespace2");
+ result_grouping->add_namespaces("namespace3");
+ result_grouping->add_namespaces("nonexistentNamespace1");
+
+ // Get namespace ids.
+ ICING_ASSERT_OK_AND_ASSIGN(NamespaceId namespace_id1,
+ document_store().GetNamespaceId("namespace1"));
+ ICING_ASSERT_OK_AND_ASSIGN(NamespaceId namespace_id2,
+ document_store().GetNamespaceId("namespace2"));
+ ICING_ASSERT_OK_AND_ASSIGN(NamespaceId namespace_id3,
+ document_store().GetNamespaceId("namespace3"));
+
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::vector<ScoredDocumentHit>(),
+ /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ document_store());
+
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ // "namespace1" should be in group 0, and "namespace2"+"namespace3" should be
+ // in group 2.
+ // "nonexistentNamespace1" and "nonexistentNamespace2" shouldn't exist.
+ EXPECT_THAT(
+ result_state.namespace_group_id_map(),
+ UnorderedElementsAre(Pair(namespace_id1, 0), Pair(namespace_id2, 2),
+ Pair(namespace_id3, 2)));
+
+ // group_result_limits should contain 3 (at index 0 for group 0), 5 (at index
+ // 1 for group 1), 2 (at index 2 for group 2), even though there is no valid
+ // namespace in group 1.
+ EXPECT_THAT(result_state.group_result_limits, ElementsAre(3, 5, 2));
+}
+
+TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHits) {
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/3)};
+
+ // Creates a ResultState with 5 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/5), document_store());
+
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ EXPECT_THAT(num_total_hits(), Eq(0));
+ result_state.RegisterNumTotalHits(&num_total_hits());
+ EXPECT_THAT(num_total_hits(), Eq(5));
+ result_state.IncrementNumTotalHits(500);
+ EXPECT_THAT(num_total_hits(), Eq(505));
+}
+
+TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHitsWhenDestructed) {
+ std::vector<ScoredDocumentHit> scored_document_hits1 = {
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/3)};
+
+ std::vector<ScoredDocumentHit> scored_document_hits2 = {
+ AddScoredDocument(/*document_id=*/6),
+ AddScoredDocument(/*document_id=*/5)};
+
+ num_total_hits() = 2;
+ {
+ // Creates a ResultState with 5 ScoredDocumentHits.
+ ResultStateV2 result_state1(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits1),
+ /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/5), document_store());
+
+ absl_ports::unique_lock l(&result_state1.mutex);
+
+ result_state1.RegisterNumTotalHits(&num_total_hits());
+ ASSERT_THAT(num_total_hits(), Eq(7));
+
+ {
+ // Creates another ResultState with 2 ScoredDocumentHits.
+ ResultStateV2 result_state2(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits2),
+ /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/5), document_store());
+
+ absl_ports::unique_lock l(&result_state2.mutex);
+
+ result_state2.RegisterNumTotalHits(&num_total_hits());
+ ASSERT_THAT(num_total_hits(), Eq(9));
+ }
+
+ EXPECT_THAT(num_total_hits(), Eq(7));
+ }
+ EXPECT_THAT(num_total_hits(), Eq(2));
+}
+
+TEST_F(ResultStateV2Test, ShouldNotUpdateNumTotalHitsWhenNotRegistered) {
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/3)};
+
+ // Creates a ResultState with 5 ScoredDocumentHits.
+ {
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/5), document_store());
+
+ {
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ EXPECT_THAT(num_total_hits(), Eq(0));
+ result_state.IncrementNumTotalHits(500);
+ EXPECT_THAT(num_total_hits(), Eq(0));
+ }
+ }
+ EXPECT_THAT(num_total_hits(), Eq(0));
+}
+
+TEST_F(ResultStateV2Test, ShouldDecrementOriginalNumTotalHitsWhenReregister) {
+ std::atomic<int> another_num_total_hits = 11;
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/3)};
+
+ // Creates a ResultState with 5 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ CreateResultSpec(/*num_per_page=*/5), document_store());
+
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ num_total_hits() = 7;
+ result_state.RegisterNumTotalHits(&num_total_hits());
+ EXPECT_THAT(num_total_hits(), Eq(12));
+
+ result_state.RegisterNumTotalHits(&another_num_total_hits);
+ // The original num_total_hits should be decremented after re-registration.
+ EXPECT_THAT(num_total_hits(), Eq(7));
+ // another_num_total_hits should be incremented after re-registration.
+ EXPECT_THAT(another_num_total_hits, Eq(16));
+
+ result_state.IncrementNumTotalHits(500);
+ // The original num_total_hits should be unchanged.
+ EXPECT_THAT(num_total_hits(), Eq(7));
+ // Increment should be done on another_num_total_hits.
+ EXPECT_THAT(another_num_total_hits, Eq(516));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-state.cc b/icing/result/result-state.cc
index fc89185..24f5c09 100644
--- a/icing/result/result-state.cc
+++ b/icing/result/result-state.cc
@@ -82,13 +82,15 @@ class GroupResultLimiter {
// Returns true if the scored_document_hit should be removed.
bool operator()(const ScoredDocumentHit& scored_document_hit) {
- auto document_filter_data_or = document_store_.GetDocumentFilterData(
- scored_document_hit.document_id());
- if (!document_filter_data_or.ok()) {
+ auto document_filter_data_optional =
+ document_store_.GetAliveDocumentFilterData(
+ scored_document_hit.document_id());
+ if (!document_filter_data_optional) {
+ // Document doesn't exist.
return true;
}
NamespaceId namespace_id =
- document_filter_data_or.ValueOrDie().namespace_id();
+ document_filter_data_optional.value().namespace_id();
auto iter = namespace_group_id_map_.find(namespace_id);
if (iter == namespace_group_id_map_.end()) {
return false;
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index fc50ea6..653f34f 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -27,6 +27,7 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
#include "icing/proto/document.pb.h"
@@ -35,7 +36,7 @@
#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
-#include "icing/store/key-mapper.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -49,8 +50,9 @@ constexpr char kSchemaStoreHeaderFilename[] = "schema_store_header";
constexpr char kSchemaFilename[] = "schema.pb";
constexpr char kSchemaTypeMapperFilename[] = "schema_type_mapper";
-// A KeyMapper stores its data across 3 arrays internally. Giving each array
-// 128KiB for storage means the entire KeyMapper requires 384KiB.
+// A DynamicTrieKeyMapper stores its data across 3 arrays internally. Giving
+// each array 128KiB for storage means the entire DynamicTrieKeyMapper requires
+// 384KiB.
constexpr int32_t kSchemaTypeMapperMaxSize = 3 * 128 * 1024; // 384 KiB
const std::string MakeHeaderFilename(const std::string& base_dir) {
@@ -196,8 +198,8 @@ libtextclassifier3::Status SchemaStore::InitializeInternal(
if (initialize_stats != nullptr) {
initialize_stats->set_num_schema_types(type_config_map_.size());
}
-
has_schema_successfully_set_ = true;
+
return libtextclassifier3::Status::OK;
}
@@ -222,9 +224,9 @@ libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() {
ICING_ASSIGN_OR_RETURN(
schema_type_mapper_,
- KeyMapper<SchemaTypeId>::Create(*filesystem_,
- MakeSchemaTypeMapperFilename(base_dir_),
- kSchemaTypeMapperMaxSize));
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ *filesystem_, MakeSchemaTypeMapperFilename(base_dir_),
+ kSchemaTypeMapperMaxSize));
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
if (checksum.Get() != header.checksum) {
@@ -307,8 +309,9 @@ libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() {
schema_type_mapper_.reset();
// TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status = KeyMapper<SchemaTypeId>::Delete(
- *filesystem_, MakeSchemaTypeMapperFilename(base_dir_));
+ libtextclassifier3::Status status =
+ DynamicTrieKeyMapper<SchemaTypeId>::Delete(
+ *filesystem_, MakeSchemaTypeMapperFilename(base_dir_));
if (!status.ok()) {
ICING_LOG(ERROR) << status.error_message()
<< "Failed to delete old schema_type mapper";
@@ -316,9 +319,9 @@ libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() {
}
ICING_ASSIGN_OR_RETURN(
schema_type_mapper_,
- KeyMapper<SchemaTypeId>::Create(*filesystem_,
- MakeSchemaTypeMapperFilename(base_dir_),
- kSchemaTypeMapperMaxSize));
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ *filesystem_, MakeSchemaTypeMapperFilename(base_dir_),
+ kSchemaTypeMapperMaxSize));
return libtextclassifier3::Status::OK;
}
@@ -447,46 +450,29 @@ libtextclassifier3::Status SchemaStore::ApplySchemaChange(
std::string temp_schema_store_dir_path = base_dir_ + "_temp";
if (!filesystem_->DeleteDirectoryRecursively(
temp_schema_store_dir_path.c_str())) {
- ICING_LOG(WARNING) << "Failed to recursively delete "
+ ICING_LOG(ERROR) << "Recursively deleting "
<< temp_schema_store_dir_path.c_str();
return absl_ports::InternalError(
"Unable to delete temp directory to prepare to build new schema "
"store.");
}
- if (!filesystem_->CreateDirectoryRecursively(
- temp_schema_store_dir_path.c_str())) {
+ DestructibleDirectory temp_schema_store_dir(
+ filesystem_, std::move(temp_schema_store_dir_path));
+ if (!temp_schema_store_dir.is_valid()) {
return absl_ports::InternalError(
"Unable to create temp directory to build new schema store.");
}
// Then we create our new schema store with the new schema.
- auto new_schema_store_or =
- SchemaStore::Create(filesystem_, temp_schema_store_dir_path, clock_,
- std::move(new_schema));
- if (!new_schema_store_or.ok()) {
- // Attempt to clean up the temp directory.
- if (!filesystem_->DeleteDirectoryRecursively(
- temp_schema_store_dir_path.c_str())) {
- // Nothing to do here. Just log an error.
- ICING_LOG(WARNING) << "Failed to recursively delete "
- << temp_schema_store_dir_path.c_str();
- }
- return new_schema_store_or.status();
- }
- std::unique_ptr<SchemaStore> new_schema_store =
- std::move(new_schema_store_or).ValueOrDie();
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<SchemaStore> new_schema_store,
+ SchemaStore::Create(filesystem_, temp_schema_store_dir.dir(), clock_,
+ std::move(new_schema)));
// Then we swap the new schema file + new derived files with the old files.
if (!filesystem_->SwapFiles(base_dir_.c_str(),
- temp_schema_store_dir_path.c_str())) {
- // Attempt to clean up the temp directory.
- if (!filesystem_->DeleteDirectoryRecursively(
- temp_schema_store_dir_path.c_str())) {
- // Nothing to do here. Just log an error.
- ICING_LOG(WARNING) << "Failed to recursively delete "
- << temp_schema_store_dir_path.c_str();
- }
+ temp_schema_store_dir.dir().c_str())) {
return absl_ports::InternalError(
"Unable to apply new schema due to failed swap!");
}
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index 58e5477..82f4ffa 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -130,7 +130,7 @@ class SchemaStore {
static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, InitializeStatsProto* initialize_stats = nullptr);
-
+
SchemaStore(SchemaStore&&) = default;
SchemaStore& operator=(SchemaStore&&) = default;
@@ -282,7 +282,6 @@ class SchemaStore {
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, SchemaProto schema);
-
// Use SchemaStore::Create instead.
explicit SchemaStore(const Filesystem* filesystem, std::string base_dir,
const Clock* clock);
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
index 3fd41c4..ffd1292 100644
--- a/icing/schema/schema-store_test.cc
+++ b/icing/schema/schema-store_test.cc
@@ -18,6 +18,7 @@
#include <string>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/str_cat.h"
@@ -35,7 +36,6 @@
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/tmp-directory.h"
-#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/util/crc32.h"
namespace icing {
@@ -73,8 +73,8 @@ constexpr PropertyConfigProto::DataType::Code TYPE_DOUBLE =
class SchemaStoreTest : public ::testing::Test {
protected:
void SetUp() override {
- temp_dir_ = GetTestTempDir() + "/icing";
- schema_store_dir_ = temp_dir_ + "/schema_store";
+ test_dir_ = GetTestTempDir() + "/icing";
+ schema_store_dir_ = test_dir_ + "/schema_store";
filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
schema_ =
@@ -93,24 +93,24 @@ class SchemaStoreTest : public ::testing::Test {
// schema_store_dir_. IOW, ensure that all temporary directories have been
// properly cleaned up.
std::vector<std::string> sub_dirs;
- ASSERT_TRUE(filesystem_.ListDirectory(temp_dir_.c_str(), &sub_dirs));
+ ASSERT_TRUE(filesystem_.ListDirectory(test_dir_.c_str(), &sub_dirs));
ASSERT_THAT(sub_dirs, ElementsAre("schema_store"));
// Finally, clean everything up.
- ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(temp_dir_.c_str()));
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
}
Filesystem filesystem_;
- std::string temp_dir_;
+ std::string test_dir_;
std::string schema_store_dir_;
SchemaProto schema_;
FakeClock fake_clock_;
};
TEST_F(SchemaStoreTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(
- SchemaStore::Create(/*filesystem=*/nullptr, schema_store_dir_, &fake_clock_),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(SchemaStore::Create(/*filesystem=*/nullptr, schema_store_dir_,
+ &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) {
@@ -215,15 +215,17 @@ TEST_F(SchemaStoreTest, CorruptSchemaError) {
.AddType(SchemaTypeConfigBuilder().SetType("corrupted"))
.Build();
- const std::string schema_file = absl_ports::StrCat(schema_store_dir_, "/schema.pb");
+ const std::string schema_file =
+ absl_ports::StrCat(schema_store_dir_, "/schema.pb");
const std::string serialized_schema = corrupt_schema.SerializeAsString();
filesystem_.Write(schema_file.c_str(), serialized_schema.data(),
serialized_schema.size());
// If ground truth was corrupted, we won't know what to do
- EXPECT_THAT(SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) {
@@ -350,8 +352,9 @@ TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) {
IsOkAndHolds(EqualsSetSchemaResult(result)));
schema_store.reset();
- EXPECT_THAT(SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
- IsOk());
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ IsOk());
}
TEST_F(SchemaStoreTest, MultipleCreateOk) {
@@ -383,7 +386,8 @@ TEST_F(SchemaStoreTest, MultipleCreateOk) {
schema_store.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
// Verify that our in-memory structures are ok
EXPECT_THAT(schema_store->GetSchemaTypeConfig("email"),
@@ -1017,7 +1021,8 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameAcrossInstances) {
schema_store.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
@@ -1082,7 +1087,8 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) {
// And we get the same schema back on reinitialization
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
index 3dcc5a9..cb7c561 100644
--- a/icing/schema/section-manager_test.cc
+++ b/icing/schema/section-manager_test.cc
@@ -23,6 +23,7 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/schema-util.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
#include "icing/store/key-mapper.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -78,11 +79,11 @@ class SectionManagerTest : public ::testing::Test {
}
void SetUp() override {
- // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so
- // the total KeyMapper should get 384KiB
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each
+ // one 128KiB so the total DynamicTrieKeyMapper should get 384KiB
int key_mapper_size = 3 * 128 * 1024;
ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_,
- KeyMapper<SchemaTypeId>::Create(
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
filesystem_, test_dir_, key_mapper_size));
ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0));
ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1));
@@ -397,13 +398,14 @@ TEST_F(SectionManagerTest,
type_with_non_string_properties);
type_config_map.emplace(empty_type.schema_type(), empty_type);
- // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so
- // the total KeyMapper should get 384KiB
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each one
+ // 128KiB so the total DynamicTrieKeyMapper should get 384KiB
int key_mapper_size = 3 * 128 * 1024;
std::string dir = GetTestTempDir() + "/non_string_fields";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
- KeyMapper<SchemaTypeId>::Create(filesystem_, dir, key_mapper_size));
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, dir,
+ key_mapper_size));
ICING_ASSERT_OK(schema_type_mapper->Put(
type_with_non_string_properties.schema_type(), /*schema_type_id=*/0));
ICING_ASSERT_OK(schema_type_mapper->Put(empty_type.schema_type(),
@@ -486,13 +488,14 @@ TEST_F(SectionManagerTest, AssignSectionsRecursivelyForDocumentFields) {
type_config_map.emplace(type.schema_type(), type);
type_config_map.emplace(document_type.schema_type(), document_type);
- // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so
- // the total KeyMapper should get 384KiB
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each one
+ // 128KiB so the total DynamicTrieKeyMapper should get 384KiB
int key_mapper_size = 3 * 128 * 1024;
std::string dir = GetTestTempDir() + "/recurse_into_document";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
- KeyMapper<SchemaTypeId>::Create(filesystem_, dir, key_mapper_size));
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, dir,
+ key_mapper_size));
int type_schema_type_id = 0;
int document_type_schema_type_id = 1;
ICING_ASSERT_OK(
@@ -560,13 +563,14 @@ TEST_F(SectionManagerTest, DontAssignSectionsRecursivelyForDocumentFields) {
type_config_map.emplace(type.schema_type(), type);
type_config_map.emplace(document_type.schema_type(), document_type);
- // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so
- // the total KeyMapper should get 384KiB
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each one
+ // 128KiB so the total DynamicTrieKeyMapper should get 384KiB
int key_mapper_size = 3 * 128 * 1024;
std::string dir = GetTestTempDir() + "/recurse_into_document";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
- KeyMapper<SchemaTypeId>::Create(filesystem_, dir, key_mapper_size));
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, dir,
+ key_mapper_size));
int type_schema_type_id = 0;
int document_type_schema_type_id = 1;
ICING_ASSERT_OK(
diff --git a/icing/scoring/bm25f-calculator.cc b/icing/scoring/bm25f-calculator.cc
index 28d385e..4b426a9 100644
--- a/icing/scoring/bm25f-calculator.cc
+++ b/icing/scoring/bm25f-calculator.cc
@@ -233,8 +233,9 @@ float Bm25fCalculator::ComputeTermFrequencyForMatchedSections(
}
SchemaTypeId Bm25fCalculator::GetSchemaTypeId(DocumentId document_id) const {
- auto filter_data_or = document_store_->GetDocumentFilterData(document_id);
- if (!filter_data_or.ok()) {
+ auto filter_data_optional =
+ document_store_->GetAliveDocumentFilterData(document_id);
+ if (!filter_data_optional) {
// This should never happen. The only failure case for
// GetDocumentFilterData is if the document_id is outside of the range of
// allocated document_ids, which shouldn't be possible since we're getting
@@ -243,8 +244,7 @@ SchemaTypeId Bm25fCalculator::GetSchemaTypeId(DocumentId document_id) const {
"No document filter data for document [%d]", document_id);
return kInvalidSchemaTypeId;
}
- DocumentFilterData data = filter_data_or.ValueOrDie();
- return data.schema_type_id();
+ return filter_data_optional.value().schema_type_id();
}
} // namespace lib
diff --git a/icing/scoring/priority-queue-scored-document-hits-ranker.cc b/icing/scoring/priority-queue-scored-document-hits-ranker.cc
new file mode 100644
index 0000000..13da0ae
--- /dev/null
+++ b/icing/scoring/priority-queue-scored-document-hits-ranker.cc
@@ -0,0 +1,55 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+
+#include <queue>
+#include <vector>
+
+#include "icing/scoring/scored-document-hit.h"
+
+namespace icing {
+namespace lib {
+
+PriorityQueueScoredDocumentHitsRanker::PriorityQueueScoredDocumentHitsRanker(
+ const std::vector<ScoredDocumentHit>& scored_document_hits,
+ bool is_descending)
+ : comparator_(/*is_ascending=*/!is_descending),
+ scored_document_hits_pq_(scored_document_hits.begin(),
+ scored_document_hits.end(), comparator_) {}
+
+ScoredDocumentHit PriorityQueueScoredDocumentHitsRanker::PopNext() {
+ ScoredDocumentHit ret = scored_document_hits_pq_.top();
+ scored_document_hits_pq_.pop();
+ return ret;
+}
+
+void PriorityQueueScoredDocumentHitsRanker::TruncateHitsTo(int new_size) {
+ if (new_size < 0 || scored_document_hits_pq_.size() <= new_size) {
+ return;
+ }
+
+ // Copying the best new_size results.
+ std::priority_queue<ScoredDocumentHit, std::vector<ScoredDocumentHit>,
+ Comparator>
+ new_pq(comparator_);
+ for (int i = 0; i < new_size; ++i) {
+ new_pq.push(scored_document_hits_pq_.top());
+ scored_document_hits_pq_.pop();
+ }
+ scored_document_hits_pq_ = std::move(new_pq);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/priority-queue-scored-document-hits-ranker.h b/icing/scoring/priority-queue-scored-document-hits-ranker.h
new file mode 100644
index 0000000..c104585
--- /dev/null
+++ b/icing/scoring/priority-queue-scored-document-hits-ranker.h
@@ -0,0 +1,72 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
+#define ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
+
+#include <queue>
+#include <vector>
+
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/scoring/scored-document-hits-ranker.h"
+
+namespace icing {
+namespace lib {
+
+// ScoredDocumentHitsRanker interface implementation, based on
+// std::priority_queue. We can get next top hit in O(lgN) time.
+class PriorityQueueScoredDocumentHitsRanker : public ScoredDocumentHitsRanker {
+ public:
+ explicit PriorityQueueScoredDocumentHitsRanker(
+ const std::vector<ScoredDocumentHit>& scored_document_hits,
+ bool is_descending = true);
+
+ ~PriorityQueueScoredDocumentHitsRanker() override = default;
+
+ ScoredDocumentHit PopNext() override;
+
+ void TruncateHitsTo(int new_size) override;
+
+ int size() const override { return scored_document_hits_pq_.size(); }
+
+ bool empty() const override { return scored_document_hits_pq_.empty(); }
+
+ private:
+ // Comparator for std::priority_queue. Since std::priority is a max heap
+ // (descending order), reverse it if we want ascending order.
+ class Comparator {
+ public:
+ explicit Comparator(bool is_ascending) : is_ascending_(is_ascending) {}
+
+ bool operator()(const ScoredDocumentHit& lhs,
+ const ScoredDocumentHit& rhs) const {
+ return is_ascending_ == !(lhs < rhs);
+ }
+
+ private:
+ bool is_ascending_;
+ };
+
+ Comparator comparator_;
+
+ // Use priority queue to get top K hits in O(KlgN) time.
+ std::priority_queue<ScoredDocumentHit, std::vector<ScoredDocumentHit>,
+ Comparator>
+ scored_document_hits_pq_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
diff --git a/icing/scoring/priority-queue-scored-document-hits-ranker_test.cc b/icing/scoring/priority-queue-scored-document-hits-ranker_test.cc
new file mode 100644
index 0000000..a575eaf
--- /dev/null
+++ b/icing/scoring/priority-queue-scored-document-hits-ranker_test.cc
@@ -0,0 +1,239 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+std::vector<ScoredDocumentHit> PopAll(
+ PriorityQueueScoredDocumentHitsRanker& ranker) {
+ std::vector<ScoredDocumentHit> hits;
+ while (!ranker.empty()) {
+ hits.push_back(ranker.PopNext());
+ }
+ return hits;
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldGetCorrectSizeAndEmpty) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2},
+ /*is_descending=*/true);
+ EXPECT_THAT(ranker.size(), Eq(3));
+ EXPECT_FALSE(ranker.empty());
+
+ ranker.PopNext();
+ EXPECT_THAT(ranker.size(), Eq(2));
+ EXPECT_FALSE(ranker.empty());
+
+ ranker.PopNext();
+ EXPECT_THAT(ranker.size(), Eq(1));
+ EXPECT_FALSE(ranker.empty());
+
+ ranker.PopNext();
+ EXPECT_THAT(ranker.size(), Eq(0));
+ EXPECT_TRUE(ranker.empty());
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldRankInDescendingOrder) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/true);
+
+ EXPECT_THAT(ranker, SizeIs(5));
+ std::vector<ScoredDocumentHit> scored_document_hits = PopAll(ranker);
+ EXPECT_THAT(scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_4),
+ EqualsScoredDocumentHit(scored_hit_3),
+ EqualsScoredDocumentHit(scored_hit_2),
+ EqualsScoredDocumentHit(scored_hit_1),
+ EqualsScoredDocumentHit(scored_hit_0)));
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldRankInAscendingOrder) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/false);
+
+ EXPECT_THAT(ranker, SizeIs(5));
+ std::vector<ScoredDocumentHit> scored_document_hits = PopAll(ranker);
+ EXPECT_THAT(scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_0),
+ EqualsScoredDocumentHit(scored_hit_1),
+ EqualsScoredDocumentHit(scored_hit_2),
+ EqualsScoredDocumentHit(scored_hit_3),
+ EqualsScoredDocumentHit(scored_hit_4)));
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest,
+ ShouldRankDuplicateScoredDocumentHits) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker ranker(
+ {scored_hit_2, scored_hit_4, scored_hit_1, scored_hit_0, scored_hit_2,
+ scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/true);
+
+ EXPECT_THAT(ranker, SizeIs(8));
+ std::vector<ScoredDocumentHit> scored_document_hits = PopAll(ranker);
+ EXPECT_THAT(scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_4),
+ EqualsScoredDocumentHit(scored_hit_4),
+ EqualsScoredDocumentHit(scored_hit_3),
+ EqualsScoredDocumentHit(scored_hit_2),
+ EqualsScoredDocumentHit(scored_hit_2),
+ EqualsScoredDocumentHit(scored_hit_2),
+ EqualsScoredDocumentHit(scored_hit_1),
+ EqualsScoredDocumentHit(scored_hit_0)));
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest,
+ ShouldRankEmptyScoredDocumentHits) {
+ PriorityQueueScoredDocumentHitsRanker ranker(/*scored_document_hits=*/{},
+ /*is_descending=*/true);
+ EXPECT_THAT(ranker, IsEmpty());
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldTruncateToNewSize) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/true);
+ ASSERT_THAT(ranker, SizeIs(5));
+
+ ranker.TruncateHitsTo(/*new_size=*/3);
+ EXPECT_THAT(ranker, SizeIs(3));
+ std::vector<ScoredDocumentHit> scored_document_hits = PopAll(ranker);
+ EXPECT_THAT(scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_4),
+ EqualsScoredDocumentHit(scored_hit_3),
+ EqualsScoredDocumentHit(scored_hit_2)));
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldTruncateToZero) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/true);
+ ASSERT_THAT(ranker, SizeIs(5));
+
+ ranker.TruncateHitsTo(/*new_size=*/0);
+ EXPECT_THAT(ranker, IsEmpty());
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldNotTruncateToNegative) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/true);
+ ASSERT_THAT(ranker, SizeIs(Eq(5)));
+
+ ranker.TruncateHitsTo(/*new_size=*/-1);
+ EXPECT_THAT(ranker, SizeIs(Eq(5)));
+ // Contents are not affected.
+ std::vector<ScoredDocumentHit> scored_document_hits = PopAll(ranker);
+ EXPECT_THAT(scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_4),
+ EqualsScoredDocumentHit(scored_hit_3),
+ EqualsScoredDocumentHit(scored_hit_2),
+ EqualsScoredDocumentHit(scored_hit_1),
+ EqualsScoredDocumentHit(scored_hit_0)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/ranker.cc b/icing/scoring/ranker.cc
index 117f44c..ad971d3 100644
--- a/icing/scoring/ranker.cc
+++ b/icing/scoring/ranker.cc
@@ -103,8 +103,7 @@ void HeapifyTermDown(std::vector<TermMetadata>& scored_terms,
// If the minimum is not the subtree root, swap and continue heapifying the
// lower level subtree.
if (min != target_subtree_root_index) {
- std::swap(scored_terms.at(min),
- scored_terms.at(target_subtree_root_index));
+ std::swap(scored_terms.at(min), scored_terms.at(target_subtree_root_index));
HeapifyTermDown(scored_terms, min);
}
}
@@ -146,35 +145,6 @@ TermMetadata PopRootTerm(std::vector<TermMetadata>& scored_terms) {
return root;
}
-// Helper function to extract the root from the heap. The heap structure will be
-// maintained.
-//
-// Returns:
-// The current root element on success
-// RESOURCE_EXHAUSTED_ERROR if heap is empty
-libtextclassifier3::StatusOr<ScoredDocumentHit> PopRoot(
- std::vector<ScoredDocumentHit>* scored_document_hits_heap,
- const ScoredDocumentHitComparator& scored_document_hit_comparator) {
- if (scored_document_hits_heap->empty()) {
- // An invalid ScoredDocumentHit
- return absl_ports::ResourceExhaustedError("Heap is empty");
- }
-
- // Steps to extract root from heap:
- // 1. copy out root
- ScoredDocumentHit root = scored_document_hits_heap->at(0);
- const size_t last_node_index = scored_document_hits_heap->size() - 1;
- // 2. swap root and the last node
- std::swap(scored_document_hits_heap->at(0),
- scored_document_hits_heap->at(last_node_index));
- // 3. remove last node
- scored_document_hits_heap->pop_back();
- // 4. heapify root
- Heapify(scored_document_hits_heap, /*target_subtree_root_index=*/0,
- scored_document_hit_comparator);
- return root;
-}
-
} // namespace
void BuildHeapInPlace(
@@ -203,6 +173,29 @@ void PushToTermHeap(TermMetadata term, int number_to_return,
}
}
+libtextclassifier3::StatusOr<ScoredDocumentHit> PopNextTopResultFromHeap(
+ std::vector<ScoredDocumentHit>* scored_document_hits_heap,
+ const ScoredDocumentHitComparator& scored_document_hit_comparator) {
+ if (scored_document_hits_heap->empty()) {
+ // An invalid ScoredDocumentHit
+ return absl_ports::ResourceExhaustedError("Heap is empty");
+ }
+
+ // Steps to extract root from heap:
+ // 1. copy out root
+ ScoredDocumentHit root = scored_document_hits_heap->at(0);
+ const size_t last_node_index = scored_document_hits_heap->size() - 1;
+ // 2. swap root and the last node
+ std::swap(scored_document_hits_heap->at(0),
+ scored_document_hits_heap->at(last_node_index));
+ // 3. remove last node
+ scored_document_hits_heap->pop_back();
+ // 4. heapify root
+ Heapify(scored_document_hits_heap, /*target_subtree_root_index=*/0,
+ scored_document_hit_comparator);
+ return root;
+}
+
std::vector<ScoredDocumentHit> PopTopResultsFromHeap(
std::vector<ScoredDocumentHit>* scored_document_hits_heap, int num_results,
const ScoredDocumentHitComparator& scored_document_hit_comparator) {
@@ -211,7 +204,8 @@ std::vector<ScoredDocumentHit> PopTopResultsFromHeap(
num_results, static_cast<int>(scored_document_hits_heap->size()));
while (result_size-- > 0) {
libtextclassifier3::StatusOr<ScoredDocumentHit> next_best_document_hit_or =
- PopRoot(scored_document_hits_heap, scored_document_hit_comparator);
+ PopNextTopResultFromHeap(scored_document_hits_heap,
+ scored_document_hit_comparator);
if (next_best_document_hit_or.ok()) {
scored_document_hit_result.push_back(
std::move(next_best_document_hit_or).ValueOrDie());
diff --git a/icing/scoring/ranker.h b/icing/scoring/ranker.h
index 81838f3..bfe1077 100644
--- a/icing/scoring/ranker.h
+++ b/icing/scoring/ranker.h
@@ -17,6 +17,7 @@
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/term-metadata.h"
#include "icing/scoring/scored-document-hit.h"
@@ -32,6 +33,17 @@ void BuildHeapInPlace(
std::vector<ScoredDocumentHit>* scored_document_hits,
const ScoredDocumentHitComparator& scored_document_hit_comparator);
+// Returns the single next top result (i.e. the current root element) from the
+// given heap and remove it from the heap. The heap structure will be
+// maintained.
+//
+// Returns:
+// The next top result element on success
+// RESOURCE_EXHAUSTED_ERROR if heap is empty
+libtextclassifier3::StatusOr<ScoredDocumentHit> PopNextTopResultFromHeap(
+ std::vector<ScoredDocumentHit>* scored_document_hits_heap,
+ const ScoredDocumentHitComparator& scored_document_hit_comparator);
+
// Returns the top num_results results from the given heap and remove those
// results from the heap. An empty vector will be returned if heap is empty.
//
diff --git a/icing/scoring/ranker_benchmark.cc b/icing/scoring/ranker_benchmark.cc
index 8983dd9..c2f13de 100644
--- a/icing/scoring/ranker_benchmark.cc
+++ b/icing/scoring/ranker_benchmark.cc
@@ -27,7 +27,7 @@ namespace {
// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
// //icing/scoring:ranker_benchmark
//
-// $ blaze-bin/icing/scoring/ranker_benchmark --benchmarks=all
+// $ blaze-bin/icing/scoring/ranker_benchmark --benchmark_filter=all
// --benchmark_memory_usage
//
// Run on an Android device:
@@ -38,7 +38,7 @@ namespace {
// $ adb push blaze-bin/icing/scoring/ranker_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/ranker_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/ranker_benchmark --benchmark_filter=all
void BM_GetTopN(benchmark::State& state) {
int num_to_score = state.range(0);
diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc
index cc1d995..44dda3c 100644
--- a/icing/scoring/score-and-rank_benchmark.cc
+++ b/icing/scoring/score-and-rank_benchmark.cc
@@ -49,7 +49,7 @@
// //icing/scoring:score-and-rank_benchmark
//
// $ blaze-bin/icing/scoring/score-and-rank_benchmark
-// --benchmarks=all --benchmark_memory_usage
+// --benchmark_filter=all --benchmark_memory_usage
//
// Run on an Android device:
// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
@@ -59,7 +59,7 @@
// $ adb push blaze-bin/icing/scoring/score-and-rank_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/score-and-rank_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/score-and-rank_benchmark --benchmark_filter=all
namespace icing {
namespace lib {
diff --git a/icing/scoring/scored-document-hits-ranker.h b/icing/scoring/scored-document-hits-ranker.h
new file mode 100644
index 0000000..0287452
--- /dev/null
+++ b/icing/scoring/scored-document-hits-ranker.h
@@ -0,0 +1,53 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_SCORED_DOCUMENT_HITS_RANKER_H_
+#define ICING_SCORING_SCORED_DOCUMENT_HITS_RANKER_H_
+
+#include "icing/scoring/scored-document-hit.h"
+
+namespace icing {
+namespace lib {
+
+// TODO(sungyc): re-evaluate other similar implementations (e.g. std::sort +
+// std::queue/std::vector). Also revisit the capacity shrinking
+// issue for PopNext().
+
+// ScoredDocumentHitsRanker is an interface class for ranking
+// ScoredDocumentHits.
+class ScoredDocumentHitsRanker {
+ public:
+ virtual ~ScoredDocumentHitsRanker() = default;
+
+ // Pop the next top ScoredDocumentHit and return. It is undefined to call
+ // PopNext on an empty ranker, so the caller should check if it is not empty
+ // before calling.
+ virtual ScoredDocumentHit PopNext() = 0;
+
+ // Truncates the remaining ScoredDocumentHits to the given size. The best
+ // ScoredDocumentHits (according to the ranking policy) should be kept.
+ // If new_size is invalid (< 0), or greater or equal to # of remaining
+ // ScoredDocumentHits, then no action will be taken. Otherwise truncates the
+ // remaining ScoredDocumentHits to the given size.
+ virtual void TruncateHitsTo(int new_size) = 0;
+
+ virtual int size() const = 0;
+
+ virtual bool empty() const = 0;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_SCORED_DOCUMENT_HITS_RANKER_H_
diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc
index 5e23a8e..1739a50 100644
--- a/icing/store/document-log-creator.cc
+++ b/icing/store/document-log-creator.cc
@@ -18,7 +18,6 @@
#include <string>
#include <utility>
-#include "icing/text_classifier/lib3/utils/base/logging.h"
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/annotate.h"
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 8c8369c..aa3122b 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -46,13 +46,14 @@
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-log-creator.h"
-#include "icing/store/key-mapper.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
#include "icing/store/namespace-id.h"
#include "icing/store/usage-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
#include "icing/util/data-loss.h"
+#include "icing/util/fingerprint-util.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
@@ -77,8 +78,8 @@ constexpr char kCorpusIdMapperFilename[] = "corpus_mapper";
// because we allow up to 1 million DocumentIds.
constexpr int32_t kUriMapperMaxSize = 36 * 1024 * 1024; // 36 MiB
-// 384 KiB for a KeyMapper would allow each internal array to have a max of
-// 128 KiB for storage.
+// 384 KiB for a DynamicTrieKeyMapper would allow each internal array to have a
+// max of 128 KiB for storage.
constexpr int32_t kNamespaceMapperMaxSize = 3 * 128 * 1024; // 384 KiB
constexpr int32_t kCorpusMapperMaxSize = 3 * 128 * 1024; // 384 KiB
@@ -125,22 +126,13 @@ std::string MakeCorpusMapperFilename(const std::string& base_dir) {
// overhead per key. As we know that these fingerprints are always 8-bytes in
// length and that they're random, we might be able to store them more
// compactly.
-std::string MakeFingerprint(std::string_view name_space, std::string_view uri) {
+std::string MakeFingerprint(std::string_view field1, std::string_view field2) {
// Using a 64-bit fingerprint to represent the key could lead to collisions.
// But, even with 200K unique keys, the probability of collision is about
// one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack).
uint64_t fprint =
- tc3farmhash::Fingerprint64(absl_ports::StrCat(name_space, uri));
-
- std::string encoded_fprint;
- // DynamicTrie cannot handle keys with '0' as bytes. So, we encode it in
- // base128 and add 1 to make sure that no byte is '0'. This increases the
- // size of the encoded_fprint from 8-bytes to 10-bytes.
- while (fprint) {
- encoded_fprint.push_back((fprint & 0x7F) + 1);
- fprint >>= 7;
- }
- return encoded_fprint;
+ tc3farmhash::Fingerprint64(absl_ports::StrCat(field1, field2));
+ return fingerprint_util::GetFingerprintString(fprint);
}
int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms,
@@ -266,12 +258,13 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
GetRecoveryCause(create_result, force_recovery_and_revalidate_documents);
if (recovery_cause != InitializeStatsProto::NONE || create_result.new_file) {
- ICING_LOG(WARNING) << "Starting Document Store Recovery with cause="
- << recovery_cause << ", and create result { new_file="
- << create_result.new_file << ", preeisting_file_version="
- << create_result.preexisting_file_version << ", data_loss="
- << create_result.log_create_result.data_loss << "} and kCurrentVersion="
- << DocumentLogCreator::kCurrentVersion;
+ ICING_LOG(INFO) << "Starting Document Store Recovery with cause="
+ << recovery_cause << ", and create result { new_file="
+ << create_result.new_file << ", preeisting_file_version="
+ << create_result.preexisting_file_version << ", data_loss="
+ << create_result.log_create_result.data_loss
+ << "} and kCurrentVersion="
+ << DocumentLogCreator::kCurrentVersion;
// We can't rely on any existing derived files. Recreate them from scratch.
// Currently happens if:
// 1) This is a new log and we don't have derived files yet
@@ -348,8 +341,11 @@ libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
- auto document_key_mapper_or =
- KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize);
+ auto document_key_mapper_or = DynamicTrieKeyMapper<
+ DocumentId,
+ fingerprint_util::FingerprintStringFormatter>::Create(*filesystem_,
+ base_dir_,
+ kUriMapperMaxSize);
if (!document_key_mapper_or.ok()) {
ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
<< "Failed to initialize KeyMapper";
@@ -381,18 +377,23 @@ libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
ICING_ASSIGN_OR_RETURN(
namespace_mapper_,
- KeyMapper<NamespaceId>::Create(*filesystem_,
- MakeNamespaceMapperFilename(base_dir_),
- kNamespaceMapperMaxSize));
+ DynamicTrieKeyMapper<NamespaceId>::Create(
+ *filesystem_, MakeNamespaceMapperFilename(base_dir_),
+ kNamespaceMapperMaxSize));
ICING_ASSIGN_OR_RETURN(
usage_store_,
UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
- ICING_ASSIGN_OR_RETURN(corpus_mapper_,
- KeyMapper<CorpusId>::Create(
- *filesystem_, MakeCorpusMapperFilename(base_dir_),
- kCorpusMapperMaxSize));
+ auto corpus_mapper_or =
+ DynamicTrieKeyMapper<CorpusId,
+ fingerprint_util::FingerprintStringFormatter>::
+ Create(*filesystem_, MakeCorpusMapperFilename(base_dir_),
+ kCorpusMapperMaxSize);
+ if (!corpus_mapper_or.ok()) {
+ return std::move(corpus_mapper_or).status();
+ }
+ corpus_mapper_ = std::move(corpus_mapper_or).ValueOrDie();
ICING_ASSIGN_OR_RETURN(corpus_score_cache_,
FileBackedVector<CorpusAssociatedScoreData>::Create(
@@ -561,7 +562,7 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() {
// TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
libtextclassifier3::Status status =
- KeyMapper<DocumentId>::Delete(*filesystem_, base_dir_);
+ DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem_, base_dir_);
if (!status.ok()) {
ICING_LOG(ERROR) << status.error_message()
<< "Failed to delete old key mapper";
@@ -570,8 +571,11 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() {
// TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
- auto document_key_mapper_or =
- KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize);
+ auto document_key_mapper_or = DynamicTrieKeyMapper<
+ DocumentId,
+ fingerprint_util::FingerprintStringFormatter>::Create(*filesystem_,
+ base_dir_,
+ kUriMapperMaxSize);
if (!document_key_mapper_or.ok()) {
ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
<< "Failed to re-init key mapper";
@@ -648,7 +652,7 @@ libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() {
namespace_mapper_.reset();
// TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status = KeyMapper<NamespaceId>::Delete(
+ libtextclassifier3::Status status = DynamicTrieKeyMapper<NamespaceId>::Delete(
*filesystem_, MakeNamespaceMapperFilename(base_dir_));
if (!status.ok()) {
ICING_LOG(ERROR) << status.error_message()
@@ -657,9 +661,9 @@ libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() {
}
ICING_ASSIGN_OR_RETURN(
namespace_mapper_,
- KeyMapper<NamespaceId>::Create(*filesystem_,
- MakeNamespaceMapperFilename(base_dir_),
- kNamespaceMapperMaxSize));
+ DynamicTrieKeyMapper<NamespaceId>::Create(
+ *filesystem_, MakeNamespaceMapperFilename(base_dir_),
+ kNamespaceMapperMaxSize));
return libtextclassifier3::Status::OK;
}
@@ -668,17 +672,22 @@ libtextclassifier3::Status DocumentStore::ResetCorpusMapper() {
corpus_mapper_.reset();
// TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status = KeyMapper<CorpusId>::Delete(
+ libtextclassifier3::Status status = DynamicTrieKeyMapper<CorpusId>::Delete(
*filesystem_, MakeCorpusMapperFilename(base_dir_));
if (!status.ok()) {
ICING_LOG(ERROR) << status.error_message()
<< "Failed to delete old corpus_id mapper";
return status;
}
- ICING_ASSIGN_OR_RETURN(corpus_mapper_,
- KeyMapper<CorpusId>::Create(
- *filesystem_, MakeCorpusMapperFilename(base_dir_),
- kCorpusMapperMaxSize));
+ auto corpus_mapper_or =
+ DynamicTrieKeyMapper<CorpusId,
+ fingerprint_util::FingerprintStringFormatter>::
+ Create(*filesystem_, MakeCorpusMapperFilename(base_dir_),
+ kCorpusMapperMaxSize);
+ if (!corpus_mapper_or.ok()) {
+ return std::move(corpus_mapper_or).status();
+ }
+ corpus_mapper_ = std::move(corpus_mapper_or).ValueOrDie();
return libtextclassifier3::Status::OK;
}
@@ -931,7 +940,18 @@ libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
DocumentId document_id, bool clear_internal_fields) const {
- ICING_RETURN_IF_ERROR(DoesDocumentExistWithStatus(document_id));
+ auto document_filter_data_optional_ = GetAliveDocumentFilterData(document_id);
+ if (!document_filter_data_optional_) {
+ // The document doesn't exist. Let's check if the document id is invalid, we
+ // will return InvalidArgumentError. Otherwise we should return NOT_FOUND
+ // error.
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Document id '%d' invalid.", document_id));
+ }
+ return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
+ "Document id '%d' doesn't exist", document_id));
+ }
auto document_log_offset_or = document_id_mapper_->Get(document_id);
if (!document_log_offset_or.ok()) {
@@ -991,7 +1011,7 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const {
}
const DocumentFilterData* data = status_or_data.ValueOrDie();
- if (InternalDoesDocumentExist(document_id)) {
+ if (GetAliveDocumentFilterData(document_id)) {
existing_namespace_ids.insert(data->namespace_id());
}
}
@@ -1004,43 +1024,15 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const {
return existing_namespaces;
}
-bool DocumentStore::DoesDocumentExist(DocumentId document_id) const {
- if (!IsDocumentIdValid(document_id)) {
- return false;
- }
-
- if (document_id >= document_id_mapper_->num_elements()) {
- // Somehow got an validly constructed document_id that the document store
- // doesn't know about
- return false;
- }
-
- return InternalDoesDocumentExist(document_id);
-}
-
-libtextclassifier3::Status DocumentStore::DoesDocumentExistWithStatus(
+std::optional<DocumentFilterData> DocumentStore::GetAliveDocumentFilterData(
DocumentId document_id) const {
if (!IsDocumentIdValid(document_id)) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Document id '%d' invalid.", document_id));
+ return std::nullopt;
}
-
- if (document_id >= document_id_mapper_->num_elements()) {
- // Somehow got a validly constructed document_id that the document store
- // doesn't know about.
- return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
- "Unknown document id '%d'.", document_id));
+ if (IsDeleted(document_id)) {
+ return std::nullopt;
}
-
- if (!InternalDoesDocumentExist(document_id)) {
- return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
- "Document id '%d' doesn't exist", document_id));
- };
- return libtextclassifier3::Status::OK;
-}
-
-bool DocumentStore::InternalDoesDocumentExist(DocumentId document_id) const {
- return !IsDeleted(document_id) && !IsExpired(document_id);
+ return GetNonExpiredDocumentFilterData(document_id);
}
bool DocumentStore::IsDeleted(DocumentId document_id) const {
@@ -1057,21 +1049,27 @@ bool DocumentStore::IsDeleted(DocumentId document_id) const {
return file_offset == kDocDeletedFlag;
}
-bool DocumentStore::IsExpired(DocumentId document_id) const {
- auto filter_data_or = filter_cache_->Get(document_id);
+// Returns DocumentFilterData if the document is not expired. Otherwise,
+// std::nullopt.
+std::optional<DocumentFilterData>
+DocumentStore::GetNonExpiredDocumentFilterData(DocumentId document_id) const {
+ auto filter_data_or = filter_cache_->GetCopy(document_id);
if (!filter_data_or.ok()) {
// This would only happen if document_id is out of range of the
// filter_cache, meaning we got some invalid document_id. Callers should
// already have checked that their document_id is valid or used
// DoesDocumentExist(WithStatus). Regardless, return true since the
// document doesn't exist.
- return true;
+ return std::nullopt;
}
- const DocumentFilterData* filter_data = filter_data_or.ValueOrDie();
+ DocumentFilterData document_filter_data = filter_data_or.ValueOrDie();
// Check if it's past the expiration time
- return clock_.GetSystemTimeMilliseconds() >=
- filter_data->expiration_timestamp_ms();
+ if (clock_.GetSystemTimeMilliseconds() >=
+ document_filter_data.expiration_timestamp_ms()) {
+ return std::nullopt;
+ }
+ return document_filter_data;
}
libtextclassifier3::Status DocumentStore::Delete(
@@ -1088,7 +1086,17 @@ libtextclassifier3::Status DocumentStore::Delete(
}
libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id) {
- ICING_RETURN_IF_ERROR(DoesDocumentExistWithStatus(document_id));
+ auto document_filter_data_optional_ = GetAliveDocumentFilterData(document_id);
+ if (!document_filter_data_optional_) {
+ // The document doesn't exist. We should return InvalidArgumentError if the
+ // document id is invalid. Otherwise we should return NOT_FOUND error.
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Document id '%d' invalid.", document_id));
+ }
+ return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
+ "Document id '%d' doesn't exist", document_id));
+ }
auto document_log_offset_or = document_id_mapper_->Get(document_id);
if (!document_log_offset_or.ok()) {
@@ -1113,7 +1121,7 @@ libtextclassifier3::StatusOr<CorpusId> DocumentStore::GetCorpusId(
libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const {
- if (!DoesDocumentExist(document_id)) {
+ if (!GetAliveDocumentFilterData(document_id)) {
return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
"Can't get usage scores, document id '%d' doesn't exist", document_id));
}
@@ -1162,27 +1170,9 @@ DocumentStore::GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const {
return corpus_scoring_data_or.status();
}
-libtextclassifier3::StatusOr<DocumentFilterData>
-DocumentStore::GetDocumentFilterData(DocumentId document_id) const {
- if (!DoesDocumentExist(document_id)) {
- return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
- "Can't get filter data, document id '%d' doesn't exist", document_id));
- }
-
- auto filter_data_or = filter_cache_->GetCopy(document_id);
- if (!filter_data_or.ok()) {
- ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id
- << " from filter_cache_";
- return filter_data_or.status();
- }
- DocumentFilterData document_filter_data =
- std::move(filter_data_or).ValueOrDie();
- return document_filter_data;
-}
-
libtextclassifier3::StatusOr<UsageStore::UsageScores>
DocumentStore::GetUsageScores(DocumentId document_id) const {
- if (!DoesDocumentExist(document_id)) {
+ if (!GetAliveDocumentFilterData(document_id)) {
return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
"Can't get usage scores, document id '%d' doesn't exist", document_id));
}
@@ -1197,7 +1187,7 @@ libtextclassifier3::Status DocumentStore::ReportUsage(
// We can use the internal version here because we got our document_id from
// our internal data structures. We would have thrown some error if the
// namespace and/or uri were incorrect.
- if (!InternalDoesDocumentExist(document_id)) {
+ if (!GetAliveDocumentFilterData(document_id)) {
// Document was probably deleted or expired.
return absl_ports::NotFoundError(absl_ports::StrCat(
"Couldn't report usage on a nonexistent document: (namespace: '",
@@ -1415,7 +1405,7 @@ DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts(
UsageStore::UsageScores usage_scores = usage_scores_or.ValueOrDie();
// Update our stats
- if (IsExpired(document_id)) {
+ if (!GetNonExpiredDocumentFilterData(document_id)) {
++total_num_expired;
namespace_storage_info.set_num_expired_documents(
namespace_storage_info.num_expired_documents() + 1);
@@ -1529,7 +1519,7 @@ libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore(
int size = document_id_mapper_->num_elements();
for (DocumentId document_id = 0; document_id < size; document_id++) {
- if (!InternalDoesDocumentExist(document_id)) {
+ if (!GetAliveDocumentFilterData(document_id)) {
// Skip nonexistent documents
continue;
}
@@ -1611,7 +1601,7 @@ libtextclassifier3::Status DocumentStore::OptimizeInto(
if (absl_ports::IsNotFound(document_or.status())) {
if (IsDeleted(document_id)) {
++num_deleted;
- } else if (IsExpired(document_id)) {
+ } else if (!GetNonExpiredDocumentFilterData(document_id)) {
++num_expired;
}
continue;
@@ -1680,7 +1670,7 @@ DocumentStore::GetOptimizeInfo() const {
int32_t num_documents = document_id_mapper_->num_elements();
for (DocumentId document_id = kMinDocumentId; document_id < num_documents;
++document_id) {
- if (!InternalDoesDocumentExist(document_id)) {
+ if (!GetAliveDocumentFilterData(document_id)) {
++optimize_info.optimizable_docs;
}
@@ -1713,8 +1703,8 @@ DocumentStore::GetOptimizeInfo() const {
ICING_ASSIGN_OR_RETURN(const int64_t usage_store_file_size,
usage_store_->GetElementsFileSize());
- // We use a combined disk usage and file size for the KeyMapper because it's
- // backed by a trie, which has some sparse property bitmaps.
+ // We use a combined disk usage and file size for the DynamicTrieKeyMapper
+ // because it's backed by a trie, which has some sparse property bitmaps.
ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_size,
document_key_mapper_->GetElementsSize());
@@ -1794,7 +1784,7 @@ DocumentStore::CollectCorpusInfo() const {
const SchemaProto* schema_proto = schema_proto_or.ValueOrDie();
for (DocumentId document_id = 0; document_id < filter_cache_->num_elements();
++document_id) {
- if (!InternalDoesDocumentExist(document_id)) {
+ if (!GetAliveDocumentFilterData(document_id)) {
continue;
}
ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index e6d2e5c..450b1b9 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -48,6 +48,7 @@
#include "icing/util/crc32.h"
#include "icing/util/data-loss.h"
#include "icing/util/document-validator.h"
+#include "icing/util/fingerprint-util.h"
namespace icing {
namespace lib {
@@ -198,19 +199,6 @@ class DocumentStore {
// or expired). Order of namespaces is undefined.
std::vector<std::string> GetAllNamespaces() const;
- // Check if a document exists. Existence means it hasn't been deleted and it
- // hasn't expired yet.
- //
- // NOTE: This should be used when callers don't care about error messages,
- // expect documents to be deleted/not found, or in frequently called code
- // paths that could cause performance issues. A signficant amount of CPU
- // cycles can be saved if we don't construct strings and create new Status
- // objects on the heap. See b/185822483.
- //
- // Returns:
- // boolean whether a document exists or not
- bool DoesDocumentExist(DocumentId document_id) const;
-
// Deletes the document identified by the given namespace and uri. The
// document proto will be erased immediately.
//
@@ -280,14 +268,15 @@ class DocumentStore {
libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
GetCorpusAssociatedScoreData(CorpusId corpus_id) const;
- // Returns the DocumentFilterData of the document specified by the DocumentId.
+ // Gets the document filter data if a document exists. Otherwise, will get a
+ // false optional.
+ //
+ // Existence means it hasn't been deleted and it hasn't expired yet.
//
// Returns:
- // DocumentFilterData on success
- // OUT_OF_RANGE if document_id is negative or exceeds previously seen
- // DocumentIds
- // NOT_FOUND if the document or the filter data is not found
- libtextclassifier3::StatusOr<DocumentFilterData> GetDocumentFilterData(
+ // True:DocumentFilterData if the given document exists.
+ // False if the given document doesn't exist.
+ std::optional<DocumentFilterData> GetAliveDocumentFilterData(
DocumentId document_id) const;
// Gets the usage scores of a document.
@@ -455,7 +444,9 @@ class DocumentStore {
std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_;
// Key (namespace + uri) to DocumentId mapping
- std::unique_ptr<KeyMapper<DocumentId>> document_key_mapper_;
+ std::unique_ptr<
+ KeyMapper<DocumentId, fingerprint_util::FingerprintStringFormatter>>
+ document_key_mapper_;
// DocumentId to file offset mapping
std::unique_ptr<FileBackedVector<int64_t>> document_id_mapper_;
@@ -491,7 +482,9 @@ class DocumentStore {
// unique id. A coprus is assigned an
// id when the first document belonging to that corpus is added to the
// DocumentStore. Corpus ids may be removed from the mapper during compaction.
- std::unique_ptr<KeyMapper<CorpusId>> corpus_mapper_;
+ std::unique_ptr<
+ KeyMapper<CorpusId, fingerprint_util::FingerprintStringFormatter>>
+ corpus_mapper_;
// A storage class that caches all usage scores. Usage scores are not
// considered as ground truth. Usage scores are associated with document ids
@@ -648,18 +641,6 @@ class DocumentStore {
libtextclassifier3::Status DoesDocumentExistWithStatus(
DocumentId document_id) const;
- // Check if a document exists. Existence means it hasn't been deleted and it
- // hasn't expired yet.
- //
- // This is for internal-use only because we assume that the document_id is
- // already valid. If you're unsure if the document_id is valid, use
- // DoesDocumentExist(document_id) instead, which will perform those additional
- // checks.
- //
- // Returns:
- // boolean whether a document exists or not
- bool InternalDoesDocumentExist(DocumentId document_id) const;
-
// Checks if a document has been deleted
//
// This is for internal-use only because we assume that the document_id is
@@ -674,7 +655,12 @@ class DocumentStore {
// already valid. If you're unsure if the document_id is valid, use
// DoesDocumentExist(document_id) instead, which will perform those additional
// checks.
- bool IsExpired(DocumentId document_id) const;
+
+ // Returns:
+ // True:DocumentFilterData if the given document isn't expired.
+ // False if the given document is expired.
+ std::optional<DocumentFilterData> GetNonExpiredDocumentFilterData(
+ DocumentId document_id) const;
// Updates the entry in the score cache for document_id.
libtextclassifier3::Status UpdateDocumentAssociatedScoreCache(
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
index fc3fd9d..c4d2346 100644
--- a/icing/store/document-store_benchmark.cc
+++ b/icing/store/document-store_benchmark.cc
@@ -46,7 +46,7 @@
// //icing/store:document-store_benchmark
//
// $ blaze-bin/icing/store/document-store_benchmark
-// --benchmarks=all --benchmark_memory_usage
+// --benchmark_filter=all --benchmark_memory_usage
//
// Run on an Android device:
// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
@@ -57,7 +57,7 @@
// /data/local/tmp/
//
// $ adb shell /data/local/tmp/document-store_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
namespace icing {
namespace lib {
@@ -164,7 +164,8 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
// Check random document ids to see if they exist. Hopefully to simulate
// page faulting in different sections of our mmapped derived files.
int document_id = dist(random);
- benchmark::DoNotOptimize(document_store->DoesDocumentExist(document_id));
+ benchmark::DoNotOptimize(
+ document_store->GetAliveDocumentFilterData(document_id));
}
}
BENCHMARK(BM_DoesDocumentExistBenchmark);
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index a30b4e4..59e5d74 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -358,23 +358,22 @@ TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
doc_store->Put(DocumentProto(test_document2_)));
- EXPECT_THAT(doc_store->DoesDocumentExist(document_id1), IsTrue());
- EXPECT_THAT(doc_store->DoesDocumentExist(document_id2), IsTrue());
+ EXPECT_TRUE(doc_store->GetAliveDocumentFilterData(document_id1));
+ EXPECT_TRUE(doc_store->GetAliveDocumentFilterData(document_id2));
DocumentId invalid_document_id_negative = -1;
- EXPECT_THAT(doc_store->DoesDocumentExist(invalid_document_id_negative),
- IsFalse());
+ EXPECT_FALSE(
+ doc_store->GetAliveDocumentFilterData(invalid_document_id_negative));
DocumentId invalid_document_id_greater_than_max = kMaxDocumentId + 2;
- EXPECT_THAT(
- doc_store->DoesDocumentExist(invalid_document_id_greater_than_max),
- IsFalse());
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ invalid_document_id_greater_than_max));
- EXPECT_THAT(doc_store->DoesDocumentExist(kInvalidDocumentId), IsFalse());
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(kInvalidDocumentId));
DocumentId invalid_document_id_out_of_range = document_id2 + 1;
- EXPECT_THAT(doc_store->DoesDocumentExist(invalid_document_id_out_of_range),
- IsFalse());
+ EXPECT_FALSE(
+ doc_store->GetAliveDocumentFilterData(invalid_document_id_out_of_range));
}
TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) {
@@ -485,6 +484,35 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
+TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ // Validates that deleting something non-existing won't append anything to
+ // ground truth
+ int64_t document_log_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
+
+ libtextclassifier3::Status status =
+ document_store->Delete("android$contacts/", "661");
+ EXPECT_THAT(status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ for (char c : status.error_message()) {
+ EXPECT_THAT(std::isprint(c), IsTrue());
+ }
+
+ int64_t document_log_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
+}
+
TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1130,12 +1158,15 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
IsOkAndHolds(EqualsProto(test_document2_)));
-
// Checks derived filter cache
- EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2),
- IsOkAndHolds(DocumentFilterData(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(document_id2));
+ EXPECT_THAT(doc_filter_data,
+ Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0, document2_expiration_timestamp_)));
+
// Checks derived score cache
EXPECT_THAT(
doc_store->GetDocumentAssociatedScoreData(document_id2),
@@ -1220,10 +1251,14 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
IsOkAndHolds(EqualsProto(test_document2_)));
// Checks derived filter cache
- EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2),
- IsOkAndHolds(DocumentFilterData(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(document_id2));
+ EXPECT_THAT(doc_filter_data,
+ Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0, document2_expiration_timestamp_)));
+
// Checks derived score cache - note that they aren't regenerated from
// scratch.
EXPECT_THAT(
@@ -1293,8 +1328,11 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
IsOkAndHolds(EqualsProto(test_document2_)));
// Checks derived filter cache
- EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2),
- IsOkAndHolds(DocumentFilterData(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(document_id2));
+ EXPECT_THAT(doc_filter_data,
+ Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0, document2_expiration_timestamp_)));
// Checks derived score cache
@@ -1704,8 +1742,7 @@ TEST_F(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) {
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
- EXPECT_THAT(doc_store->GetDocumentFilterData(/*document_id=*/0),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(/*document_id=*/0));
}
TEST_F(DocumentStoreTest, DeleteClearsFilterCache) {
@@ -1719,17 +1756,17 @@ TEST_F(DocumentStoreTest, DeleteClearsFilterCache) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(test_document1_));
- EXPECT_THAT(
- doc_store->GetDocumentFilterData(document_id),
- IsOkAndHolds(DocumentFilterData(
- /*namespace_id=*/0,
- /*schema_type_id=*/0,
- /*expiration_timestamp_ms=*/document1_expiration_timestamp_)));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(document_id));
+ EXPECT_THAT(doc_filter_data,
+ Eq(DocumentFilterData(
+ /*namespace_id=*/0,
+ /*schema_type_id=*/0, document1_expiration_timestamp_)));
ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
// Associated entry of the deleted document is removed.
- EXPECT_THAT(doc_store->GetDocumentFilterData(document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(document_id));
}
TEST_F(DocumentStoreTest, DeleteClearsScoreCache) {
@@ -1857,12 +1894,13 @@ TEST_F(DocumentStoreTest,
std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
-
- EXPECT_THAT(
- doc_store->GetDocumentFilterData(document_id),
- IsOkAndHolds(DocumentFilterData(/*namespace_id=*/0,
- /*schema_type_id=*/0,
- /*expiration_timestamp_ms=*/1100)));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(document_id));
+ EXPECT_THAT(doc_filter_data, Eq(DocumentFilterData(
+ /*namespace_id=*/0,
+ /*schema_type_id=*/0,
+ /*expiration_timestamp_ms=*/1100)));
}
TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
@@ -1882,9 +1920,13 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(document_id));
+
EXPECT_THAT(
- doc_store->GetDocumentFilterData(document_id),
- IsOkAndHolds(DocumentFilterData(
+ doc_filter_data,
+ Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0,
/*expiration_timestamp_ms=*/std::numeric_limits<int64_t>::max())));
@@ -1908,9 +1950,13 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(document_id));
+
EXPECT_THAT(
- doc_store->GetDocumentFilterData(document_id),
- IsOkAndHolds(DocumentFilterData(
+ doc_filter_data,
+ Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0,
/*expiration_timestamp_ms=*/std::numeric_limits<int64_t>::max())));
@@ -2108,9 +2154,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
email_document_id, document_store->Put(DocumentProto(email_document)));
EXPECT_THAT(document_store->Get(email_document_id),
IsOkAndHolds(EqualsProto(email_document)));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(email_document_id));
EXPECT_THAT(email_data.schema_type_id(), Eq(email_schema_type_id));
email_namespace_id = email_data.namespace_id();
email_expiration_timestamp = email_data.expiration_timestamp_ms();
@@ -2121,9 +2167,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
document_store->Put(DocumentProto(message_document)));
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(message_document_id));
EXPECT_THAT(message_data.schema_type_id(), Eq(message_schema_type_id));
message_namespace_id = message_data.namespace_id();
message_expiration_timestamp = message_data.expiration_timestamp_ms();
@@ -2161,9 +2207,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
// "email" document is fine
EXPECT_THAT(document_store->Get(email_document_id),
IsOkAndHolds(EqualsProto(email_document)));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(email_document_id));
EXPECT_THAT(email_data.schema_type_id(), Eq(email_schema_type_id));
// Make sure that all the other fields are stll valid/the same
EXPECT_THAT(email_data.namespace_id(), Eq(email_namespace_id));
@@ -2173,9 +2219,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
// "message" document has an invalid SchemaTypeId
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(message_document_id));
EXPECT_THAT(message_data.schema_type_id(), Eq(-1));
// Make sure that all the other fields are stll valid/the same
EXPECT_THAT(message_data.namespace_id(), Eq(message_namespace_id));
@@ -2227,16 +2273,16 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
document_store->Put(email_document));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(email_document_id));
EXPECT_THAT(email_data.schema_type_id(), Eq(old_email_schema_type_id));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
document_store->Put(message_document));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(message_document_id));
EXPECT_THAT(message_data.schema_type_id(), Eq(old_message_schema_type_id));
// Rearrange the schema types. Since SchemaTypeId is assigned based on order,
@@ -2260,12 +2306,14 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get()));
// Check that the FilterCache holds the new SchemaTypeIds
- ICING_ASSERT_OK_AND_ASSIGN(
- email_data, document_store->GetDocumentFilterData(email_document_id));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ email_data,
+ document_store->GetAliveDocumentFilterData(email_document_id));
EXPECT_THAT(email_data.schema_type_id(), Eq(new_email_schema_type_id));
- ICING_ASSERT_OK_AND_ASSIGN(
- message_data, document_store->GetDocumentFilterData(message_document_id));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ message_data,
+ document_store->GetAliveDocumentFilterData(message_document_id));
EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id));
}
@@ -2457,16 +2505,16 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
document_store->Put(email_document));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(email_document_id));
EXPECT_THAT(email_data.schema_type_id(), Eq(old_email_schema_type_id));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
document_store->Put(message_document));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(message_document_id));
EXPECT_THAT(message_data.schema_type_id(), Eq(old_message_schema_type_id));
// Rearrange the schema types. Since SchemaTypeId is assigned based on order,
@@ -2492,12 +2540,14 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
schema_store.get(), set_schema_result));
// Check that the FilterCache holds the new SchemaTypeIds
- ICING_ASSERT_OK_AND_ASSIGN(
- email_data, document_store->GetDocumentFilterData(email_document_id));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ email_data,
+ document_store->GetAliveDocumentFilterData(email_document_id));
EXPECT_THAT(email_data.schema_type_id(), Eq(new_email_schema_type_id));
- ICING_ASSERT_OK_AND_ASSIGN(
- message_data, document_store->GetDocumentFilterData(message_document_id));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ message_data,
+ document_store->GetAliveDocumentFilterData(message_document_id));
EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id));
}
@@ -3379,8 +3429,9 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
.SetTtlMs(document1_ttl_)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
- doc_store->GetDocumentFilterData(docid));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData filter_data,
+ doc_store->GetAliveDocumentFilterData(docid));
ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
}
@@ -3420,8 +3471,9 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
std::move(create_result.document_store);
// Ensure that the type id of the email document has been correctly updated.
- ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
- doc_store->GetDocumentFilterData(docid));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData filter_data,
+ doc_store->GetAliveDocumentFilterData(docid));
EXPECT_THAT(filter_data.schema_type_id(), Eq(1));
EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
@@ -3477,8 +3529,9 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
.SetTtlMs(document1_ttl_)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
- doc_store->GetDocumentFilterData(docid));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData filter_data,
+ doc_store->GetAliveDocumentFilterData(docid));
ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
}
@@ -3516,8 +3569,9 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
std::move(create_result.document_store);
// Check that the type id of the email document has not been updated.
- ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
- doc_store->GetDocumentFilterData(docid));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData filter_data,
+ doc_store->GetAliveDocumentFilterData(docid));
ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
}
}
@@ -3733,7 +3787,6 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
}
}
-#ifndef DISABLE_BACKWARDS_COMPAT_TEST
TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
// Set up schema.
SchemaProto schema =
@@ -3854,7 +3907,6 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
EXPECT_THAT(document_store->Get(/*document_id=*/2),
IsOkAndHolds(EqualsProto(document3)));
}
-#endif // DISABLE_BACKWARDS_COMPAT_TEST
TEST_F(DocumentStoreTest, GetDebugInfo) {
SchemaProto schema =
@@ -3928,8 +3980,9 @@ TEST_F(DocumentStoreTest, GetDebugInfo) {
.Build();
ICING_ASSERT_OK(document_store->Put(document4, 2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out1,
- document_store->GetDebugInfo(/*verbosity=*/1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentDebugInfoProto out1,
+ document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED));
EXPECT_THAT(out1.crc(), Gt(0));
EXPECT_THAT(out1.document_storage_info().num_alive_documents(), Eq(4));
EXPECT_THAT(out1.document_storage_info().num_deleted_documents(), Eq(0));
@@ -3957,8 +4010,9 @@ TEST_F(DocumentStoreTest, GetDebugInfo) {
// Delete document3.
ICING_ASSERT_OK(document_store->Delete("namespace2", "email/3"));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out2,
- document_store->GetDebugInfo(/*verbosity=*/1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentDebugInfoProto out2,
+ document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED));
EXPECT_THAT(out2.crc(), Gt(0));
EXPECT_THAT(out2.crc(), Not(Eq(out1.crc())));
EXPECT_THAT(out2.document_storage_info().num_alive_documents(), Eq(3));
@@ -3970,8 +4024,9 @@ TEST_F(DocumentStoreTest, GetDebugInfo) {
UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2),
EqualsProto(info3)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out3,
- document_store->GetDebugInfo(/*verbosity=*/0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentDebugInfoProto out3,
+ document_store->GetDebugInfo(DebugInfoVerbosity::BASIC));
EXPECT_THAT(out3.corpus_info(), IsEmpty());
}
@@ -3989,8 +4044,9 @@ TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) {
schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out,
- document_store->GetDebugInfo(/*verbosity=*/1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentDebugInfoProto out,
+ document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED));
EXPECT_THAT(out.crc(), Gt(0));
EXPECT_THAT(out.document_storage_info().num_alive_documents(), Eq(0));
EXPECT_THAT(out.document_storage_info().num_deleted_documents(), Eq(0));
@@ -4005,8 +4061,9 @@ TEST_F(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) {
schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out,
- document_store->GetDebugInfo(/*verbosity=*/1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentDebugInfoProto out,
+ document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED));
EXPECT_THAT(out.crc(), Gt(0));
EXPECT_THAT(out.document_storage_info().num_alive_documents(), Eq(0));
EXPECT_THAT(out.document_storage_info().num_deleted_documents(), Eq(0));
diff --git a/icing/store/dynamic-trie-key-mapper.h b/icing/store/dynamic-trie-key-mapper.h
new file mode 100644
index 0000000..dedd7b9
--- /dev/null
+++ b/icing/store/dynamic-trie-key-mapper.h
@@ -0,0 +1,299 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
+#define ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <type_traits>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/file/filesystem.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// File-backed mapping between the string key and a trivially copyable value
+// type.
+//
+// DynamicTrieKeyMapper is thread-compatible
+template <typename T, typename Formatter = absl_ports::DefaultFormatter>
+class DynamicTrieKeyMapper : public KeyMapper<T, Formatter> {
+ public:
+ // Returns an initialized instance of DynamicTrieKeyMapper that can
+ // immediately handle read/write operations.
+ // Returns any encountered IO errors.
+ //
+ // base_dir : Base directory used to save all the files required to persist
+ // DynamicTrieKeyMapper. If this base_dir was previously used to
+ // create a DynamicTrieKeyMapper, then this existing data would be
+ // loaded. Otherwise, an empty DynamicTrieKeyMapper would be
+ // created.
+ // maximum_size_bytes : The maximum allowable size of the key mapper storage.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
+ Create(const Filesystem& filesystem, std::string_view base_dir,
+ int maximum_size_bytes);
+
+ // Deletes all the files associated with the DynamicTrieKeyMapper. Returns
+ // success or any encountered IO errors
+ //
+ // base_dir : Base directory used to save all the files required to persist
+ // DynamicTrieKeyMapper. Should be the same as passed into
+ // Create().
+ static libtextclassifier3::Status Delete(const Filesystem& filesystem,
+ std::string_view base_dir);
+
+ ~DynamicTrieKeyMapper() override = default;
+
+ libtextclassifier3::Status Put(std::string_view key, T value) override;
+
+ libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key,
+ T next_value) override;
+
+ libtextclassifier3::StatusOr<T> Get(std::string_view key) const override;
+
+ bool Delete(std::string_view key) override;
+
+ std::unordered_map<T, std::string> GetValuesToKeys() const override;
+
+ int32_t num_keys() const override { return trie_.size(); }
+
+ libtextclassifier3::Status PersistToDisk() override;
+
+ libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const override;
+
+ libtextclassifier3::StatusOr<int64_t> GetElementsSize() const override;
+
+ Crc32 ComputeChecksum() override;
+
+ private:
+ static constexpr char kDynamicTrieKeyMapperDir[] = "key_mapper_dir";
+ static constexpr char kDynamicTrieKeyMapperPrefix[] = "key_mapper";
+
+ // Use DynamicTrieKeyMapper::Create() to instantiate.
+ explicit DynamicTrieKeyMapper(std::string_view key_mapper_dir);
+
+ // Load any existing DynamicTrieKeyMapper data from disk, or creates a new
+ // instance of DynamicTrieKeyMapper on disk and gets ready to process
+ // read/write operations.
+ //
+ // Returns any encountered IO errors.
+ libtextclassifier3::Status Initialize(int maximum_size_bytes);
+
+ const std::string file_prefix_;
+
+ // TODO(adorokhine) Filesystem is a forked class that's available both in
+ // icing and icing namespaces. We will need icing::Filesystem in order
+ // to use IcingDynamicTrie. Filesystem class should be fully refactored
+ // to have a single definition across both namespaces. Such a class should
+ // use icing (and general google3) coding conventions and behave like
+ // a proper C++ class.
+ const IcingFilesystem icing_filesystem_;
+ IcingDynamicTrie trie_;
+
+ static_assert(std::is_trivially_copyable<T>::value,
+ "T must be trivially copyable");
+};
+
+template <typename T, typename Formatter>
+libtextclassifier3::StatusOr<
+ std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
+DynamicTrieKeyMapper<T, Formatter>::Create(const Filesystem& filesystem,
+ std::string_view base_dir,
+ int maximum_size_bytes) {
+ // We create a subdirectory since the trie creates and stores multiple files.
+ // This makes it easier to isolate the trie files away from other files that
+ // could potentially be in the same base_dir, and makes it easier to delete.
+ const std::string key_mapper_dir =
+ absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
+ if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to create DynamicTrieKeyMapper directory: ", key_mapper_dir));
+ }
+ auto mapper = std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>(
+ new DynamicTrieKeyMapper<T, Formatter>(key_mapper_dir));
+ ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes));
+ return mapper;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Delete(
+ const Filesystem& filesystem, std::string_view base_dir) {
+ std::string key_mapper_dir =
+ absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
+ if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to delete DynamicTrieKeyMapper directory: ", key_mapper_dir));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T, typename Formatter>
+DynamicTrieKeyMapper<T, Formatter>::DynamicTrieKeyMapper(
+ std::string_view key_mapper_dir)
+ : file_prefix_(
+ absl_ports::StrCat(key_mapper_dir, "/", kDynamicTrieKeyMapperPrefix)),
+ trie_(file_prefix_,
+ IcingDynamicTrie::RuntimeOptions().set_storage_policy(
+ IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
+ &icing_filesystem_) {}
+
+template <typename T, typename Formatter>
+libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Initialize(
+ int maximum_size_bytes) {
+ IcingDynamicTrie::Options options;
+ // Divide the max space between the three internal arrays: nodes, nexts and
+ // suffixes. MaxNodes and MaxNexts are in units of their own data structures.
+ // MaxSuffixesSize is in units of bytes.
+ options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node));
+ options.max_nexts = options.max_nodes;
+ options.max_suffixes_size =
+ sizeof(IcingDynamicTrie::Node) * options.max_nodes;
+ options.value_size = sizeof(T);
+
+ if (!trie_.CreateIfNotExist(options)) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to create DynamicTrieKeyMapper file: ", file_prefix_));
+ }
+ if (!trie_.Init()) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to init DynamicTrieKeyMapper file: ", file_prefix_));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::GetOrPut(
+ std::string_view key, T next_value) {
+ std::string string_key(key);
+ uint32_t value_index;
+ if (!trie_.Insert(string_key.c_str(), &next_value, &value_index,
+ /*replace=*/false)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Unable to insert key ", Formatter()(string_key),
+ " into DynamicTrieKeyMapper ", file_prefix_, "."));
+ }
+ // This memory address could be unaligned since we're just grabbing the value
+ // from somewhere in the trie's suffix array. The suffix array is filled with
+ // chars, so the address might not be aligned to T values.
+ const T* unaligned_value =
+ static_cast<const T*>(trie_.GetValueAtIndex(value_index));
+
+ // memcpy the value to ensure that the returned value here is in a T-aligned
+ // address
+ T aligned_value;
+ memcpy(&aligned_value, unaligned_value, sizeof(T));
+ return aligned_value;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Put(
+ std::string_view key, T value) {
+ std::string string_key(key);
+ if (!trie_.Insert(string_key.c_str(), &value)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Unable to insert key ", Formatter()(string_key),
+ " into DynamicTrieKeyMapper ", file_prefix_, "."));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::Get(
+ std::string_view key) const {
+ std::string string_key(key);
+ T value;
+ if (!trie_.Find(string_key.c_str(), &value)) {
+ return absl_ports::NotFoundError(
+ absl_ports::StrCat("Key not found ", Formatter()(string_key),
+ " in DynamicTrieKeyMapper ", file_prefix_, "."));
+ }
+ return value;
+}
+
+template <typename T, typename Formatter>
+bool DynamicTrieKeyMapper<T, Formatter>::Delete(std::string_view key) {
+ return trie_.Delete(key);
+}
+
+template <typename T, typename Formatter>
+std::unordered_map<T, std::string>
+DynamicTrieKeyMapper<T, Formatter>::GetValuesToKeys() const {
+ std::unordered_map<T, std::string> values_to_keys;
+ for (IcingDynamicTrie::Iterator itr(trie_, /*prefix=*/""); itr.IsValid();
+ itr.Advance()) {
+ if (itr.IsValid()) {
+ T value;
+ memcpy(&value, itr.GetValue(), sizeof(T));
+ values_to_keys.insert({value, itr.GetKey()});
+ }
+ }
+
+ return values_to_keys;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::PersistToDisk() {
+ if (!trie_.Sync()) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to sync DynamicTrieKeyMapper file: ", file_prefix_));
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::StatusOr<int64_t>
+DynamicTrieKeyMapper<T, Formatter>::GetDiskUsage() const {
+ int64_t size = trie_.GetDiskUsage();
+ if (size == IcingFilesystem::kBadFileSize || size < 0) {
+ return absl_ports::InternalError("Failed to get disk usage of key mapper");
+ }
+ return size;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::StatusOr<int64_t>
+DynamicTrieKeyMapper<T, Formatter>::GetElementsSize() const {
+ int64_t size = trie_.GetElementsSize();
+ if (size == IcingFilesystem::kBadFileSize || size < 0) {
+ return absl_ports::InternalError(
+ "Failed to get disk usage of elements in the key mapper");
+ }
+ return size;
+}
+
+template <typename T, typename Formatter>
+Crc32 DynamicTrieKeyMapper<T, Formatter>::ComputeChecksum() {
+ return Crc32(trie_.UpdateCrc());
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
diff --git a/icing/store/key-mapper_test.cc b/icing/store/dynamic-trie-key-mapper_test.cc
index 4e3dd8a..03ba5f2 100644
--- a/icing/store/key-mapper_test.cc
+++ b/icing/store/dynamic-trie-key-mapper_test.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/store/key-mapper.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -29,9 +29,9 @@ using ::testing::UnorderedElementsAre;
namespace icing {
namespace lib {
namespace {
-constexpr int kMaxKeyMapperSize = 3 * 1024 * 1024; // 3 MiB
+constexpr int kMaxDynamicTrieKeyMapperSize = 3 * 1024 * 1024; // 3 MiB
-class KeyMapperTest : public testing::Test {
+class DynamicTrieKeyMapperTest : public testing::Test {
protected:
void SetUp() override { base_dir_ = GetTestTempDir() + "/key_mapper"; }
@@ -43,36 +43,39 @@ class KeyMapperTest : public testing::Test {
Filesystem filesystem_;
};
-TEST_F(KeyMapperTest, InvalidBaseDir) {
- ASSERT_THAT(
- KeyMapper<DocumentId>::Create(filesystem_, "/dev/null", kMaxKeyMapperSize)
- .status()
- .error_message(),
- HasSubstr("Failed to create KeyMapper"));
+TEST_F(DynamicTrieKeyMapperTest, InvalidBaseDir) {
+ ASSERT_THAT(DynamicTrieKeyMapper<DocumentId>::Create(
+ filesystem_, "/dev/null", kMaxDynamicTrieKeyMapperSize)
+ .status()
+ .error_message(),
+ HasSubstr("Failed to create DynamicTrieKeyMapper"));
}
-TEST_F(KeyMapperTest, NegativeMaxKeyMapperSizeReturnsInternalError) {
- ASSERT_THAT(KeyMapper<DocumentId>::Create(filesystem_, base_dir_, -1),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+TEST_F(DynamicTrieKeyMapperTest, NegativeMaxKeyMapperSizeReturnsInternalError) {
+ ASSERT_THAT(
+ DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_, -1),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
-TEST_F(KeyMapperTest, TooLargeMaxKeyMapperSizeReturnsInternalError) {
- ASSERT_THAT(KeyMapper<DocumentId>::Create(filesystem_, base_dir_,
- std::numeric_limits<int>::max()),
+TEST_F(DynamicTrieKeyMapperTest, TooLargeMaxKeyMapperSizeReturnsInternalError) {
+ ASSERT_THAT(DynamicTrieKeyMapper<DocumentId>::Create(
+ filesystem_, base_dir_, std::numeric_limits<int>::max()),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
-TEST_F(KeyMapperTest, CreateNewKeyMapper) {
+TEST_F(DynamicTrieKeyMapperTest, CreateNewKeyMapper) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper,
+ DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_,
+ kMaxDynamicTrieKeyMapperSize));
EXPECT_THAT(key_mapper->num_keys(), 0);
}
-TEST_F(KeyMapperTest, CanUpdateSameKeyMultipleTimes) {
+TEST_F(DynamicTrieKeyMapperTest, CanUpdateSameKeyMultipleTimes) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper,
+ DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_,
+ kMaxDynamicTrieKeyMapperSize));
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->Put("default-youtube.com", 50));
@@ -88,10 +91,11 @@ TEST_F(KeyMapperTest, CanUpdateSameKeyMultipleTimes) {
EXPECT_THAT(key_mapper->num_keys(), 2);
}
-TEST_F(KeyMapperTest, GetOrPutOk) {
+TEST_F(DynamicTrieKeyMapperTest, GetOrPutOk) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper,
+ DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_,
+ kMaxDynamicTrieKeyMapperSize));
EXPECT_THAT(key_mapper->Get("foo"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -99,15 +103,16 @@ TEST_F(KeyMapperTest, GetOrPutOk) {
EXPECT_THAT(key_mapper->Get("foo"), IsOkAndHolds(1));
}
-TEST_F(KeyMapperTest, CanPersistToDiskRegularly) {
+TEST_F(DynamicTrieKeyMapperTest, CanPersistToDiskRegularly) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
- // Can persist an empty KeyMapper.
+ std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper,
+ DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_,
+ kMaxDynamicTrieKeyMapperSize));
+ // Can persist an empty DynamicTrieKeyMapper.
ICING_EXPECT_OK(key_mapper->PersistToDisk());
EXPECT_THAT(key_mapper->num_keys(), 0);
- // Can persist the smallest KeyMapper.
+ // Can persist the smallest DynamicTrieKeyMapper.
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->PersistToDisk());
EXPECT_THAT(key_mapper->num_keys(), 1);
@@ -124,17 +129,18 @@ TEST_F(KeyMapperTest, CanPersistToDiskRegularly) {
EXPECT_THAT(key_mapper->num_keys(), 2);
}
-TEST_F(KeyMapperTest, CanUseAcrossMultipleInstances) {
+TEST_F(DynamicTrieKeyMapperTest, CanUseAcrossMultipleInstances) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper,
+ DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_,
+ kMaxDynamicTrieKeyMapperSize));
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->PersistToDisk());
key_mapper.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ key_mapper, DynamicTrieKeyMapper<DocumentId>::Create(
+ filesystem_, base_dir_, kMaxDynamicTrieKeyMapperSize));
EXPECT_THAT(key_mapper->num_keys(), 1);
EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(100));
@@ -146,30 +152,34 @@ TEST_F(KeyMapperTest, CanUseAcrossMultipleInstances) {
EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(300));
}
-TEST_F(KeyMapperTest, CanDeleteAndRestartKeyMapping) {
+TEST_F(DynamicTrieKeyMapperTest, CanDeleteAndRestartKeyMapping) {
// Can delete even if there's nothing there
- ICING_EXPECT_OK(KeyMapper<DocumentId>::Delete(filesystem_, base_dir_));
+ ICING_EXPECT_OK(
+ DynamicTrieKeyMapper<DocumentId>::Delete(filesystem_, base_dir_));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper,
+ DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_,
+ kMaxDynamicTrieKeyMapperSize));
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->PersistToDisk());
- ICING_EXPECT_OK(KeyMapper<DocumentId>::Delete(filesystem_, base_dir_));
+ ICING_EXPECT_OK(
+ DynamicTrieKeyMapper<DocumentId>::Delete(filesystem_, base_dir_));
key_mapper.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ key_mapper, DynamicTrieKeyMapper<DocumentId>::Create(
+ filesystem_, base_dir_, kMaxDynamicTrieKeyMapperSize));
EXPECT_THAT(key_mapper->num_keys(), 0);
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
EXPECT_THAT(key_mapper->num_keys(), 1);
}
-TEST_F(KeyMapperTest, GetValuesToKeys) {
+TEST_F(DynamicTrieKeyMapperTest, GetValuesToKeys) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper,
+ DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_,
+ kMaxDynamicTrieKeyMapperSize));
EXPECT_THAT(key_mapper->GetValuesToKeys(), IsEmpty());
ICING_EXPECT_OK(key_mapper->Put("foo", /*value=*/1));
diff --git a/icing/store/key-mapper.h b/icing/store/key-mapper.h
index 23c7b69..e05d1b7 100644
--- a/icing/store/key-mapper.h
+++ b/icing/store/key-mapper.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Google LLC
+// Copyright (C) 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -17,81 +17,56 @@
#include <cstdint>
#include <cstring>
-#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
+#include <unordered_map>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/absl_ports/canonical_errors.h"
-#include "icing/absl_ports/str_cat.h"
-#include "icing/file/filesystem.h"
-#include "icing/legacy/index/icing-dynamic-trie.h"
-#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/absl_ports/str_join.h"
#include "icing/util/crc32.h"
-#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
-// File-backed mapping between the string key and a trivially copyable value
-// type.
+// An interface for file-backed mapping between the string key and a trivially
+// copyable value type.
//
-// KeyMapper is thread-compatible
-template <typename T>
+// The implementation for KeyMapper should be thread-compatible
+template <typename T, typename Formatter = absl_ports::DefaultFormatter>
class KeyMapper {
public:
- // Returns an initialized instance of KeyMapper that can immediately handle
- // read/write operations.
- // Returns any encountered IO errors.
- //
- // base_dir : Base directory used to save all the files required to persist
- // KeyMapper. If this base_dir was previously used to create a
- // KeyMapper, then this existing data would be loaded. Otherwise,
- // an empty KeyMapper would be created.
- // maximum_size_bytes : The maximum allowable size of the key mapper storage.
- static libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>> Create(
- const Filesystem& filesystem, std::string_view base_dir,
- int maximum_size_bytes);
-
- // Deletes all the files associated with the KeyMapper. Returns success or any
- // encountered IO errors
- //
- // base_dir : Base directory used to save all the files required to persist
- // KeyMapper. Should be the same as passed into Create().
- static libtextclassifier3::Status Delete(const Filesystem& filesystem,
- std::string_view base_dir);
-
- ~KeyMapper() = default;
+ virtual ~KeyMapper() = default;
// Inserts/Updates value for key.
// Returns any encountered IO errors.
//
// NOTE: Put() doesn't automatically flush changes to disk and relies on
// either explicit calls to PersistToDisk() or a clean shutdown of the class.
- libtextclassifier3::Status Put(std::string_view key, T value);
+ virtual libtextclassifier3::Status Put(std::string_view key, T value) = 0;
// Finds the current value for key and returns it. If key is not present, it
// is inserted with next_value and next_value is returned.
//
// Returns any IO errors that may occur during Put.
- libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key, T next_value);
+ virtual libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key,
+ T next_value) = 0;
// Returns the value corresponding to the key.
//
// Returns NOT_FOUND error if the key was missing.
// Returns any encountered IO errors.
- libtextclassifier3::StatusOr<T> Get(std::string_view key) const;
+ virtual libtextclassifier3::StatusOr<T> Get(std::string_view key) const = 0;
// Deletes data related to the given key. Returns true on success.
- bool Delete(std::string_view key);
+ virtual bool Delete(std::string_view key) = 0;
// Returns a map of values to keys. Empty map if the mapper is empty.
- std::unordered_map<T, std::string> GetValuesToKeys() const;
+ virtual std::unordered_map<T, std::string> GetValuesToKeys() const = 0;
// Count of unique keys stored in the KeyMapper.
- int32_t num_keys() const { return trie_.size(); }
+ virtual int32_t num_keys() const = 0;
// Syncs all the changes made to the KeyMapper to disk.
// Returns any encountered IO errors.
@@ -103,7 +78,7 @@ class KeyMapper {
// Returns:
// OK on success
// INTERNAL on I/O error
- libtextclassifier3::Status PersistToDisk();
+ virtual libtextclassifier3::Status PersistToDisk() = 0;
// Calculates and returns the disk usage in bytes. Rounds up to the nearest
// block size.
@@ -111,7 +86,7 @@ class KeyMapper {
// Returns:
// Disk usage on success
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+ virtual libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const = 0;
// Returns the size of the elements held in the key mapper. This excludes the
// size of any internal metadata of the key mapper, e.g. the key mapper's
@@ -120,197 +95,16 @@ class KeyMapper {
// Returns:
// File size on success
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+ virtual libtextclassifier3::StatusOr<int64_t> GetElementsSize() const = 0;
// Computes and returns the checksum of the header and contents.
- Crc32 ComputeChecksum();
+ virtual Crc32 ComputeChecksum() = 0;
private:
- static constexpr char kKeyMapperDir[] = "key_mapper_dir";
- static constexpr char kKeyMapperPrefix[] = "key_mapper";
-
- // Use KeyMapper::Create() to instantiate.
- explicit KeyMapper(std::string_view key_mapper_dir);
-
- // Load any existing KeyMapper data from disk, or creates a new instance
- // of KeyMapper on disk and gets ready to process read/write operations.
- //
- // Returns any encountered IO errors.
- libtextclassifier3::Status Initialize(int maximum_size_bytes);
-
- const std::string file_prefix_;
-
- // TODO(adorokhine) Filesystem is a forked class that's available both in
- // icing and icing namespaces. We will need icing::Filesystem in order
- // to use IcingDynamicTrie. Filesystem class should be fully refactored
- // to have a single definition across both namespaces. Such a class should
- // use icing (and general google3) coding conventions and behave like
- // a proper C++ class.
- const IcingFilesystem icing_filesystem_;
- IcingDynamicTrie trie_;
-
static_assert(std::is_trivially_copyable<T>::value,
"T must be trivially copyable");
};
-template <typename T>
-libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>>
-KeyMapper<T>::Create(const Filesystem& filesystem, std::string_view base_dir,
- int maximum_size_bytes) {
- // We create a subdirectory since the trie creates and stores multiple files.
- // This makes it easier to isolate the trie files away from other files that
- // could potentially be in the same base_dir, and makes it easier to delete.
- const std::string key_mapper_dir =
- absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
- if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to create KeyMapper directory: ", key_mapper_dir));
- }
- auto mapper = std::unique_ptr<KeyMapper<T>>(new KeyMapper<T>(key_mapper_dir));
- ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes));
- return mapper;
-}
-
-template <typename T>
-libtextclassifier3::Status KeyMapper<T>::Delete(const Filesystem& filesystem,
- std::string_view base_dir) {
- std::string key_mapper_dir = absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
- if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to delete KeyMapper directory: ", key_mapper_dir));
- }
- return libtextclassifier3::Status::OK;
-}
-
-template <typename T>
-KeyMapper<T>::KeyMapper(std::string_view key_mapper_dir)
- : file_prefix_(absl_ports::StrCat(key_mapper_dir, "/", kKeyMapperPrefix)),
- trie_(file_prefix_,
- IcingDynamicTrie::RuntimeOptions().set_storage_policy(
- IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
- &icing_filesystem_) {}
-
-template <typename T>
-libtextclassifier3::Status KeyMapper<T>::Initialize(int maximum_size_bytes) {
- IcingDynamicTrie::Options options;
- // Divide the max space between the three internal arrays: nodes, nexts and
- // suffixes. MaxNodes and MaxNexts are in units of their own data structures.
- // MaxSuffixesSize is in units of bytes.
- options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node));
- options.max_nexts = options.max_nodes;
- options.max_suffixes_size =
- sizeof(IcingDynamicTrie::Node) * options.max_nodes;
- options.value_size = sizeof(T);
-
- if (!trie_.CreateIfNotExist(options)) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to create KeyMapper file: ", file_prefix_));
- }
- if (!trie_.Init()) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to init KeyMapper file: ", file_prefix_));
- }
- return libtextclassifier3::Status::OK;
-}
-
-template <typename T>
-libtextclassifier3::StatusOr<T> KeyMapper<T>::GetOrPut(std::string_view key,
- T next_value) {
- std::string string_key(key);
- uint32_t value_index;
- if (!trie_.Insert(string_key.c_str(), &next_value, &value_index,
- /*replace=*/false)) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Unable to insert key ", key, " into KeyMapper ", file_prefix_, "."));
- }
- // This memory address could be unaligned since we're just grabbing the value
- // from somewhere in the trie's suffix array. The suffix array is filled with
- // chars, so the address might not be aligned to T values.
- const T* unaligned_value =
- static_cast<const T*>(trie_.GetValueAtIndex(value_index));
-
- // memcpy the value to ensure that the returned value here is in a T-aligned
- // address
- T aligned_value;
- memcpy(&aligned_value, unaligned_value, sizeof(T));
- return aligned_value;
-}
-
-template <typename T>
-libtextclassifier3::Status KeyMapper<T>::Put(std::string_view key, T value) {
- std::string string_key(key);
- if (!trie_.Insert(string_key.c_str(), &value)) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Unable to insert key ", key, " into KeyMapper ", file_prefix_, "."));
- }
- return libtextclassifier3::Status::OK;
-}
-
-template <typename T>
-libtextclassifier3::StatusOr<T> KeyMapper<T>::Get(std::string_view key) const {
- std::string string_key(key);
- T value;
- if (!trie_.Find(string_key.c_str(), &value)) {
- return absl_ports::NotFoundError(absl_ports::StrCat(
- "Key not found ", key, " in KeyMapper ", file_prefix_, "."));
- }
- return value;
-}
-
-template <typename T>
-bool KeyMapper<T>::Delete(std::string_view key) {
- return trie_.Delete(key);
-}
-
-template <typename T>
-std::unordered_map<T, std::string> KeyMapper<T>::GetValuesToKeys() const {
- std::unordered_map<T, std::string> values_to_keys;
- for (IcingDynamicTrie::Iterator itr(trie_, /*prefix=*/""); itr.IsValid();
- itr.Advance()) {
- if (itr.IsValid()) {
- T value;
- memcpy(&value, itr.GetValue(), sizeof(T));
- values_to_keys.insert({value, itr.GetKey()});
- }
- }
-
- return values_to_keys;
-}
-
-template <typename T>
-libtextclassifier3::Status KeyMapper<T>::PersistToDisk() {
- if (!trie_.Sync()) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to sync KeyMapper file: ", file_prefix_));
- }
-
- return libtextclassifier3::Status::OK;
-}
-
-template <typename T>
-libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetDiskUsage() const {
- int64_t size = trie_.GetDiskUsage();
- if (size == IcingFilesystem::kBadFileSize || size < 0) {
- return absl_ports::InternalError("Failed to get disk usage of key mapper");
- }
- return size;
-}
-
-template <typename T>
-libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetElementsSize() const {
- int64_t size = trie_.GetElementsSize();
- if (size == IcingFilesystem::kBadFileSize || size < 0) {
- return absl_ports::InternalError(
- "Failed to get disk usage of elements in the key mapper");
- }
- return size;
-}
-
-template <typename T>
-Crc32 KeyMapper<T>::ComputeChecksum() {
- return Crc32(trie_.UpdateCrc());
-}
-
} // namespace lib
} // namespace icing
diff --git a/icing/store/namespace-checker-impl.h b/icing/store/namespace-checker-impl.h
index bcd0643..0b6fca9 100644
--- a/icing/store/namespace-checker-impl.h
+++ b/icing/store/namespace-checker-impl.h
@@ -32,14 +32,18 @@ class NamespaceCheckerImpl : public NamespaceChecker {
target_namespace_ids_(std::move(target_namespace_ids)) {}
bool BelongsToTargetNamespaces(DocumentId document_id) const override {
+ auto document_filter_data_optional_ =
+ document_store_.GetAliveDocumentFilterData(document_id);
+ if (!document_filter_data_optional_) {
+ // The document doesn't exist.
+ return false;
+ }
if (target_namespace_ids_.empty()) {
return true;
}
- auto document_filter_data_or_ =
- document_store_.GetDocumentFilterData(document_id);
- return document_filter_data_or_.ok() &&
- target_namespace_ids_.count(
- document_filter_data_or_.ValueOrDie().namespace_id())> 0;
+ DocumentFilterData document_filter_data =
+ document_filter_data_optional_.value();
+ return target_namespace_ids_.count(document_filter_data.namespace_id()) > 0;
}
const DocumentStore& document_store_;
std::unordered_set<NamespaceId> target_namespace_ids_;
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index f83fe0a..81f65b2 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -460,6 +460,10 @@ MATCHER_P(EqualsSearchResultIgnoreStatsAndScores, expected, "") {
ICING_ASSERT_OK(statusor.status()); \
lhs = std::move(statusor).ValueOrDie()
+#define ICING_ASSERT_HAS_VALUE_AND_ASSIGN(lhs, rexpr) \
+ ASSERT_TRUE(rexpr); \
+ lhs = rexpr.value()
+
} // namespace lib
} // namespace icing
diff --git a/icing/tokenization/combined-tokenizer_test.cc b/icing/tokenization/combined-tokenizer_test.cc
index 0212e4f..42c7743 100644
--- a/icing/tokenization/combined-tokenizer_test.cc
+++ b/icing/tokenization/combined-tokenizer_test.cc
@@ -15,19 +15,19 @@
#include <string_view>
#include <vector>
-#include "testing/base/public/gmock.h"
-#include "testing/base/public/gunit.h"
-#include "third_party/icing/portable/platform.h"
-#include "third_party/icing/proto/schema_proto_portable.pb.h"
-#include "third_party/icing/testing/common-matchers.h"
-#include "third_party/icing/testing/icu-data-file-helper.h"
-#include "third_party/icing/testing/jni-test-helpers.h"
-#include "third_party/icing/testing/test-data.h"
-#include "third_party/icing/tokenization/language-segmenter-factory.h"
-#include "third_party/icing/tokenization/language-segmenter.h"
-#include "third_party/icing/tokenization/tokenizer-factory.h"
-#include "third_party/icing/tokenization/tokenizer.h"
-#include "third_party/icu/include/unicode/uloc.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/tokenizer-factory.h"
+#include "icing/tokenization/tokenizer.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -43,9 +43,9 @@ class CombinedTokenizerTest : public ::testing::Test {
void SetUp() override {
if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //third_party/icing/BUILD.
+ // File generated via icu_data_file rule in //icing/BUILD.
icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("third_party/icing/icu.dat")));
+ GetTestFilePath("icing/icu.dat")));
}
jni_cache_ = GetTestJniCache();
diff --git a/icing/tokenization/language-segmenter_benchmark.cc b/icing/tokenization/language-segmenter_benchmark.cc
index 6f7d4df..748a322 100644
--- a/icing/tokenization/language-segmenter_benchmark.cc
+++ b/icing/tokenization/language-segmenter_benchmark.cc
@@ -27,7 +27,7 @@
// //icing/tokenization:language-segmenter_benchmark
//
// $ blaze-bin/icing/tokenization/language-segmenter_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// Make target //icing/tokenization:language-segmenter depend on
@@ -41,7 +41,7 @@
// blaze-bin/icing/tokenization/language-segmenter_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/language-segmenter_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/language-segmenter_benchmark --benchmark_filter=all
// --adb
// Flag to tell the benchmark that it'll be run on an Android device via adb,
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
index e5de6e6..bd80718 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
@@ -74,6 +74,7 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
MarkAsDone();
return false;
}
+
return true;
}
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc
index 277ece6..8b13cd1 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc
@@ -423,7 +423,6 @@ TEST_P(ReverseJniLanguageSegmenterTest, CJKT) {
// Khmer
EXPECT_THAT(language_segmenter->GetAllTerms("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"),
IsOkAndHolds(ElementsAre("ញុំ", "ដើរទៅ", "ធ្វើការ", "រាល់ថ្ងៃ", "។")));
-
// Thai
EXPECT_THAT(
language_segmenter->GetAllTerms("ฉันเดินไปทำงานทุกวัน"),
diff --git a/icing/transform/icu/icu-normalizer_benchmark.cc b/icing/transform/icu/icu-normalizer_benchmark.cc
index fdd4c70..fe8289a 100644
--- a/icing/transform/icu/icu-normalizer_benchmark.cc
+++ b/icing/transform/icu/icu-normalizer_benchmark.cc
@@ -25,7 +25,7 @@
// //icing/transform/icu:icu-normalizer_benchmark
//
// $ blaze-bin/icing/transform/icu/icu-normalizer_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// Make target //icing/transform:normalizer depend on
@@ -39,7 +39,7 @@
// blaze-bin/icing/transform/icu/icu-normalizer_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/icu-normalizer_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/icu-normalizer_benchmark --benchmark_filter=all
// --adb
// Flag to tell the benchmark that it'll be run on an Android device via adb,
diff --git a/icing/transform/map/map-normalizer_benchmark.cc b/icing/transform/map/map-normalizer_benchmark.cc
index 8268541..4560329 100644
--- a/icing/transform/map/map-normalizer_benchmark.cc
+++ b/icing/transform/map/map-normalizer_benchmark.cc
@@ -24,7 +24,7 @@
// //icing/transform/map:map-normalizer_benchmark
//
// $ blaze-bin/icing/transform/map/map-normalizer_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
@@ -35,7 +35,7 @@
// blaze-bin/icing/transform/map/map-normalizer_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/map-normalizer_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/map-normalizer_benchmark --benchmark_filter=all
namespace icing {
namespace lib {
diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc
index 45c23e0..b03d3f5 100644
--- a/icing/util/document-validator_test.cc
+++ b/icing/util/document-validator_test.cc
@@ -125,10 +125,10 @@ class DocumentValidatorTest : public ::testing::Test {
}
std::string schema_dir_;
- std::unique_ptr<DocumentValidator> document_validator_;
- std::unique_ptr<SchemaStore> schema_store_;
Filesystem filesystem_;
FakeClock fake_clock_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentValidator> document_validator_;
};
TEST_F(DocumentValidatorTest, ValidateSimpleSchemasOk) {
diff --git a/icing/util/fingerprint-util.cc b/icing/util/fingerprint-util.cc
new file mode 100644
index 0000000..0ea843f
--- /dev/null
+++ b/icing/util/fingerprint-util.cc
@@ -0,0 +1,48 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/fingerprint-util.h"
+
+namespace icing {
+namespace lib {
+
+namespace fingerprint_util {
+
+// A formatter to properly handle a string that is actually just a hash value.
+std::string GetFingerprintString(uint64_t fingerprint) {
+ std::string encoded_fprint;
+ // DynamicTrie cannot handle keys with '0' as bytes. So, we encode it in
+ // base128 and add 1 to make sure that no byte is '0'. This increases the
+ // size of the encoded_fprint from 8-bytes to 10-bytes.
+ while (fingerprint) {
+ encoded_fprint.push_back((fingerprint & 0x7F) + 1);
+ fingerprint >>= 7;
+ }
+ return encoded_fprint;
+}
+
+uint64_t GetFingerprint(std::string_view fingerprint_string) {
+ uint64_t fprint = 0;
+ for (int i = fingerprint_string.length() - 1; i >= 0; --i) {
+ fprint <<= 7;
+ char c = fingerprint_string[i] - 1;
+ fprint |= (c & 0x7F);
+ }
+ return fprint;
+}
+
+} // namespace fingerprint_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/fingerprint-util.h b/icing/util/fingerprint-util.h
new file mode 100644
index 0000000..9e98617
--- /dev/null
+++ b/icing/util/fingerprint-util.h
@@ -0,0 +1,47 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_FINGERPRINT_UTIL_H_
+#define ICING_UTIL_FINGERPRINT_UTIL_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+namespace icing {
+namespace lib {
+
+namespace fingerprint_util {
+
+// Converts from a fingerprint to a fingerprint string.
+std::string GetFingerprintString(uint64_t fingerprint);
+
+// Converts from a fingerprint string to a fingerprint.
+uint64_t GetFingerprint(std::string_view fingerprint_string);
+
+// A formatter to properly handle a string that is actually just a hash value.
+class FingerprintStringFormatter {
+ public:
+ std::string operator()(std::string_view fingerprint_string) {
+ uint64_t fingerprint = GetFingerprint(fingerprint_string);
+ return std::to_string(fingerprint);
+ }
+};
+
+} // namespace fingerprint_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_FINGERPRINT_UTIL_H_
diff --git a/icing/util/fingerprint-util_test.cc b/icing/util/fingerprint-util_test.cc
new file mode 100644
index 0000000..948c75a
--- /dev/null
+++ b/icing/util/fingerprint-util_test.cc
@@ -0,0 +1,75 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/fingerprint-util.h"
+
+#include <cstdint>
+#include <limits>
+
+#include "icing/text_classifier/lib3/utils/hash/farmhash.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+namespace fingerprint_util {
+
+namespace {
+
+using ::testing::Eq;
+
+TEST(FingerprintUtilTest, ConversionIsReversible) {
+ std::string str = "foo-bar-baz";
+ uint64_t fprint = tc3farmhash::Fingerprint64(str);
+ std::string fprint_string = GetFingerprintString(fprint);
+ EXPECT_THAT(GetFingerprint(fprint_string), Eq(fprint));
+}
+
+TEST(FingerprintUtilTest, ZeroConversionIsReversible) {
+ uint64_t fprint = 0;
+ std::string fprint_string = GetFingerprintString(fprint);
+ EXPECT_THAT(GetFingerprint(fprint_string), Eq(fprint));
+}
+
+TEST(FingerprintUtilTest, MultipleConversionsAreReversible) {
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(25)), Eq(25));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(766)), Eq(766));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(2305)), Eq(2305));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(6922)), Eq(6922));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(62326)), Eq(62326));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(186985)), Eq(186985));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(560962)), Eq(560962));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(1682893)), Eq(1682893));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(15146065)), Eq(15146065));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(136314613)), Eq(136314613));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(1226831545)), Eq(1226831545));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(11041483933)),
+ Eq(11041483933));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(2683080596566)),
+ Eq(2683080596566));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(72443176107373)),
+ Eq(72443176107373));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(1955965754899162)),
+ Eq(1955965754899162));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(52811075382277465)),
+ Eq(52811075382277465));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(4277697105964474945)),
+ Eq(4277697105964474945));
+}
+
+} // namespace
+
+} // namespace fingerprint_util
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/logging.cc b/icing/util/logging.cc
new file mode 100644
index 0000000..8498be4
--- /dev/null
+++ b/icing/util/logging.cc
@@ -0,0 +1,124 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/logging.h"
+
+#include <atomic>
+#include <exception>
+#include <string_view>
+
+#include "icing/util/logging_raw.h"
+
+namespace icing {
+namespace lib {
+namespace {
+// Returns pointer to beginning of last /-separated token from file_name.
+// file_name should be a pointer to a zero-terminated array of chars.
+// E.g., "foo/bar.cc" -> "bar.cc", "foo/" -> "", "foo" -> "foo".
+const char *JumpToBasename(const char *file_name) {
+ if (file_name == nullptr) {
+ return nullptr;
+ }
+
+ // Points to the beginning of the last encountered token.
+ size_t last_token_start = std::string_view(file_name).find_last_of('/');
+ if (last_token_start == std::string_view::npos) {
+ return file_name;
+ }
+ return file_name + last_token_start + 1;
+}
+
+// Calculate the logging level value based on severity and verbosity.
+constexpr uint32_t CalculateLoggingLevel(LogSeverity::Code severity,
+ uint16_t verbosity) {
+ uint32_t logging_level = static_cast<uint16_t>(severity);
+ logging_level = (logging_level << 16) | verbosity;
+ return logging_level;
+}
+
+#if defined(ICING_DEBUG_LOGGING)
+#define DEFAULT_LOGGING_LEVEL CalculateLoggingLevel(LogSeverity::VERBOSE, 1)
+#else
+#define DEFAULT_LOGGING_LEVEL CalculateLoggingLevel(LogSeverity::INFO, 0)
+#endif
+
+// The current global logging level for Icing, which controls which logs are
+// printed based on severity and verbosity.
+//
+// This needs to be global so that it can be easily accessed from ICING_LOG and
+// ICING_VLOG macros spread throughout the entire code base.
+//
+// The first 16 bits represent the minimal log severity.
+// The last 16 bits represent the current verbosity.
+std::atomic<uint32_t> global_logging_level = DEFAULT_LOGGING_LEVEL;
+
+} // namespace
+
+// Whether we should log according to the current logging level.
+bool ShouldLog(LogSeverity::Code severity, int16_t verbosity) {
+ if (verbosity < 0) {
+ return false;
+ }
+ // Using the relaxed order for better performance because we only need to
+ // guarantee the atomicity for this specific statement, without the need to
+ // worry about reordering.
+ uint32_t curr_logging_level =
+ global_logging_level.load(std::memory_order_relaxed);
+ // If severity is less than the threshold set.
+ if (static_cast<uint16_t>(severity) < (curr_logging_level >> 16)) {
+ return false;
+ }
+ if (severity == LogSeverity::VERBOSE) {
+ // Return whether the verbosity is within the current verbose level set.
+ return verbosity <= (curr_logging_level & 0xffff);
+ }
+ return true;
+}
+
+bool SetLoggingLevel(LogSeverity::Code severity, int16_t verbosity) {
+ if (verbosity < 0) {
+ return false;
+ }
+ if (severity > LogSeverity::VERBOSE && verbosity > 0) {
+ return false;
+ }
+ // Using the relaxed order for better performance because we only need to
+ // guarantee the atomicity for this specific statement, without the need to
+ // worry about reordering.
+ global_logging_level.store(CalculateLoggingLevel(severity, verbosity),
+ std::memory_order_relaxed);
+ return true;
+}
+
+LogMessage::LogMessage(LogSeverity::Code severity, uint16_t verbosity,
+ const char *file_name, int line_number)
+ : severity_(severity),
+ verbosity_(verbosity),
+ should_log_(ShouldLog(severity_, verbosity_)),
+ stream_(should_log_) {
+ if (should_log_) {
+ stream_ << JumpToBasename(file_name) << ":" << line_number << ": ";
+ }
+}
+
+LogMessage::~LogMessage() {
+ if (should_log_) {
+ LowLevelLogging(severity_, kIcingLoggingTag, stream_.message);
+ }
+ if (severity_ == LogSeverity::FATAL) {
+ std::terminate(); // Will print a stacktrace (stdout or logcat).
+ }
+}
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/logging.h b/icing/util/logging.h
index 9d598fe..7742302 100644
--- a/icing/util/logging.h
+++ b/icing/util/logging.h
@@ -15,14 +15,130 @@
#ifndef ICING_UTIL_LOGGING_H_
#define ICING_UTIL_LOGGING_H_
-#include "icing/text_classifier/lib3/utils/base/logging.h"
+#include <atomic>
+#include <cstdint>
+#include <string>
+#include "icing/proto/debug.pb.h"
+
+// This header provides base/logging.h style macros, ICING_LOG and ICING_VLOG,
+// for logging in various platforms. The macros use __android_log_write on
+// Android, and log to stdout/stderr on others. It also provides a function
+// SetLoggingLevel to control the log severity level for ICING_LOG and verbosity
+// for ICING_VLOG.
namespace icing {
namespace lib {
-// TODO(b/146903474) Add verbose level control
-#define ICING_VLOG(verbose_level) TC3_VLOG(verbose_level)
-#define ICING_LOG(severity) TC3_LOG(severity)
+// Whether we should log according to the current logging level.
+// The function will always return false when verbosity is negative.
+bool ShouldLog(LogSeverity::Code severity, int16_t verbosity = 0);
+
+// Set the minimal logging severity to be enabled, and the verbose level to see
+// from the logs.
+// Return false if severity is set higher than VERBOSE but verbosity is not 0.
+// The function will always return false when verbosity is negative.
+bool SetLoggingLevel(LogSeverity::Code severity, int16_t verbosity = 0);
+
+// A tiny code footprint string stream for assembling log messages.
+struct LoggingStringStream {
+ explicit LoggingStringStream(bool should_log) : should_log_(should_log) {}
+ LoggingStringStream& stream() { return *this; }
+
+ std::string message;
+ const bool should_log_;
+};
+
+template <typename T>
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ const T& entry) {
+ if (stream.should_log_) {
+ stream.message.append(std::to_string(entry));
+ }
+ return stream;
+}
+
+template <typename T>
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ T* const entry) {
+ if (stream.should_log_) {
+ stream.message.append(
+ std::to_string(reinterpret_cast<const uint64_t>(entry)));
+ }
+ return stream;
+}
+
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ const char* message) {
+ if (stream.should_log_) {
+ stream.message.append(message);
+ }
+ return stream;
+}
+
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ const std::string& message) {
+ if (stream.should_log_) {
+ stream.message.append(message);
+ }
+ return stream;
+}
+
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ std::string_view message) {
+ if (stream.should_log_) {
+ stream.message.append(message);
+ }
+ return stream;
+}
+
+template <typename T1, typename T2>
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ const std::pair<T1, T2>& entry) {
+ if (stream.should_log_) {
+ stream << "(" << entry.first << ", " << entry.second << ")";
+ }
+ return stream;
+}
+
+// The class that does all the work behind our ICING_LOG(severity) macros. Each
+// ICING_LOG(severity) << obj1 << obj2 << ...; logging statement creates a
+// LogMessage temporary object containing a stringstream. Each operator<< adds
+// info to that stringstream and the LogMessage destructor performs the actual
+// logging. The reason this works is that in C++, "all temporary objects are
+// destroyed as the last step in evaluating the full-expression that (lexically)
+// contains the point where they were created." For more info, see
+// http://en.cppreference.com/w/cpp/language/lifetime. Hence, the destructor is
+// invoked after the last << from that logging statement.
+class LogMessage {
+ public:
+ LogMessage(LogSeverity::Code severity, uint16_t verbosity,
+ const char* file_name, int line_number) __attribute__((noinline));
+
+ ~LogMessage() __attribute__((noinline));
+
+ // Returns the stream associated with the logger object.
+ LoggingStringStream& stream() { return stream_; }
+
+ private:
+ const LogSeverity::Code severity_;
+ const uint16_t verbosity_;
+ const bool should_log_;
+
+ // Stream that "prints" all info into a string (not to a file). We construct
+ // here the entire logging message and next print it in one operation.
+ LoggingStringStream stream_;
+};
+
+inline constexpr char kIcingLoggingTag[] = "AppSearchIcing";
+
+#define ICING_VLOG(verbose_level) \
+ ::icing::lib::LogMessage(::icing::lib::LogSeverity::VERBOSE, verbose_level, \
+ __FILE__, __LINE__) \
+ .stream()
+#define ICING_LOG(severity) \
+ ::icing::lib::LogMessage(::icing::lib::LogSeverity::severity, \
+ /*verbosity=*/0, __FILE__, __LINE__) \
+ .stream()
} // namespace lib
} // namespace icing
diff --git a/icing/util/logging_raw.cc b/icing/util/logging_raw.cc
new file mode 100644
index 0000000..5e67fb3
--- /dev/null
+++ b/icing/util/logging_raw.cc
@@ -0,0 +1,102 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/logging_raw.h"
+
+#include <cstdio>
+#include <string>
+
+// NOTE: this file contains two implementations: one for Android, one for all
+// other cases. We always build exactly one implementation.
+#if defined(__ANDROID__)
+
+// Compiled as part of Android.
+#include <android/log.h>
+
+namespace icing {
+namespace lib {
+
+namespace {
+// Converts LogSeverity to level for __android_log_write.
+int GetAndroidLogLevel(LogSeverity::Code severity) {
+ switch (severity) {
+ case LogSeverity::VERBOSE:
+ return ANDROID_LOG_VERBOSE;
+ case LogSeverity::DBG:
+ return ANDROID_LOG_DEBUG;
+ case LogSeverity::INFO:
+ return ANDROID_LOG_INFO;
+ case LogSeverity::WARNING:
+ return ANDROID_LOG_WARN;
+ case LogSeverity::ERROR:
+ return ANDROID_LOG_ERROR;
+ case LogSeverity::FATAL:
+ return ANDROID_LOG_FATAL;
+ }
+}
+} // namespace
+
+void LowLevelLogging(LogSeverity::Code severity, const std::string& tag,
+ const std::string& message) {
+ const int android_log_level = GetAndroidLogLevel(severity);
+#if __ANDROID_API__ >= 30
+ if (!__android_log_is_loggable(android_log_level, tag.c_str(),
+ /*default_prio=*/ANDROID_LOG_INFO)) {
+ return;
+ }
+#endif // __ANDROID_API__ >= 30
+ __android_log_write(android_log_level, tag.c_str(), message.c_str());
+}
+
+} // namespace lib
+} // namespace icing
+
+#else // if defined(__ANDROID__)
+
+// Not on Android: implement LowLevelLogging to print to stderr (see below).
+namespace icing {
+namespace lib {
+
+namespace {
+// Converts LogSeverity to human-readable text.
+const char *LogSeverityToString(LogSeverity::Code severity) {
+ switch (severity) {
+ case LogSeverity::VERBOSE:
+ return "VERBOSE";
+ case LogSeverity::DBG:
+ return "DEBUG";
+ case LogSeverity::INFO:
+ return "INFO";
+ case LogSeverity::WARNING:
+ return "WARNING";
+ case LogSeverity::ERROR:
+ return "ERROR";
+ case LogSeverity::FATAL:
+ return "FATAL";
+ }
+}
+} // namespace
+
+void LowLevelLogging(LogSeverity::Code severity, const std::string &tag,
+ const std::string &message) {
+ // TODO(b/146903474) Do not log to stderr for logs other than FATAL and ERROR.
+ fprintf(stderr, "[%s] %s : %s\n", LogSeverityToString(severity), tag.c_str(),
+ message.c_str());
+ fflush(stderr);
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // if defined(__ANDROID__)
diff --git a/icing/util/logging_raw.h b/icing/util/logging_raw.h
new file mode 100644
index 0000000..99dddb6
--- /dev/null
+++ b/icing/util/logging_raw.h
@@ -0,0 +1,34 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_LOGGING_RAW_H_
+#define ICING_UTIL_LOGGING_RAW_H_
+
+#include <string>
+
+#include "icing/proto/debug.pb.h"
+
+namespace icing {
+namespace lib {
+
+// Low-level logging primitive. Logs a message, with the indicated log
+// severity. From android/log.h: "the tag normally corresponds to the component
+// that emits the log message, and should be reasonably small".
+void LowLevelLogging(LogSeverity::Code severity, const std::string &tag,
+ const std::string &message);
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_LOGGING_RAW_H_
diff --git a/icing/util/logging_test.cc b/icing/util/logging_test.cc
new file mode 100644
index 0000000..eac018e
--- /dev/null
+++ b/icing/util/logging_test.cc
@@ -0,0 +1,158 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/logging.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/util/logging_raw.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::testing::EndsWith;
+using ::testing::IsEmpty;
+
+TEST(LoggingTest, SetLoggingLevelWithInvalidArguments) {
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::DBG, 1));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::INFO, 1));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::WARNING, 1));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::ERROR, 1));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::FATAL, 1));
+
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::DBG, 2));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::INFO, 2));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::WARNING, 2));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::ERROR, 2));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::FATAL, 2));
+
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::VERBOSE, -1));
+}
+
+TEST(LoggingTest, SetLoggingLevelTest) {
+ // Set to INFO
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::INFO));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+
+ // Set to WARNING
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::WARNING));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_FALSE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+
+ // Set to DEBUG
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+}
+
+TEST(LoggingTest, VerboseLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 1));
+ EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_TRUE(ShouldLog(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_TRUE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, VerboseLoggingIsControlledByVerbosity) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 2));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 3));
+ EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 2));
+ EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1));
+
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 1));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 2));
+ EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1));
+
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 0));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 0));
+
+ // Negative verbosity is invalid.
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, -1));
+}
+
+TEST(LoggingTest, DebugLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::DBG));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_TRUE(ShouldLog(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_TRUE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, InfoLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::INFO));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_TRUE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, WarningLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::WARNING));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_FALSE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_TRUE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, ErrorLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::ERROR));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_FALSE(ShouldLog(LogSeverity::INFO));
+ EXPECT_FALSE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_TRUE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, FatalLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::FATAL));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_FALSE(ShouldLog(LogSeverity::INFO));
+ EXPECT_FALSE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_FALSE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, LoggingStreamTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::INFO));
+ // This one should be logged.
+ LoggingStringStream stream1 = (ICING_LOG(INFO) << "Hello"
+ << "World!");
+ EXPECT_THAT(stream1.message, EndsWith("HelloWorld!"));
+
+ // This one should not be logged, thus empty.
+ LoggingStringStream stream2 = (ICING_LOG(DBG) << "Hello"
+ << "World!");
+ EXPECT_THAT(stream2.message, IsEmpty());
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java
index 95e0c84..16a4a4a 100644
--- a/java/src/com/google/android/icing/IcingSearchEngine.java
+++ b/java/src/com/google/android/icing/IcingSearchEngine.java
@@ -16,6 +16,9 @@ package com.google.android.icing;
import android.util.Log;
import androidx.annotation.NonNull;
+import androidx.annotation.Nullable;
+import com.google.android.icing.proto.DebugInfoResultProto;
+import com.google.android.icing.proto.DebugInfoVerbosity;
import com.google.android.icing.proto.DeleteByNamespaceResultProto;
import com.google.android.icing.proto.DeleteByQueryResultProto;
import com.google.android.icing.proto.DeleteBySchemaTypeResultProto;
@@ -29,6 +32,7 @@ import com.google.android.icing.proto.GetSchemaResultProto;
import com.google.android.icing.proto.GetSchemaTypeResultProto;
import com.google.android.icing.proto.IcingSearchEngineOptions;
import com.google.android.icing.proto.InitializeResultProto;
+import com.google.android.icing.proto.LogSeverity;
import com.google.android.icing.proto.OptimizeResultProto;
import com.google.android.icing.proto.PersistToDiskResultProto;
import com.google.android.icing.proto.PersistType;
@@ -74,7 +78,9 @@ public class IcingSearchEngine implements Closeable {
System.loadLibrary("icing");
}
- /** @throws IllegalStateException if IcingSearchEngine fails to be created */
+ /**
+ * @throws IllegalStateException if IcingSearchEngine fails to be created
+ */
public IcingSearchEngine(@NonNull IcingSearchEngineOptions options) {
nativePointer = nativeCreate(options.toByteArray());
if (nativePointer == 0) {
@@ -439,9 +445,16 @@ public class IcingSearchEngine implements Closeable {
@NonNull
public DeleteByQueryResultProto deleteByQuery(@NonNull SearchSpecProto searchSpec) {
+ return deleteByQuery(searchSpec, /*returnDeletedDocumentInfo=*/ false);
+ }
+
+ @NonNull
+ public DeleteByQueryResultProto deleteByQuery(
+ @NonNull SearchSpecProto searchSpec, boolean returnDeletedDocumentInfo) {
throwIfClosed();
- byte[] deleteResultBytes = nativeDeleteByQuery(this, searchSpec.toByteArray());
+ byte[] deleteResultBytes =
+ nativeDeleteByQuery(this, searchSpec.toByteArray(), returnDeletedDocumentInfo);
if (deleteResultBytes == null) {
Log.e(TAG, "Received null DeleteResultProto from native.");
return DeleteByQueryResultProto.newBuilder()
@@ -539,8 +552,7 @@ public class IcingSearchEngine implements Closeable {
}
try {
- return StorageInfoResultProto.parseFrom(
- storageInfoResultProtoBytes, EXTENSION_REGISTRY_LITE);
+ return StorageInfoResultProto.parseFrom(storageInfoResultProtoBytes, EXTENSION_REGISTRY_LITE);
} catch (InvalidProtocolBufferException e) {
Log.e(TAG, "Error parsing GetOptimizeInfoResultProto.", e);
return StorageInfoResultProto.newBuilder()
@@ -550,6 +562,28 @@ public class IcingSearchEngine implements Closeable {
}
@NonNull
+ public DebugInfoResultProto getDebugInfo(DebugInfoVerbosity.Code verbosity) {
+ throwIfClosed();
+
+ byte[] debugInfoResultProtoBytes = nativeGetDebugInfo(this, verbosity.getNumber());
+ if (debugInfoResultProtoBytes == null) {
+ Log.e(TAG, "Received null DebugInfoResultProto from native.");
+ return DebugInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return DebugInfoResultProto.parseFrom(debugInfoResultProtoBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing DebugInfoResultProto.", e);
+ return DebugInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
public ResetResultProto reset() {
throwIfClosed();
@@ -571,6 +605,31 @@ public class IcingSearchEngine implements Closeable {
}
}
+ public static boolean shouldLog(LogSeverity.Code severity) {
+ return shouldLog(severity, (short) 0);
+ }
+
+ public static boolean shouldLog(LogSeverity.Code severity, short verbosity) {
+ return nativeShouldLog((short) severity.getNumber(), verbosity);
+ }
+
+ public static boolean setLoggingLevel(LogSeverity.Code severity) {
+ return setLoggingLevel(severity, (short) 0);
+ }
+
+ public static boolean setLoggingLevel(LogSeverity.Code severity, short verbosity) {
+ return nativeSetLoggingLevel((short) severity.getNumber(), verbosity);
+ }
+
+ @Nullable
+ public static String getLoggingTag() {
+ String tag = nativeGetLoggingTag();
+ if (tag == null) {
+ Log.e(TAG, "Received null logging tag from native.");
+ }
+ return tag;
+ }
+
private static native long nativeCreate(byte[] icingSearchEngineOptionsBytes);
private static native void nativeDestroy(IcingSearchEngine instance);
@@ -615,7 +674,7 @@ public class IcingSearchEngine implements Closeable {
IcingSearchEngine instance, String schemaType);
private static native byte[] nativeDeleteByQuery(
- IcingSearchEngine instance, byte[] searchSpecBytes);
+ IcingSearchEngine instance, byte[] searchSpecBytes, boolean returnDeletedDocumentInfo);
private static native byte[] nativePersistToDisk(IcingSearchEngine instance, int persistType);
@@ -629,4 +688,12 @@ public class IcingSearchEngine implements Closeable {
private static native byte[] nativeSearchSuggestions(
IcingSearchEngine instance, byte[] suggestionSpecBytes);
+
+ private static native byte[] nativeGetDebugInfo(IcingSearchEngine instance, int verbosity);
+
+ private static native boolean nativeShouldLog(short severity, short verbosity);
+
+ private static native boolean nativeSetLoggingLevel(short severity, short verbosity);
+
+ private static native String nativeGetLoggingTag();
}
diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
index a46814c..c690990 100644
--- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
+++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
@@ -17,6 +17,8 @@ package com.google.android.icing;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth.assertWithMessage;
+import com.google.android.icing.proto.DebugInfoResultProto;
+import com.google.android.icing.proto.DebugInfoVerbosity;
import com.google.android.icing.proto.DeleteByNamespaceResultProto;
import com.google.android.icing.proto.DeleteByQueryResultProto;
import com.google.android.icing.proto.DeleteBySchemaTypeResultProto;
@@ -30,6 +32,7 @@ import com.google.android.icing.proto.GetSchemaResultProto;
import com.google.android.icing.proto.GetSchemaTypeResultProto;
import com.google.android.icing.proto.IcingSearchEngineOptions;
import com.google.android.icing.proto.InitializeResultProto;
+import com.google.android.icing.proto.LogSeverity;
import com.google.android.icing.proto.OptimizeResultProto;
import com.google.android.icing.proto.PersistToDiskResultProto;
import com.google.android.icing.proto.PersistType;
@@ -389,6 +392,60 @@ public final class IcingSearchEngineTest {
DeleteByQueryResultProto deleteResultProto = icingSearchEngine.deleteByQuery(searchSpec);
assertStatusOk(deleteResultProto.getStatus());
+ // By default, the deleteByQuery API does not return the summary about deleted documents, unless
+ // the returnDeletedDocumentInfo parameter is set to true.
+ assertThat(deleteResultProto.getDeletedDocumentsList()).isEmpty();
+
+ GetResultProto getResultProto =
+ icingSearchEngine.get("namespace", "uri1", GetResultSpecProto.getDefaultInstance());
+ assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
+ getResultProto =
+ icingSearchEngine.get("namespace", "uri2", GetResultSpecProto.getDefaultInstance());
+ assertStatusOk(getResultProto.getStatus());
+ }
+
+ @Test
+ public void testDeleteByQueryWithDeletedDocumentInfo() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
+ assertThat(
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
+ .isEqualTo(StatusProto.Code.OK);
+
+ DocumentProto emailDocument1 =
+ createEmailDocument("namespace", "uri1").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo"))
+ .build();
+
+ assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus());
+ DocumentProto emailDocument2 =
+ createEmailDocument("namespace", "uri2").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("bar"))
+ .build();
+
+ assertStatusOk(icingSearchEngine.put(emailDocument2).getStatus());
+
+ SearchSpecProto searchSpec =
+ SearchSpecProto.newBuilder()
+ .setQuery("foo")
+ .setTermMatchType(TermMatchType.Code.PREFIX)
+ .build();
+
+ DeleteByQueryResultProto deleteResultProto =
+ icingSearchEngine.deleteByQuery(searchSpec, /*returnDeletedDocumentInfo=*/ true);
+ assertStatusOk(deleteResultProto.getStatus());
+ DeleteByQueryResultProto.DocumentGroupInfo info =
+ DeleteByQueryResultProto.DocumentGroupInfo.newBuilder()
+ .setNamespace("namespace")
+ .setSchema("Email")
+ .addUris("uri1")
+ .build();
+ assertThat(deleteResultProto.getDeletedDocumentsList()).containsExactly(info);
GetResultProto getResultProto =
icingSearchEngine.get("namespace", "uri1", GetResultSpecProto.getDefaultInstance());
@@ -434,6 +491,35 @@ public final class IcingSearchEngineTest {
}
@Test
+ public void testGetDebugInfo() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
+ assertThat(
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
+ .isEqualTo(StatusProto.Code.OK);
+
+ DocumentProto emailDocument = createEmailDocument("namespace", "uri");
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
+
+ DebugInfoResultProto debugInfoResultProtoBasic =
+ icingSearchEngine.getDebugInfo(DebugInfoVerbosity.Code.BASIC);
+ assertStatusOk(debugInfoResultProtoBasic.getStatus());
+ assertThat(debugInfoResultProtoBasic.getDebugInfo().getDocumentInfo().getCorpusInfoList())
+ .isEmpty(); // because verbosity=BASIC
+
+ DebugInfoResultProto debugInfoResultProtoDetailed =
+ icingSearchEngine.getDebugInfo(DebugInfoVerbosity.Code.DETAILED);
+ assertStatusOk(debugInfoResultProtoDetailed.getStatus());
+ assertThat(debugInfoResultProtoDetailed.getDebugInfo().getDocumentInfo().getCorpusInfoList())
+ .hasSize(1); // because verbosity=DETAILED
+ }
+
+ @Test
public void testGetAllNamespaces() throws Exception {
assertStatusOk(icingSearchEngine.initialize().getStatus());
@@ -668,6 +754,31 @@ public final class IcingSearchEngineTest {
assertThat(response.getSuggestions(1).getQuery()).isEqualTo("fo");
}
+ @Test
+ public void testLogging() throws Exception {
+ // Set to INFO
+ assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.INFO)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.INFO)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.DBG)).isFalse();
+
+ // Set to WARNING
+ assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.WARNING)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.WARNING)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.INFO)).isFalse();
+
+ // Set to DEBUG
+ assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.DBG)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.DBG)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE)).isFalse();
+
+ // Set to VERBOSE
+ assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.VERBOSE, (short) 1)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE, (short) 1)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE, (short) 2)).isFalse();
+
+ assertThat(IcingSearchEngine.getLoggingTag()).isNotEmpty();
+ }
+
private static void assertStatusOk(StatusProto status) {
assertWithMessage(status.getMessage()).that(status.getCode()).isEqualTo(StatusProto.Code.OK);
}
diff --git a/proto/icing/proto/debug.proto b/proto/icing/proto/debug.proto
index 504ae43..90d1981 100644
--- a/proto/icing/proto/debug.proto
+++ b/proto/icing/proto/debug.proto
@@ -24,48 +24,57 @@ option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
+message LogSeverity {
+ enum Code {
+ VERBOSE = 0;
+ // Unable to use DEBUG at this time because it breaks YTM's iOS tests
+ // cs/?q=%22-DDEBUG%3D1%22%20f:%2FYoutubeMusic%20f:blueprint&ssfr=1
+ DBG = 1;
+ INFO = 2;
+ WARNING = 3;
+ ERROR = 4;
+ FATAL = 5;
+ }
+}
+
+message DebugInfoVerbosity {
+ enum Code {
+ // Simplest debug information.
+ BASIC = 0;
+ // More detailed debug information as indicated in the field documentation
+ // below.
+ DETAILED = 1;
+ }
+}
+
// Next tag: 4
message IndexDebugInfoProto {
// Storage information of the index.
optional IndexStorageInfoProto index_storage_info = 1;
- message MainIndexDebugInfoProto {
- // Information about the main lexicon.
- // TODO(b/222349894) Convert the string output to a protocol buffer instead.
- optional string lexicon_info = 1;
-
- // Last added document id.
- optional uint32 last_added_document_id = 2;
-
- // If verbosity > 0, return information about the posting list storage.
- // TODO(b/222349894) Convert the string output to a protocol buffer instead.
- optional string flash_index_storage_info = 3;
- }
- optional MainIndexDebugInfoProto main_index_info = 2;
-
- message LiteIndexDebugInfoProto {
- // Current number of hits.
- optional uint32 curr_size = 1;
-
- // The maximum possible number of hits.
- optional uint32 hit_buffer_size = 2;
-
- // Last added document id.
- optional uint32 last_added_document_id = 3;
-
- // The first position in the hit buffer that is not sorted yet,
- // or curr_size if all hits are sorted.
- optional uint32 searchable_end = 4;
-
- // The most recent checksum of the lite index, by calling
- // LiteIndex::ComputeChecksum().
- optional uint32 index_crc = 5;
-
- // Information about the lite lexicon.
- // TODO(b/222349894) Convert the string output to a protocol buffer instead.
- optional string lexicon_info = 6;
- }
- optional LiteIndexDebugInfoProto lite_index_info = 3;
+ // A formatted string containing the following information:
+ // lexicon_info: Information about the main lexicon
+ // last_added_document_id: Last added document id
+ // flash_index_storage_info: If verbosity = DETAILED, return information about
+ // the posting list storage
+ //
+ // No direct contents from user-provided documents will ever appear in this
+ // string.
+ optional string main_index_info = 2;
+
+ // A formatted string containing the following information:
+ // curr_size: Current number of hits
+ // hit_buffer_size: The maximum possible number of hits
+ // last_added_document_id: Last added document id
+ // searchable_end: The first position in the hit buffer that is not sorted
+ // yet, or curr_size if all hits are sorted
+ // index_crc: The most recent checksum of the lite index, by calling
+ // LiteIndex::ComputeChecksum()
+ // lexicon_info: Information about the lite lexicon
+ //
+ // No direct contents from user-provided documents will ever appear in this
+ // string.
+ optional string lite_index_info = 3;
}
// Next tag: 4
@@ -84,8 +93,8 @@ message DocumentDebugInfoProto {
optional uint32 total_token = 4;
}
- // If verbosity > 0, return the total number of documents and tokens in each
- // (namespace, schema type) pair.
+ // If verbosity = DETAILED, return the total number of documents and tokens in
+ // each (namespace, schema type) pair.
// Note that deleted and expired documents are skipped in the output.
repeated CorpusInfo corpus_info = 3;
}
@@ -117,7 +126,8 @@ message DebugInfoProto {
message DebugInfoResultProto {
// Status code can be one of:
// OK
- // FAILED_PRECONDITION
+ // FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
+ // INTERNAL on IO errors, crc compute error.
//
// See status.proto for more details.
optional StatusProto status = 1;
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index 73d349b..305f410 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=436284873)
+set(synced_AOSP_CL_number=455217954)