diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-07-12 10:01:46 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-07-12 10:01:46 +0000 |
commit | e8471d976df1820c39999a32beefc9d4d8b200fb (patch) | |
tree | 27fb842ec40ad6eb9374fa046203584dc81d9c84 | |
parent | dc417b838162dd338aaa2adb775c46ea0ae0b345 (diff) | |
parent | ecb3673040687444c8e6a573b54a3affc4e3a963 (diff) | |
download | icing-android13-mainline-go-mediaprovider-release.tar.gz |
Snap for 8820681 from ecb3673040687444c8e6a573b54a3affc4e3a963 to mainline-go-mediaprovider-releaseaml_go_mpr_330912000android13-mainline-go-mediaprovider-release
Change-Id: I0c2524e6de8ddf957c18438c0b827242ce1c5147
92 files changed, 7533 insertions, 925 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c8e439..48a63d4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ project(icing) add_definitions("-DICING_REVERSE_JNI_SEGMENTATION=1") set(VERSION_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/icing/jni.lds") +set(CMAKE_CXX_STANDARD 17) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,--version-script=${VERSION_SCRIPT}") diff --git a/icing/file/destructible-directory.h b/icing/file/destructible-directory.h new file mode 100644 index 0000000..9a8bd4b --- /dev/null +++ b/icing/file/destructible-directory.h @@ -0,0 +1,74 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_FILE_DESTRUCTIBLE_DIRECTORY_H_ +#define ICING_FILE_DESTRUCTIBLE_DIRECTORY_H_ + +#include "icing/file/filesystem.h" +#include "icing/util/logging.h" + +namespace icing { +namespace lib { + +// A convenient RAII class which will recursively create the directory at the +// specified file path and delete it upon destruction. 
+class DestructibleDirectory { + public: + explicit DestructibleDirectory(const Filesystem* filesystem, std::string dir) + : filesystem_(filesystem), dir_(std::move(dir)) { + is_valid_ = filesystem_->CreateDirectoryRecursively(dir_.c_str()); + } + + DestructibleDirectory(const DestructibleDirectory&) = delete; + DestructibleDirectory& operator=(const DestructibleDirectory&) = delete; + + DestructibleDirectory(DestructibleDirectory&& rhs) + : filesystem_(nullptr), is_valid_(false) { + Swap(rhs); + } + + DestructibleDirectory& operator=(DestructibleDirectory&& rhs) { + Swap(rhs); + return *this; + } + + ~DestructibleDirectory() { + if (filesystem_ != nullptr && + !filesystem_->DeleteDirectoryRecursively(dir_.c_str())) { + // Swallow deletion failures as there's nothing actionable to do about + // them. + ICING_LOG(WARNING) << "Unable to delete temporary directory: " << dir_; + } + } + + const std::string& dir() const { return dir_; } + + bool is_valid() const { return is_valid_; } + + private: + void Swap(DestructibleDirectory& other) { + std::swap(filesystem_, other.filesystem_); + std::swap(dir_, other.dir_); + std::swap(is_valid_, other.is_valid_); + } + + const Filesystem* filesystem_; + std::string dir_; + bool is_valid_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_FILE_DESTRUCTIBLE_DIRECTORY_H_ diff --git a/icing/file/destructible-directory_test.cc b/icing/file/destructible-directory_test.cc new file mode 100644 index 0000000..c62db3b --- /dev/null +++ b/icing/file/destructible-directory_test.cc @@ -0,0 +1,118 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/file/destructible-directory.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/file/filesystem.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +using ::testing::Eq; + +TEST(DestructibleFileTest, DeletesDirectoryProperly) { + Filesystem filesystem; + std::string dir_path = GetTestTempDir() + "/dir1"; + std::string file_path = dir_path + "/file1"; + + { + // 1. Create a file in the directory. + ASSERT_TRUE(filesystem.CreateDirectoryRecursively(dir_path.c_str())); + ScopedFd sfd(filesystem.OpenForWrite(file_path.c_str())); + ASSERT_TRUE(sfd.is_valid()); + int i = 127; + ASSERT_TRUE(filesystem.Write(sfd.get(), &i, sizeof(i))); + } + + { + // 2. Open the directory with a DestructibleDirectory + DestructibleDirectory destructible(&filesystem, dir_path); + EXPECT_TRUE(destructible.is_valid()); + EXPECT_THAT(destructible.dir(), Eq(dir_path)); + } + + // 3. Ensure that the file and directory don't exist. + EXPECT_FALSE(filesystem.FileExists(file_path.c_str())); + EXPECT_FALSE(filesystem.DirectoryExists(dir_path.c_str())); +} + +TEST(DestructibleFileTest, MoveAssignDeletesFileProperly) { + Filesystem filesystem; + std::string filepath1 = GetTestTempDir() + "/dir1"; + std::string filepath2 = GetTestTempDir() + "/dir2"; + + // 1. Create dir1 + DestructibleDirectory destructible1(&filesystem, filepath1); + ASSERT_TRUE(destructible1.is_valid()); + ASSERT_TRUE(filesystem.DirectoryExists(filepath1.c_str())); + + { + // 2. 
Create dir2 + DestructibleDirectory destructible2(&filesystem, filepath2); + ASSERT_TRUE(destructible2.is_valid()); + + // Move assign destructible2 into destructible1 + destructible1 = std::move(destructible2); + } + + // 3. dir1 shouldn't exist because it was destroyed when destructible1 was + // move assigned to. + EXPECT_FALSE(filesystem.DirectoryExists(filepath1.c_str())); + + // 4. dir2 should still exist because it moved into destructible1 from + // destructible2. + EXPECT_TRUE(filesystem.DirectoryExists(filepath2.c_str())); +} + +TEST(DestructibleFileTest, MoveConstructionDeletesFileProperly) { + Filesystem filesystem; + std::string filepath1 = GetTestTempDir() + "/dir1"; + + // 1. Create destructible1, it'll be reconstructed soon anyways. + std::unique_ptr<DestructibleDirectory> destructible1; + { + // 2. Create file1 + DestructibleDirectory destructible2(&filesystem, filepath1); + ASSERT_TRUE(destructible2.is_valid()); + + // Move construct destructible1 from destructible2 + destructible1 = + std::make_unique<DestructibleDirectory>(std::move(destructible2)); + } + + // 3. dir1 should still exist because it moved into destructible1 from + // destructible2. + EXPECT_TRUE(destructible1->is_valid()); + EXPECT_TRUE(filesystem.DirectoryExists(filepath1.c_str())); + + { + // 4. Move construct destructible3 from destructible1 + DestructibleDirectory destructible3(std::move(*destructible1)); + EXPECT_TRUE(destructible3.is_valid()); + } + + // 5. dir1 shouldn't exist because it was destroyed when destructible3 was + // destroyed. 
+ EXPECT_FALSE(filesystem.DirectoryExists(filepath1.c_str())); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h index 686b4fb..ad7fae9 100644 --- a/icing/file/file-backed-proto-log.h +++ b/icing/file/file-backed-proto-log.h @@ -455,8 +455,8 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem, absl_ports::StrCat("Error truncating file: ", file_path)); } - ICING_LOG(INFO) << "Truncated '" << file_path << "' to size " - << last_known_good; + ICING_LOG(WARNING) << "Truncated '" << file_path << "' to size " + << last_known_good; } CreateResult create_result = { diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc index ed94fa5..2f60c6b 100644 --- a/icing/file/file-backed-vector_test.cc +++ b/icing/file/file-backed-vector_test.cc @@ -23,16 +23,16 @@ #include <string_view> #include <vector> -#include "knowledge/cerebra/sense/text_classifier/lib3/utils/base/status.h" -#include "testing/base/public/gmock.h" -#include "testing/base/public/gunit.h" -#include "third_party/icing/file/filesystem.h" -#include "third_party/icing/file/memory-mapped-file.h" -#include "third_party/icing/file/mock-filesystem.h" -#include "third_party/icing/testing/common-matchers.h" -#include "third_party/icing/testing/tmp-directory.h" -#include "third_party/icing/util/crc32.h" -#include "third_party/icing/util/logging.h" +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/file/filesystem.h" +#include "icing/file/memory-mapped-file.h" +#include "icing/file/mock-filesystem.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" +#include "icing/util/crc32.h" +#include "icing/util/logging.h" using ::testing::Eq; using ::testing::IsTrue; @@ -662,7 +662,7 @@ TEST_F(FileBackedVectorTest, RemapFailureStillValidInstance) { // 2. 
The next Set call should cause a resize and a remap. Make that remap // fail. int num_calls = 0; - auto open_lambda = [this, &num_calls](const char* file_name){ + auto open_lambda = [this, &num_calls](const char* file_name) { if (++num_calls == 2) { return -1; } diff --git a/icing/file/memory-mapped-file.cc b/icing/file/memory-mapped-file.cc index 9ff3adb..fc13a79 100644 --- a/icing/file/memory-mapped-file.cc +++ b/icing/file/memory-mapped-file.cc @@ -73,8 +73,6 @@ libtextclassifier3::Status MemoryMappedFile::Remap(size_t file_offset, if (mmap_size == 0) { // First unmap any previously mmapped region. Unmap(); - - // Nothing more to do. return libtextclassifier3::Status::OK; } @@ -122,6 +120,7 @@ libtextclassifier3::Status MemoryMappedFile::Remap(size_t file_offset, mmap_flags, fd.get(), aligned_offset); if (mmap_result == MAP_FAILED) { + mmap_result = nullptr; return absl_ports::InternalError(absl_ports::StrCat( "Failed to mmap region due to error: ", strerror(errno))); } diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc index 80a8011..d7ea4bb 100644 --- a/icing/file/portable-file-backed-proto-log_benchmark.cc +++ b/icing/file/portable-file-backed-proto-log_benchmark.cc @@ -33,7 +33,7 @@ // icing/file:portable-file-backed-proto-log_benchmark // // $ blaze-bin/icing/file/portable-file-backed-proto-log_benchmark -// --benchmarks=all +// --benchmark_filter=all // // // To build and run on an Android device (must be connected and rooted): @@ -48,7 +48,7 @@ // /data/local/tmp/ // // $ adb shell /data/local/tmp/portable-file-backed-proto-log-benchmark -// --benchmarks=all +// --benchmark_filter=all namespace icing { namespace lib { diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 952ba21..e390f0f 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -529,7 +529,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( } 
result_state_manager_ = std::make_unique<ResultStateManager>( - performance_configuration_.max_num_total_hits, *document_store_); + performance_configuration_.max_num_total_hits, *document_store_, + clock_.get()); return status; } @@ -1374,6 +1375,46 @@ StorageInfoResultProto IcingSearchEngine::GetStorageInfo() { return result; } +DebugInfoResultProto IcingSearchEngine::GetDebugInfo( + DebugInfoVerbosity::Code verbosity) { + DebugInfoResultProto debug_info; + StatusProto* result_status = debug_info.mutable_status(); + absl_ports::shared_lock l(&mutex_); + if (!initialized_) { + debug_info.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION); + debug_info.mutable_status()->set_message( + "IcingSearchEngine has not been initialized!"); + return debug_info; + } + + // Index + *debug_info.mutable_debug_info()->mutable_index_info() = + index_->GetDebugInfo(verbosity); + + // Document Store + libtextclassifier3::StatusOr<DocumentDebugInfoProto> document_debug_info = + document_store_->GetDebugInfo(verbosity); + if (!document_debug_info.ok()) { + TransformStatus(document_debug_info.status(), result_status); + return debug_info; + } + *debug_info.mutable_debug_info()->mutable_document_info() = + std::move(document_debug_info).ValueOrDie(); + + // Schema Store + libtextclassifier3::StatusOr<SchemaDebugInfoProto> schema_debug_info = + schema_store_->GetDebugInfo(); + if (!schema_debug_info.ok()) { + TransformStatus(schema_debug_info.status(), result_status); + return debug_info; + } + *debug_info.mutable_debug_info()->mutable_schema_info() = + std::move(schema_debug_info).ValueOrDie(); + + result_status->set_code(StatusProto::OK); + return debug_info; +} + libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk( PersistType::Code persist_type) { if (persist_type == PersistType::LITE) { @@ -1695,7 +1736,8 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore( } document_store_ = std::move(create_result_or.ValueOrDie().document_store); 
result_state_manager_ = std::make_unique<ResultStateManager>( - performance_configuration_.max_num_total_hits, *document_store_); + performance_configuration_.max_num_total_hits, *document_store_, + clock_.get()); // Potential data loss // TODO(b/147373249): Find a way to detect true data loss error @@ -1717,7 +1759,8 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore( } document_store_ = std::move(create_result_or.ValueOrDie().document_store); result_state_manager_ = std::make_unique<ResultStateManager>( - performance_configuration_.max_num_total_hits, *document_store_); + performance_configuration_.max_num_total_hits, *document_store_, + clock_.get()); // Deletes tmp directory if (!filesystem_->DeleteDirectoryRecursively( diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h index ff9c7fb..6a06fb9 100644 --- a/icing/icing-search-engine.h +++ b/icing/icing-search-engine.h @@ -403,6 +403,10 @@ class IcingSearchEngine { // that field will be set to -1. StorageInfoResultProto GetStorageInfo() ICING_LOCKS_EXCLUDED(mutex_); + // Get debug information for Icing. + DebugInfoResultProto GetDebugInfo(DebugInfoVerbosity::Code verbosity) + ICING_LOCKS_EXCLUDED(mutex_); + // Clears all data from Icing and re-initializes. Clients DO NOT need to call // Initialize again. 
// diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc index 5e610d5..6db66f6 100644 --- a/icing/icing-search-engine_benchmark.cc +++ b/icing/icing-search-engine_benchmark.cc @@ -51,7 +51,7 @@ // //icing:icing-search-engine_benchmark // // $ blaze-bin/icing/icing-search-engine_benchmark -// --benchmarks=all --benchmark_memory_usage +// --benchmark_filter=all --benchmark_memory_usage // // Run on an Android device: // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" @@ -61,7 +61,8 @@ // $ adb push blaze-bin/icing/icing-search-engine_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/icing-search-engine_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/icing-search-engine_benchmark +// --benchmark_filter=all namespace icing { namespace lib { @@ -222,24 +223,19 @@ void BM_IndexLatency(benchmark::State& state) { std::unique_ptr<IcingSearchEngine> icing = std::make_unique<IcingSearchEngine>(options); - ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); - int num_docs = state.range(0); std::vector<std::string> language = CreateLanguages(kLanguageSize, &random); const std::vector<DocumentProto> random_docs = GenerateRandomDocuments(&type_selector, num_docs, language); - Timer timer; - for (const DocumentProto& doc : random_docs) { - ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk()); + for (auto _ : state) { + state.PauseTiming(); + ASSERT_THAT(icing->Reset().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + state.ResumeTiming(); + for (const DocumentProto& doc : random_docs) { + ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk()); + } } - int64_t time_taken_ns = timer.GetElapsedNanoseconds(); - int64_t time_per_doc_ns = time_taken_ns / num_docs; - std::cout << "Number of indexed documents:\t" << num_docs - << "\t\tNumber of indexed sections:\t" << state.range(1) - << "\t\tTime taken (ms):\t" 
<< time_taken_ns / 1000000 - << "\t\tTime taken per doc (us):\t" << time_per_doc_ns / 1000 - << std::endl; } BENCHMARK(BM_IndexLatency) // Arguments: num_indexed_documents, num_sections diff --git a/icing/icing-search-engine_flush_benchmark.cc b/icing/icing-search-engine_flush_benchmark.cc index de8f550..04e83fe 100644 --- a/icing/icing-search-engine_flush_benchmark.cc +++ b/icing/icing-search-engine_flush_benchmark.cc @@ -48,7 +48,7 @@ // //icing:icing-search-engine_flush_benchmark // // $ blaze-bin/icing/icing-search-engine_flush_benchmark -// --benchmarks=all --benchmark_memory_usage +// --benchmark_filter=all --benchmark_memory_usage // // Run on an Android device: // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" @@ -59,7 +59,7 @@ // /data/local/tmp/ // // $ adb shell /data/local/tmp/icing-search-engine_flush_benchmark -// --benchmarks=all +// --benchmark_filter=all namespace icing { namespace lib { diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index 13e77b8..f922b98 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -3003,7 +3003,6 @@ TEST_F(IcingSearchEngineTest, OptimizationFailureUninitializesIcing) { HasSubstr("document_dir"))) .WillByDefault(swap_lambda); TestIcingSearchEngine icing(options, std::move(mock_filesystem), - std::move(mock_filesystem), std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); @@ -8680,6 +8679,81 @@ TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NonPositiveNumToReturn) { ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); } +TEST_F(IcingSearchEngineTest, GetDebugInfoVerbosityBasicSucceeds) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Create a document. 
+ DocumentProto document = CreateMessageDocument("namespace", "email"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + DebugInfoResultProto result = icing.GetDebugInfo(DebugInfoVerbosity::BASIC); + EXPECT_THAT(result.status(), ProtoIsOk()); + + // Some sanity checks + DebugInfoProto debug_info = result.debug_info(); + EXPECT_THAT( + debug_info.document_info().document_storage_info().num_alive_documents(), + Eq(1)); + EXPECT_THAT(debug_info.document_info().corpus_info(), + IsEmpty()); // because verbosity=BASIC + EXPECT_THAT(debug_info.schema_info().crc(), Gt(0)); +} + +TEST_F(IcingSearchEngineTest, + GetDebugInfoVerbosityDetailedSucceedsWithCorpusInfo) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Create 4 documents. + DocumentProto document1 = CreateMessageDocument("namespace1", "email/1"); + DocumentProto document2 = CreateMessageDocument("namespace1", "email/2"); + DocumentProto document3 = CreateMessageDocument("namespace2", "email/3"); + DocumentProto document4 = CreateMessageDocument("namespace2", "email/4"); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + + DebugInfoResultProto result = + icing.GetDebugInfo(DebugInfoVerbosity::DETAILED); + EXPECT_THAT(result.status(), ProtoIsOk()); + + // Some sanity checks + DebugInfoProto debug_info = result.debug_info(); + EXPECT_THAT( + debug_info.document_info().document_storage_info().num_alive_documents(), + Eq(4)); + EXPECT_THAT(debug_info.document_info().corpus_info(), SizeIs(2)); + EXPECT_THAT(debug_info.schema_info().crc(), Gt(0)); +} + +TEST_F(IcingSearchEngineTest, GetDebugInfoUninitialized) { + IcingSearchEngine icing(GetDefaultIcingOptions(), 
GetTestJniCache()); + DebugInfoResultProto result = + icing.GetDebugInfo(DebugInfoVerbosity::DETAILED); + EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); +} + +TEST_F(IcingSearchEngineTest, GetDebugInfoNoSchemaNoDocumentsSucceeds) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + DebugInfoResultProto result = + icing.GetDebugInfo(DebugInfoVerbosity::DETAILED); + ASSERT_THAT(result.status(), ProtoIsOk()); +} + +TEST_F(IcingSearchEngineTest, GetDebugInfoWithSchemaNoDocumentsSucceeds) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + DebugInfoResultProto result = + icing.GetDebugInfo(DebugInfoVerbosity::DETAILED); + ASSERT_THAT(result.status(), ProtoIsOk()); +} + #ifndef ICING_JNI_TEST // We skip this test case when we're running in a jni_test since the data files // will be stored in the android-instrumented storage location, rather than the diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc index 207c033..edc7881 100644 --- a/icing/index/index-processor.cc +++ b/icing/index/index-processor.cc @@ -67,6 +67,11 @@ libtextclassifier3::Status IndexProcessor::IndexDocument( uint32_t num_tokens = 0; libtextclassifier3::Status status; for (const TokenizedSection& section : tokenized_document.sections()) { + if (section.metadata.tokenizer == + StringIndexingConfig::TokenizerType::NONE) { + ICING_LOG(WARNING) + << "Unexpected TokenizerType::NONE found when indexing document."; + } // TODO(b/152934343): pass real namespace ids in Index::Editor editor = index_->Edit(document_id, section.metadata.id, @@ -82,8 +87,6 @@ libtextclassifier3::Status IndexProcessor::IndexDocument( status = editor.BufferTerm(token.data()); break; case StringIndexingConfig::TokenizerType::NONE: - 
ICING_LOG(WARNING) - << "Unexpected TokenizerType::NONE found when indexing document."; [[fallthrough]]; case StringIndexingConfig::TokenizerType::PLAIN: std::string normalized_term = normalizer_.NormalizeTerm(token); diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc index 1aad7d0..68c592c 100644 --- a/icing/index/index-processor_benchmark.cc +++ b/icing/index/index-processor_benchmark.cc @@ -39,7 +39,7 @@ // //icing/index:index-processor_benchmark // // $ blaze-bin/icing/index/index-processor_benchmark -// --benchmarks=all +// --benchmark_filter=all // // Run on an Android device: // Make target //icing/tokenization:language-segmenter depend on @@ -55,7 +55,7 @@ // $ adb push blaze-bin/icing/index/index-processor_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/index-processor_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/index-processor_benchmark --benchmark_filter=all // --adb // Flag to tell the benchmark that it'll be run on an Android device via adb, diff --git a/icing/index/index.h b/icing/index/index.h index 5c53349..f101a91 100644 --- a/icing/index/index.h +++ b/icing/index/index.h @@ -140,11 +140,11 @@ class Index { } // Returns debug information for the index in out. - // verbosity <= 0, simplest debug information - just the lexicons and lite - // index. - // verbosity > 0, more detailed debug information including raw postings - // lists. - IndexDebugInfoProto GetDebugInfo(int verbosity) const { + // verbosity = BASIC, simplest debug information - just the lexicons and lite + // index. + // verbosity = DETAILED, more detailed debug information including raw + // postings lists. 
+ IndexDebugInfoProto GetDebugInfo(DebugInfoVerbosity::Code verbosity) const { IndexDebugInfoProto debug_info; *debug_info.mutable_index_storage_info() = GetStorageInfo(); *debug_info.mutable_lite_index_info() = diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc index 8355c01..2eb3b59 100644 --- a/icing/index/index_test.cc +++ b/icing/index/index_test.cc @@ -41,6 +41,7 @@ #include "icing/testing/random-string.h" #include "icing/testing/tmp-directory.h" #include "icing/util/crc32.h" +#include "icing/util/logging.h" namespace icing { namespace lib { @@ -58,6 +59,8 @@ using ::testing::NiceMock; using ::testing::Not; using ::testing::Return; using ::testing::SizeIs; +using ::testing::StrEq; +using ::testing::StrNe; using ::testing::Test; using ::testing::UnorderedElementsAre; @@ -76,10 +79,10 @@ class IndexTest : public Test { icing_filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()); } - std::unique_ptr<Index> index_; - std::string index_dir_; - IcingFilesystem icing_filesystem_; Filesystem filesystem_; + IcingFilesystem icing_filesystem_; + std::string index_dir_; + std::unique_ptr<Index> index_; }; constexpr DocumentId kDocumentId0 = 0; @@ -1410,17 +1413,19 @@ TEST_F(IndexTest, GetDebugInfo) { ASSERT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); - IndexDebugInfoProto out0 = index_->GetDebugInfo(/*verbosity=*/0); - EXPECT_FALSE(out0.main_index_info().has_flash_index_storage_info()); - EXPECT_THAT(out0.main_index_info().last_added_document_id(), - Eq(kDocumentId1)); - EXPECT_THAT(out0.lite_index_info().curr_size(), Eq(2)); - EXPECT_THAT(out0.lite_index_info().last_added_document_id(), - Eq(kDocumentId2)); + IndexDebugInfoProto out0 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC); + ICING_LOG(DBG) << "main_index_info:\n" << out0.main_index_info(); + ICING_LOG(DBG) << "lite_index_info:\n" << out0.lite_index_info(); + EXPECT_THAT(out0.main_index_info(), Not(IsEmpty())); + 
EXPECT_THAT(out0.lite_index_info(), Not(IsEmpty())); - IndexDebugInfoProto out1 = index_->GetDebugInfo(/*verbosity=*/1); - EXPECT_THAT(out1.main_index_info().flash_index_storage_info(), - Not(IsEmpty())); + IndexDebugInfoProto out1 = index_->GetDebugInfo(DebugInfoVerbosity::DETAILED); + ICING_LOG(DBG) << "main_index_info:\n" << out1.main_index_info(); + ICING_LOG(DBG) << "lite_index_info:\n" << out1.lite_index_info(); + EXPECT_THAT(out1.main_index_info(), + SizeIs(Gt(out0.main_index_info().size()))); + EXPECT_THAT(out1.lite_index_info(), + SizeIs(Gt(out0.lite_index_info().size()))); // Add one more doc to the lite index. Debug strings should change. edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY, @@ -1429,26 +1434,25 @@ TEST_F(IndexTest, GetDebugInfo) { ASSERT_THAT(edit.BufferTerm("far"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); - IndexDebugInfoProto out2 = index_->GetDebugInfo(/*verbosity=*/0); - EXPECT_THAT(out2.lite_index_info().curr_size(), Eq(3)); - EXPECT_THAT(out2.lite_index_info().last_added_document_id(), - Eq(kDocumentId3)); + IndexDebugInfoProto out2 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC); + ICING_LOG(DBG) << "main_index_info:\n" << out2.main_index_info(); + ICING_LOG(DBG) << "lite_index_info:\n" << out2.lite_index_info(); + EXPECT_THAT(out2.main_index_info(), Not(IsEmpty())); + EXPECT_THAT(out2.lite_index_info(), Not(IsEmpty())); + EXPECT_THAT(out2.main_index_info(), StrEq(out0.main_index_info())); + EXPECT_THAT(out2.lite_index_info(), StrNe(out0.lite_index_info())); - // Merge into the man index. Debuug strings should change again. + // Merge into the man index. Debug strings should change again. 
ICING_ASSERT_OK(index_->Merge()); - IndexDebugInfoProto out3 = index_->GetDebugInfo(/*verbosity=*/0); + IndexDebugInfoProto out3 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC); EXPECT_TRUE(out3.has_index_storage_info()); - EXPECT_THAT(out3.main_index_info().lexicon_info(), Not(IsEmpty())); - EXPECT_THAT(out3.main_index_info().last_added_document_id(), - Eq(kDocumentId3)); - EXPECT_THAT(out3.lite_index_info().curr_size(), Eq(0)); - EXPECT_THAT(out3.lite_index_info().hit_buffer_size(), Gt(0)); - EXPECT_THAT(out3.lite_index_info().last_added_document_id(), - Eq(kInvalidDocumentId)); - EXPECT_THAT(out3.lite_index_info().searchable_end(), Eq(0)); - EXPECT_THAT(out3.lite_index_info().index_crc(), Gt(0)); - EXPECT_THAT(out3.lite_index_info().lexicon_info(), Not(IsEmpty())); + ICING_LOG(DBG) << "main_index_info:\n" << out3.main_index_info(); + ICING_LOG(DBG) << "lite_index_info:\n" << out3.lite_index_info(); + EXPECT_THAT(out3.main_index_info(), Not(IsEmpty())); + EXPECT_THAT(out3.lite_index_info(), Not(IsEmpty())); + EXPECT_THAT(out3.main_index_info(), StrNe(out2.main_index_info())); + EXPECT_THAT(out3.lite_index_info(), StrNe(out2.lite_index_info())); } TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) { diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc index 933f9b5..2e8ba23 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter.cc +++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc @@ -66,25 +66,19 @@ DocHitInfoIteratorFilter::DocHitInfoIteratorFilter( libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() { while (delegate_->Advance().ok()) { - if (!document_store_.DoesDocumentExist( - delegate_->doc_hit_info().document_id())) { - // Document doesn't exist, keep searching. This handles deletions and - // expired documents. 
- continue; - } - // Try to get the DocumentFilterData - auto document_filter_data_or = document_store_.GetDocumentFilterData( - delegate_->doc_hit_info().document_id()); - if (!document_filter_data_or.ok()) { + auto document_filter_data_optional = + document_store_.GetAliveDocumentFilterData( + delegate_->doc_hit_info().document_id()); + if (!document_filter_data_optional) { // Didn't find the DocumentFilterData in the filter cache. This could be - // because the DocumentId isn't valid or the filter cache is in some - // invalid state. This is bad, but not the query's responsibility to fix, - // so just skip this result for now. + // because the Document doesn't exist or the DocumentId isn't valid or the + // filter cache is in some invalid state. This is bad, but not the query's + // responsibility to fix, so just skip this result for now. continue; } // We should be guaranteed that this exists now. - DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie(); + DocumentFilterData data = document_filter_data_optional.value(); if (!options_.namespaces.empty() && target_namespace_ids_.count(data.namespace_id()) == 0) { diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc index 034c8cb..9d33e2c 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc @@ -51,15 +51,15 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() { SectionIdMask section_id_mask = delegate_->doc_hit_info().hit_section_ids_mask(); - auto data_or = document_store_.GetDocumentFilterData(document_id); - if (!data_or.ok()) { + auto data_optional = + document_store_.GetAliveDocumentFilterData(document_id); + if (!data_optional) { // Ran into some error retrieving information on this hit, skip continue; } // Guaranteed that the DocumentFilterData exists at this point - DocumentFilterData data 
= std::move(data_or).ValueOrDie(); - SchemaTypeId schema_type_id = data.schema_type_id(); + SchemaTypeId schema_type_id = data_optional.value().schema_type_id(); // A hit can be in multiple sections at once, need to check that at least // one of the confirmed section ids match the name of the target section diff --git a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc index f975989..993c3b8 100644 --- a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc +++ b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc @@ -35,7 +35,7 @@ namespace { // // $ // blaze-bin/icing/index/iterator/doc-hit-info-iterator_benchmark -// --benchmarks=all +// --benchmark_filter=all // // Run on an Android device: // $ blaze build --config=android_arm64 -c opt --dynamic_mode=off @@ -47,7 +47,7 @@ namespace { // /data/local/tmp/ // // $ adb shell /data/local/tmp/doc-hit-info-iterator_benchmark -// --benchmarks=all +// --benchmark_filter=all // Functor to be used with std::generate to create a container of DocHitInfos. 
// DocHitInfos are generated starting at docid starting_docid and continuing at diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index a5c6baf..fc40225 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -391,15 +391,22 @@ bool LiteIndex::is_full() const { lexicon_.min_free_fraction() < (1.0 - kTrieFullFraction)); } -IndexDebugInfoProto::LiteIndexDebugInfoProto LiteIndex::GetDebugInfo( - int verbosity) { - IndexDebugInfoProto::LiteIndexDebugInfoProto res; - res.set_curr_size(header_->cur_size()); - res.set_hit_buffer_size(options_.hit_buffer_size); - res.set_last_added_document_id(header_->last_added_docid()); - res.set_searchable_end(header_->searchable_end()); - res.set_index_crc(ComputeChecksum().Get()); - lexicon_.GetDebugInfo(verbosity, res.mutable_lexicon_info()); +std::string LiteIndex::GetDebugInfo(DebugInfoVerbosity::Code verbosity) { + std::string res; + std::string lexicon_info; + lexicon_.GetDebugInfo(verbosity, &lexicon_info); + IcingStringUtil::SStringAppendF( + &res, 0, + "curr_size: %u\n" + "hit_buffer_size: %u\n" + "last_added_document_id %u\n" + "searchable_end: %u\n" + "index_crc: %u\n" + "\n" + "lite_lexicon_info:\n%s\n", + header_->cur_size(), options_.hit_buffer_size, + header_->last_added_docid(), header_->searchable_end(), + ComputeChecksum().Get(), lexicon_info.c_str()); return res; } diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h index 378fc94..42d69f8 100644 --- a/icing/index/lite/lite-index.h +++ b/icing/index/lite/lite-index.h @@ -240,9 +240,9 @@ class LiteIndex { const IcingDynamicTrie& lexicon() const { return lexicon_; } // Returns debug information for the index in out. - // verbosity <= 0, simplest debug information - size of lexicon, hit buffer - // verbosity > 0, more detailed debug information from the lexicon. 
- IndexDebugInfoProto::LiteIndexDebugInfoProto GetDebugInfo(int verbosity); + // verbosity = BASIC, simplest debug information - size of lexicon, hit buffer + // verbosity = DETAILED, more detailed debug information from the lexicon. + std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity); // Returns the byte size of all the elements held in the index. This excludes // the size of any internal metadata of the index, e.g. the index's header. diff --git a/icing/index/main/flash-index-storage.cc b/icing/index/main/flash-index-storage.cc index 3c52375..dabff28 100644 --- a/icing/index/main/flash-index-storage.cc +++ b/icing/index/main/flash-index-storage.cc @@ -503,7 +503,8 @@ void FlashIndexStorage::FlushInMemoryFreeList() { } } -void FlashIndexStorage::GetDebugInfo(int verbosity, std::string* out) const { +void FlashIndexStorage::GetDebugInfo(DebugInfoVerbosity::Code verbosity, + std::string* out) const { // Dump and check integrity of the index block free lists. out->append("Free lists:\n"); for (size_t i = 0; i < header_block_->header()->num_index_block_infos; ++i) { diff --git a/icing/index/main/flash-index-storage.h b/icing/index/main/flash-index-storage.h index 6c6fbb8..fceb26f 100644 --- a/icing/index/main/flash-index-storage.h +++ b/icing/index/main/flash-index-storage.h @@ -160,7 +160,7 @@ class FlashIndexStorage { libtextclassifier3::Status Reset(); // TODO(b/222349894) Convert the string output to a protocol buffer instead. 
- void GetDebugInfo(int verbosity, std::string* out) const; + void GetDebugInfo(DebugInfoVerbosity::Code verbosity, std::string* out) const; private: FlashIndexStorage(const std::string& index_filename, diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc index 2d6007b..158c287 100644 --- a/icing/index/main/main-index.cc +++ b/icing/index/main/main-index.cc @@ -16,6 +16,7 @@ #include <cstdint> #include <cstring> #include <memory> +#include <string> #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" @@ -607,21 +608,28 @@ libtextclassifier3::Status MainIndex::AddPrefixBackfillHits( return libtextclassifier3::Status::OK; } -IndexDebugInfoProto::MainIndexDebugInfoProto MainIndex::GetDebugInfo( - int verbosity) const { - IndexDebugInfoProto::MainIndexDebugInfoProto res; +std::string MainIndex::GetDebugInfo(DebugInfoVerbosity::Code verbosity) const { + std::string res; // Lexicon. - main_lexicon_->GetDebugInfo(verbosity, res.mutable_lexicon_info()); + std::string lexicon_info; + main_lexicon_->GetDebugInfo(verbosity, &lexicon_info); - res.set_last_added_document_id(last_added_document_id()); + IcingStringUtil::SStringAppendF(&res, 0, + "last_added_document_id: %u\n" + "\n" + "main_lexicon_info:\n%s\n", + last_added_document_id(), + lexicon_info.c_str()); - if (verbosity <= 0) { + if (verbosity == DebugInfoVerbosity::BASIC) { return res; } - flash_index_storage_->GetDebugInfo(verbosity, - res.mutable_flash_index_storage_info()); + std::string flash_index_storage_info; + flash_index_storage_->GetDebugInfo(verbosity, &flash_index_storage_info); + IcingStringUtil::SStringAppendF(&res, 0, "flash_index_storage_info:\n%s\n", + flash_index_storage_info.c_str()); return res; } diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h index abb0418..d6f7d5f 100644 --- a/icing/index/main/main-index.h +++ b/icing/index/main/main-index.h @@ -183,11 +183,10 @@ class MainIndex { IndexStorageInfoProto 
storage_info) const; // Returns debug information for the main index in out. - // verbosity <= 0, simplest debug information - just the lexicon - // verbosity > 0, more detailed debug information including raw postings - // lists. - IndexDebugInfoProto::MainIndexDebugInfoProto GetDebugInfo( - int verbosity) const; + // verbosity = BASIC, simplest debug information - just the lexicon + // verbosity = DETAILED, more detailed debug information including raw + // postings lists. + std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity) const; private: libtextclassifier3::Status Init(const std::string& index_directory, diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc index bcc35e6..17bb059 100644 --- a/icing/jni/icing-search-engine-jni.cc +++ b/icing/jni/icing-search-engine-jni.cc @@ -15,8 +15,11 @@ #include <jni.h> #include <string> +#include <utility> #include "icing/jni/jni-cache.h" +#include "icing/jni/scoped-primitive-array-critical.h" +#include "icing/jni/scoped-utf-chars.h" #include <google/protobuf/message_lite.h> #include "icing/absl_ports/status_imports.h" #include "icing/icing-search-engine.h" @@ -29,6 +32,7 @@ #include "icing/proto/search.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/usage.pb.h" +#include "icing/util/logging.h" #include "icing/util/status-macros.h" namespace { @@ -39,13 +43,8 @@ const char kNativePointerField[] = "nativePointer"; bool ParseProtoFromJniByteArray(JNIEnv* env, jbyteArray bytes, google::protobuf::MessageLite* protobuf) { - int bytes_size = env->GetArrayLength(bytes); - uint8_t* bytes_ptr = static_cast<uint8_t*>( - env->GetPrimitiveArrayCritical(bytes, /*isCopy=*/nullptr)); - bool parsed = protobuf->ParseFromArray(bytes_ptr, bytes_size); - env->ReleasePrimitiveArrayCritical(bytes, bytes_ptr, /*mode=*/0); - - return parsed; + icing::lib::ScopedPrimitiveArrayCritical<uint8_t> scoped_array(env, bytes); + return protobuf->ParseFromArray(scoped_array.data(), 
scoped_array.size()); } jbyteArray SerializeProtoToJniByteArray( @@ -57,10 +56,8 @@ jbyteArray SerializeProtoToJniByteArray( return nullptr; } - uint8_t* ret_buf = static_cast<uint8_t*>( - env->GetPrimitiveArrayCritical(ret, /*isCopy=*/nullptr)); - protobuf.SerializeWithCachedSizesToArray(ret_buf); - env->ReleasePrimitiveArrayCritical(ret, ret_buf, 0); + icing::lib::ScopedPrimitiveArrayCritical<uint8_t> scoped_array(env, ret); + protobuf.SerializeWithCachedSizesToArray(scoped_array.data()); return ret; } @@ -162,11 +159,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetSchemaType( icing::lib::IcingSearchEngine* icing = GetIcingSearchEnginePointer(env, object); - const char* native_schema_type = - env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr); + icing::lib::ScopedUtfChars scoped_schema_type_chars(env, schema_type); icing::lib::GetSchemaTypeResultProto get_schema_type_result_proto = - icing->GetSchemaType(native_schema_type); - env->ReleaseStringUTFChars(schema_type, native_schema_type); + icing->GetSchemaType(scoped_schema_type_chars.c_str()); return SerializeProtoToJniByteArray(env, get_schema_type_result_proto); } @@ -193,20 +188,19 @@ JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeGet( JNIEnv* env, jclass clazz, jobject object, jstring name_space, jstring uri, jbyteArray result_spec_bytes) { + icing::lib::IcingSearchEngine* icing = + GetIcingSearchEnginePointer(env, object); + icing::lib::GetResultSpecProto get_result_spec; if (!ParseProtoFromJniByteArray(env, result_spec_bytes, &get_result_spec)) { ICING_LOG(ERROR) << "Failed to parse GetResultSpecProto in nativeGet"; return nullptr; } - icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(env, object); - const char* native_name_space = - env->GetStringUTFChars(name_space, /*isCopy=*/nullptr); - const char* native_uri = env->GetStringUTFChars(uri, /*isCopy=*/nullptr); + icing::lib::ScopedUtfChars scoped_name_space_chars(env, name_space); + 
icing::lib::ScopedUtfChars scoped_uri_chars(env, uri); icing::lib::GetResultProto get_result_proto = - icing->Get(native_name_space, native_uri, get_result_spec); - env->ReleaseStringUTFChars(uri, native_uri); - env->ReleaseStringUTFChars(name_space, native_name_space); + icing->Get(scoped_name_space_chars.c_str(), scoped_uri_chars.c_str(), + get_result_spec); return SerializeProtoToJniByteArray(env, get_result_proto); } @@ -303,13 +297,10 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDelete( icing::lib::IcingSearchEngine* icing = GetIcingSearchEnginePointer(env, object); - const char* native_name_space = - env->GetStringUTFChars(name_space, /*isCopy=*/nullptr); - const char* native_uri = env->GetStringUTFChars(uri, /*isCopy=*/nullptr); + icing::lib::ScopedUtfChars scoped_name_space_chars(env, name_space); + icing::lib::ScopedUtfChars scoped_uri_chars(env, uri); icing::lib::DeleteResultProto delete_result_proto = - icing->Delete(native_name_space, native_uri); - env->ReleaseStringUTFChars(uri, native_uri); - env->ReleaseStringUTFChars(name_space, native_name_space); + icing->Delete(scoped_name_space_chars.c_str(), scoped_uri_chars.c_str()); return SerializeProtoToJniByteArray(env, delete_result_proto); } @@ -320,11 +311,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByNamespace( icing::lib::IcingSearchEngine* icing = GetIcingSearchEnginePointer(env, object); - const char* native_name_space = - env->GetStringUTFChars(name_space, /*isCopy=*/nullptr); + icing::lib::ScopedUtfChars scoped_name_space_chars(env, name_space); icing::lib::DeleteByNamespaceResultProto delete_by_namespace_result_proto = - icing->DeleteByNamespace(native_name_space); - env->ReleaseStringUTFChars(name_space, native_name_space); + icing->DeleteByNamespace(scoped_name_space_chars.c_str()); return SerializeProtoToJniByteArray(env, delete_by_namespace_result_proto); } @@ -335,18 +324,17 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType( 
icing::lib::IcingSearchEngine* icing = GetIcingSearchEnginePointer(env, object); - const char* native_schema_type = - env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr); + icing::lib::ScopedUtfChars scoped_schema_type_chars(env, schema_type); icing::lib::DeleteBySchemaTypeResultProto delete_by_schema_type_result_proto = - icing->DeleteBySchemaType(native_schema_type); - env->ReleaseStringUTFChars(schema_type, native_schema_type); + icing->DeleteBySchemaType(scoped_schema_type_chars.c_str()); return SerializeProtoToJniByteArray(env, delete_by_schema_type_result_proto); } JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery( - JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes) { + JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes, + jboolean return_deleted_document_info) { icing::lib::IcingSearchEngine* icing = GetIcingSearchEnginePointer(env, object); @@ -356,7 +344,7 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery( return nullptr; } icing::lib::DeleteByQueryResultProto delete_result_proto = - icing->DeleteByQuery(search_spec_proto); + icing->DeleteByQuery(search_spec_proto, return_deleted_document_info); return SerializeProtoToJniByteArray(env, delete_result_proto); } @@ -445,4 +433,49 @@ Java_com_google_android_icing_IcingSearchEngine_nativeSearchSuggestions( return SerializeProtoToJniByteArray(env, suggestionResponse); } +JNIEXPORT jbyteArray JNICALL +Java_com_google_android_icing_IcingSearchEngine_nativeGetDebugInfo( + JNIEnv* env, jclass clazz, jobject object, jint verbosity) { + icing::lib::IcingSearchEngine* icing = + GetIcingSearchEnginePointer(env, object); + + if (!icing::lib::DebugInfoVerbosity::Code_IsValid(verbosity)) { + ICING_LOG(ERROR) << "Invalid value for Debug Info verbosity: " << verbosity; + return nullptr; + } + + icing::lib::DebugInfoResultProto debug_info_result_proto = + icing->GetDebugInfo( + 
static_cast<icing::lib::DebugInfoVerbosity::Code>(verbosity)); + + return SerializeProtoToJniByteArray(env, debug_info_result_proto); +} + +JNIEXPORT jboolean JNICALL +Java_com_google_android_icing_IcingSearchEngine_nativeShouldLog( + JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) { + if (!icing::lib::LogSeverity::Code_IsValid(severity)) { + ICING_LOG(ERROR) << "Invalid value for logging severity: " << severity; + return false; + } + return icing::lib::ShouldLog( + static_cast<icing::lib::LogSeverity::Code>(severity), verbosity); +} + +JNIEXPORT jboolean JNICALL +Java_com_google_android_icing_IcingSearchEngine_nativeSetLoggingLevel( + JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) { + if (!icing::lib::LogSeverity::Code_IsValid(severity)) { + ICING_LOG(ERROR) << "Invalid value for logging severity: " << severity; + return false; + } + return icing::lib::SetLoggingLevel( + static_cast<icing::lib::LogSeverity::Code>(severity), verbosity); +} + +JNIEXPORT jstring JNICALL +Java_com_google_android_icing_IcingSearchEngine_nativeGetLoggingTag( + JNIEnv* env, jclass clazz) { + return env->NewStringUTF(icing::lib::kIcingLoggingTag); +} } // extern "C" diff --git a/icing/jni/scoped-primitive-array-critical.h b/icing/jni/scoped-primitive-array-critical.h new file mode 100644 index 0000000..062c145 --- /dev/null +++ b/icing/jni/scoped-primitive-array-critical.h @@ -0,0 +1,86 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_JNI_SCOPED_PRIMITIVE_ARRAY_CRITICAL_H_ +#define ICING_JNI_SCOPED_PRIMITIVE_ARRAY_CRITICAL_H_ + +#include <jni.h> + +#include <utility> + +namespace icing { +namespace lib { + +template <typename T> +class ScopedPrimitiveArrayCritical { + public: + ScopedPrimitiveArrayCritical(JNIEnv* env, jarray array) + : env_(env), array_(array) { + if (array_ == nullptr) { + array_critical_ = nullptr; + array_critical_size_ = 0; + } else { + array_critical_size_ = env->GetArrayLength(array); + array_critical_ = static_cast<T*>( + env->GetPrimitiveArrayCritical(array, /*isCopy=*/nullptr)); + } + } + + ScopedPrimitiveArrayCritical(ScopedPrimitiveArrayCritical&& rhs) + : env_(nullptr), + array_(nullptr), + array_critical_(nullptr), + array_critical_size_(0) { + Swap(rhs); + } + + ScopedPrimitiveArrayCritical(const ScopedPrimitiveArrayCritical&) = delete; + + ScopedPrimitiveArrayCritical& operator=(ScopedPrimitiveArrayCritical&& rhs) { + Swap(rhs); + return *this; + } + + ScopedPrimitiveArrayCritical& operator=(const ScopedPrimitiveArrayCritical&) = + delete; + + ~ScopedPrimitiveArrayCritical() { + if (array_critical_ != nullptr && array_ != nullptr) { + env_->ReleasePrimitiveArrayCritical(array_, array_critical_, /*mode=*/0); + } + } + + T* data() { return array_critical_; } + const T* data() const { return array_critical_; } + + size_t size() const { return array_critical_size_; } + + private: + void Swap(ScopedPrimitiveArrayCritical& other) { + std::swap(env_, other.env_); + std::swap(array_, other.array_); + std::swap(array_critical_, other.array_critical_); + std::swap(array_critical_size_, other.array_critical_size_); + } + + JNIEnv* env_; + jarray array_; + T* array_critical_; + size_t array_critical_size_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_JNI_SCOPED_PRIMITIVE_ARRAY_CRITICAL_H_ diff --git a/icing/jni/scoped-utf-chars.h 
b/icing/jni/scoped-utf-chars.h new file mode 100644 index 0000000..2dafcc1 --- /dev/null +++ b/icing/jni/scoped-utf-chars.h @@ -0,0 +1,82 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef ICING_JNI_SCOPED_UTF_CHARS_H_ +#define ICING_JNI_SCOPED_UTF_CHARS_H_ + +#include <jni.h> + +#include <cstddef> +#include <cstring> +#include <utility> + +namespace icing { +namespace lib { + +// An RAII class to manage access and allocation of a Java string's UTF chars. 
+class ScopedUtfChars { + public: + ScopedUtfChars(JNIEnv* env, jstring s) : env_(env), string_(s) { + if (s == nullptr) { + utf_chars_ = nullptr; + size_ = 0; + } else { + utf_chars_ = env->GetStringUTFChars(s, /*isCopy=*/nullptr); + size_ = strlen(utf_chars_); + } + } + + ScopedUtfChars(ScopedUtfChars&& rhs) + : env_(nullptr), string_(nullptr), utf_chars_(nullptr) { + Swap(rhs); + } + + ScopedUtfChars(const ScopedUtfChars&) = delete; + + ScopedUtfChars& operator=(ScopedUtfChars&& rhs) { + Swap(rhs); + return *this; + } + + ScopedUtfChars& operator=(const ScopedUtfChars&) = delete; + + ~ScopedUtfChars() { + if (utf_chars_ != nullptr) { + env_->ReleaseStringUTFChars(string_, utf_chars_); + } + } + + const char* c_str() const { return utf_chars_; } + + size_t size() const { return size_; } + + private: + void Swap(ScopedUtfChars& other) { + std::swap(env_, other.env_); + std::swap(string_, other.string_); + std::swap(utf_chars_, other.utf_chars_); + std::swap(size_, other.size_); + } + + JNIEnv* env_; + jstring string_; + const char* utf_chars_; + size_t size_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_JNI_SCOPED_UTF_CHARS_H_ diff --git a/icing/legacy/index/icing-dynamic-trie.cc b/icing/legacy/index/icing-dynamic-trie.cc index 77876c4..4428599 100644 --- a/icing/legacy/index/icing-dynamic-trie.cc +++ b/icing/legacy/index/icing-dynamic-trie.cc @@ -101,15 +101,9 @@ namespace { constexpr uint32_t kInvalidNodeIndex = (1U << 24) - 1; constexpr uint32_t kInvalidNextIndex = ~0U; -// Returns the number of valid nexts in the array. 
-int GetValidNextsSize(IcingDynamicTrie::Next *next_array_start, - int next_array_length) { - int valid_nexts_length = 0; - for (; valid_nexts_length < next_array_length && - next_array_start[valid_nexts_length].node_index() != kInvalidNodeIndex; - ++valid_nexts_length) { - } - return valid_nexts_length; +void ResetMutableNext(IcingDynamicTrie::Next &mutable_next) { + mutable_next.set_val(0xff); + mutable_next.set_node_index(kInvalidNodeIndex); } } // namespace @@ -769,8 +763,7 @@ IcingDynamicTrie::IcingDynamicTrieStorage::AllocNextArray(int size) { // Fill with char 0xff so we are sorted properly. for (int i = 0; i < aligned_size; i++) { - ret[i].set_val(0xff); - ret[i].set_node_index(kInvalidNodeIndex); + ResetMutableNext(ret[i]); } return ret; } @@ -1550,9 +1543,7 @@ bool IcingDynamicTrie::ResetNext(uint32_t next_index) { if (mutable_next == nullptr) { return false; } - - mutable_next->set_val(0); - mutable_next->set_node_index(kInvalidNodeIndex); + ResetMutableNext(*mutable_next); return true; } @@ -1570,7 +1561,7 @@ bool IcingDynamicTrie::SortNextArray(const Node *node) { return false; } - std::sort(next_array_start, next_array_start + next_array_buffer_size - 1); + std::sort(next_array_start, next_array_start + next_array_buffer_size); return true; } @@ -2116,22 +2107,33 @@ const IcingDynamicTrie::Next *IcingDynamicTrie::GetNextByChar( return found; } +int IcingDynamicTrie::GetValidNextsSize( + IcingDynamicTrie::Next *next_array_start, int next_array_length) const { + // Only searching for key char 0xff is not sufficient, as 0xff can be a valid + // character. We must also specify kInvalidNodeIndex as the target node index + // when searching the next array. 
+ return LowerBound(next_array_start, next_array_start + next_array_length, + /*key_char=*/0xff, /*node_index=*/kInvalidNodeIndex) - + next_array_start; +} + const IcingDynamicTrie::Next *IcingDynamicTrie::LowerBound( - const Next *start, const Next *end, uint8_t key_char) const { + const Next *start, const Next *end, uint8_t key_char, + uint32_t node_index) const { // Above this value will use binary search instead of linear // search. 16 was chosen from running some benchmarks with // different values. static const uint32_t kBinarySearchCutoff = 16; + Next key_next(key_char, node_index); if (end - start >= kBinarySearchCutoff) { // Binary search. - Next key_next(key_char, 0); return lower_bound(start, end, key_next); } else { // Linear search. const Next *found; for (found = start; found < end; found++) { - if (found->val() >= key_char) { + if (!(*found < key_next)) { // Should have gotten match. break; } @@ -2275,6 +2277,40 @@ std::vector<int> IcingDynamicTrie::FindBranchingPrefixLengths(const char *key, return prefix_lengths; } +bool IcingDynamicTrie::IsBranchingTerm(const char *key) const { + if (!is_initialized()) { + ICING_LOG(FATAL) << "DynamicTrie not initialized"; + } + + if (storage_->empty()) { + return false; + } + + uint32_t best_node_index; + int key_offset; + FindBestNode(key, &best_node_index, &key_offset, /*prefix=*/true); + const Node *cur_node = storage_->GetNode(best_node_index); + + if (cur_node->is_leaf()) { + return false; + } + + // key is not present in the trie. + if (key[key_offset] != '\0') { + return false; + } + + // Found key as an intermediate node, but key is not a valid term stored in + // the trie. + if (GetNextByChar(cur_node, '\0') == nullptr) { + return false; + } + + // The intermediate node for key must have more than two children for key to + // be a branching term, one of which represents the leaf node for key itself. 
+ return cur_node->log2_num_children() > 1; +} + void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const { Stats stats; CollectStats(&stats); @@ -2500,7 +2536,26 @@ bool IcingDynamicTrie::Delete(const std::string_view key) { for (uint32_t next_index : nexts_to_reset) { ResetNext(next_index); } - SortNextArray(last_multichild_node); + + if (last_multichild_node != nullptr) { + SortNextArray(last_multichild_node); + uint32_t next_array_buffer_size = + 1u << last_multichild_node->log2_num_children(); + Next *next_array_start = this->storage_->GetMutableNextArray( + last_multichild_node->next_index(), next_array_buffer_size); + uint32_t num_children = + GetValidNextsSize(next_array_start, next_array_buffer_size); + // Shrink the next array if we can. + if (num_children == next_array_buffer_size / 2) { + Node *mutable_node = storage_->GetMutableNode( + storage_->GetNodeIndex(last_multichild_node)); + mutable_node->set_log2_num_children(mutable_node->log2_num_children() - + 1); + // Add the unused second half of the next array to the free list. + storage_->FreeNextArray(next_array_start + next_array_buffer_size / 2, + mutable_node->log2_num_children()); + } + } return true; } diff --git a/icing/legacy/index/icing-dynamic-trie.h b/icing/legacy/index/icing-dynamic-trie.h index 013b926..ec8b31a 100644 --- a/icing/legacy/index/icing-dynamic-trie.h +++ b/icing/legacy/index/icing-dynamic-trie.h @@ -400,6 +400,16 @@ class IcingDynamicTrie : public IIcingStorage { // itself. If utf8 is true, does not cut key mid-utf8. std::vector<int> FindBranchingPrefixLengths(const char *key, bool utf8) const; + // Check if key is a branching term. + // + // key is a branching term, if and only if there exists terms s1 and s2 in the + // trie such that key is the maximum common prefix of s1 and s2, but s1 and s2 + // are not prefixes of each other. + // + // The function assumes that key is already present in the trie. Otherwise, + // false will be returned. 
+ bool IsBranchingTerm(const char *key) const; + void GetDebugInfo(int verbosity, std::string *out) const override; double min_free_fraction() const; @@ -612,8 +622,11 @@ class IcingDynamicTrie : public IIcingStorage { // Helpers for Find and Insert. const Next *GetNextByChar(const Node *node, uint8_t key_char) const; - const Next *LowerBound(const Next *start, const Next *end, - uint8_t key_char) const; + const Next *LowerBound(const Next *start, const Next *end, uint8_t key_char, + uint32_t node_index = 0) const; + // Returns the number of valid nexts in the array. + int GetValidNextsSize(IcingDynamicTrie::Next *next_array_start, + int next_array_length) const; void FindBestNode(const char *key, uint32_t *best_node_index, int *key_offset, bool prefix, bool utf8 = false) const; diff --git a/icing/legacy/index/icing-dynamic-trie_test.cc b/icing/legacy/index/icing-dynamic-trie_test.cc index 193765b..b69ee64 100644 --- a/icing/legacy/index/icing-dynamic-trie_test.cc +++ b/icing/legacy/index/icing-dynamic-trie_test.cc @@ -20,6 +20,7 @@ #include <memory> #include <string> #include <unordered_map> +#include <unordered_set> #include <vector> #include "icing/text_classifier/lib3/utils/hash/farmhash.h" @@ -27,15 +28,18 @@ #include "gtest/gtest.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/legacy/index/icing-filesystem.h" +#include "icing/testing/random-string.h" #include "icing/testing/tmp-directory.h" - -using testing::ElementsAre; +#include "icing/util/logging.h" namespace icing { namespace lib { namespace { +using testing::ContainerEq; +using testing::ElementsAre; + constexpr std::string_view kKeys[] = { "", "ab", "ac", "abd", "bac", "bb", "bacd", "abbb", "abcdefg", }; @@ -962,6 +966,102 @@ TEST_F(IcingDynamicTrieTest, DeletingNonExistingKeyShouldReturnTrue) { EXPECT_TRUE(trie.Find("bed", &value)); } +TEST_F(IcingDynamicTrieTest, DeletionResortsFullNextArray) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, 
IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + uint32_t value = 1; + // 'f' -> [ 'a', 'j', 'o', 'u' ] + ASSERT_TRUE(trie.Insert("foul", &value)); + ASSERT_TRUE(trie.Insert("far", &value)); + ASSERT_TRUE(trie.Insert("fudge", &value)); + ASSERT_TRUE(trie.Insert("fjord", &value)); + + // Delete the third child + EXPECT_TRUE(trie.Delete("foul")); + + std::vector<std::string> remaining; + for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/""); + term_iter.IsValid(); term_iter.Advance()) { + remaining.push_back(term_iter.GetKey()); + } + EXPECT_THAT(remaining, ElementsAre("far", "fjord", "fudge")); +} + +TEST_F(IcingDynamicTrieTest, DeletionResortsPartiallyFilledNextArray) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + uint32_t value = 1; + // 'f' -> [ 'a', 'o', 'u', 0xFF ] + ASSERT_TRUE(trie.Insert("foul", &value)); + ASSERT_TRUE(trie.Insert("far", &value)); + ASSERT_TRUE(trie.Insert("fudge", &value)); + + // Delete the second child + EXPECT_TRUE(trie.Delete("foul")); + + std::vector<std::string> remaining; + for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/""); + term_iter.IsValid(); term_iter.Advance()) { + remaining.push_back(term_iter.GetKey()); + } + EXPECT_THAT(remaining, ElementsAre("far", "fudge")); +} + +TEST_F(IcingDynamicTrieTest, DeletionLoadTest) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + std::default_random_engine random; + ICING_LOG(ERROR) << "Seed: " << std::default_random_engine::default_seed; + std::vector<std::string> terms; + uint32_t value; + // Randomly generate 2048 
terms. + for (int i = 0; i < 2048; ++i) { + terms.push_back(RandomString("abcdefg", 5, &random)); + ASSERT_TRUE(trie.Insert(terms.back().c_str(), &value)); + } + + // Randomly delete 1024 terms. + std::unordered_set<std::string> exp_remaining(terms.begin(), terms.end()); + std::shuffle(terms.begin(), terms.end(), random); + for (int i = 0; i < 1024; ++i) { + exp_remaining.erase(terms[i]); + ASSERT_TRUE(trie.Delete(terms[i].c_str())); + } + + // Check that the iterator still works, and the remaining terms are correct. + std::unordered_set<std::string> remaining; + for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/""); + term_iter.IsValid(); term_iter.Advance()) { + remaining.insert(term_iter.GetKey()); + } + EXPECT_THAT(remaining, ContainerEq(exp_remaining)); + + // Check that we can still insert terms after delete. + for (int i = 0; i < 2048; ++i) { + std::string term = RandomString("abcdefg", 5, &random); + ASSERT_TRUE(trie.Insert(term.c_str(), &value)); + exp_remaining.insert(term); + } + remaining.clear(); + for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/""); + term_iter.IsValid(); term_iter.Advance()) { + remaining.insert(term_iter.GetKey()); + } + EXPECT_THAT(remaining, ContainerEq(exp_remaining)); +} + } // namespace // The tests below are accessing private methods and fields of IcingDynamicTrie @@ -1133,5 +1233,124 @@ TEST_F(IcingDynamicTrieTest, BitmapsClosedWhenInitFails) { ASSERT_EQ(0, trie.property_bitmaps_.size()); } +TEST_F(IcingDynamicTrieTest, IsBranchingTerm) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + uint32_t value = 1; + + ASSERT_TRUE(trie.Insert("", &value)); + EXPECT_FALSE(trie.IsBranchingTerm("")); + + ASSERT_TRUE(trie.Insert("ab", &value)); + EXPECT_FALSE(trie.IsBranchingTerm("")); + EXPECT_FALSE(trie.IsBranchingTerm("ab")); + + 
ASSERT_TRUE(trie.Insert("ac", &value)); + // "" is a prefix of "ab" and "ac", but it is not a branching term. + EXPECT_FALSE(trie.IsBranchingTerm("")); + EXPECT_FALSE(trie.IsBranchingTerm("ab")); + EXPECT_FALSE(trie.IsBranchingTerm("ac")); + + ASSERT_TRUE(trie.Insert("ba", &value)); + // "" now branches to "ba" + EXPECT_TRUE(trie.IsBranchingTerm("")); + EXPECT_FALSE(trie.IsBranchingTerm("ab")); + EXPECT_FALSE(trie.IsBranchingTerm("ac")); + EXPECT_FALSE(trie.IsBranchingTerm("ba")); + + ASSERT_TRUE(trie.Insert("a", &value)); + EXPECT_TRUE(trie.IsBranchingTerm("")); + // "a" branches to "ab" and "ac" + EXPECT_TRUE(trie.IsBranchingTerm("a")); + EXPECT_FALSE(trie.IsBranchingTerm("ab")); + EXPECT_FALSE(trie.IsBranchingTerm("ac")); + EXPECT_FALSE(trie.IsBranchingTerm("ba")); + + ASSERT_TRUE(trie.Insert("abc", &value)); + ASSERT_TRUE(trie.Insert("acd", &value)); + EXPECT_TRUE(trie.IsBranchingTerm("")); + EXPECT_TRUE(trie.IsBranchingTerm("a")); + // "ab" is a prefix of "abc", but it is not a branching term. + EXPECT_FALSE(trie.IsBranchingTerm("ab")); + // "ac" is a prefix of "acd", but it is not a branching term. + EXPECT_FALSE(trie.IsBranchingTerm("ac")); + EXPECT_FALSE(trie.IsBranchingTerm("ba")); + EXPECT_FALSE(trie.IsBranchingTerm("abc")); + EXPECT_FALSE(trie.IsBranchingTerm("acd")); + + ASSERT_TRUE(trie.Insert("abcd", &value)); + EXPECT_TRUE(trie.IsBranchingTerm("")); + EXPECT_TRUE(trie.IsBranchingTerm("a")); + // "ab" is a prefix of "abc" and "abcd", but it is not a branching term. + EXPECT_FALSE(trie.IsBranchingTerm("ab")); + EXPECT_FALSE(trie.IsBranchingTerm("ac")); + EXPECT_FALSE(trie.IsBranchingTerm("ba")); + // "abc" is a prefix of "abcd", but it is not a branching term. 
+ EXPECT_FALSE(trie.IsBranchingTerm("abc")); + EXPECT_FALSE(trie.IsBranchingTerm("acd")); + EXPECT_FALSE(trie.IsBranchingTerm("abcd")); + + ASSERT_TRUE(trie.Insert("abd", &value)); + EXPECT_TRUE(trie.IsBranchingTerm("")); + EXPECT_TRUE(trie.IsBranchingTerm("a")); + // "ab" branches to "abc" and "abd" + EXPECT_TRUE(trie.IsBranchingTerm("ab")); + EXPECT_FALSE(trie.IsBranchingTerm("ac")); + EXPECT_FALSE(trie.IsBranchingTerm("ba")); + EXPECT_FALSE(trie.IsBranchingTerm("abc")); + EXPECT_FALSE(trie.IsBranchingTerm("acd")); + EXPECT_FALSE(trie.IsBranchingTerm("abcd")); + EXPECT_FALSE(trie.IsBranchingTerm("abd")); +} + +TEST_F(IcingDynamicTrieTest, IsBranchingTermShouldWorkForNonExistingTerms) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + uint32_t value = 1; + + EXPECT_FALSE(trie.IsBranchingTerm("")); + EXPECT_FALSE(trie.IsBranchingTerm("a")); + EXPECT_FALSE(trie.IsBranchingTerm("ab")); + + ASSERT_TRUE(trie.Insert("aa", &value)); + EXPECT_FALSE(trie.IsBranchingTerm("")); + EXPECT_FALSE(trie.IsBranchingTerm("a")); + + ASSERT_TRUE(trie.Insert("", &value)); + EXPECT_FALSE(trie.IsBranchingTerm("a")); + + ASSERT_TRUE(trie.Insert("ab", &value)); + EXPECT_FALSE(trie.IsBranchingTerm("a")); + + ASSERT_TRUE(trie.Insert("ac", &value)); + EXPECT_FALSE(trie.IsBranchingTerm("a")); + + ASSERT_TRUE(trie.Insert("ad", &value)); + EXPECT_FALSE(trie.IsBranchingTerm("a")); + + ASSERT_TRUE(trie.Insert("abcd", &value)); + EXPECT_FALSE(trie.IsBranchingTerm("abc")); + + ASSERT_TRUE(trie.Insert("abce", &value)); + EXPECT_FALSE(trie.IsBranchingTerm("abc")); + + ASSERT_TRUE(trie.Insert("abcf", &value)); + EXPECT_FALSE(trie.IsBranchingTerm("abc")); + + ASSERT_TRUE(trie.Insert("abc_suffix", &value)); + EXPECT_FALSE(trie.IsBranchingTerm("abc")); + EXPECT_FALSE(trie.IsBranchingTerm("abc_s")); + 
EXPECT_FALSE(trie.IsBranchingTerm("abc_su")); + EXPECT_FALSE(trie.IsBranchingTerm("abc_suffi")); +} + } // namespace lib } // namespace icing diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc index e48fe78..b505ac5 100644 --- a/icing/query/query-processor_benchmark.cc +++ b/icing/query/query-processor_benchmark.cc @@ -37,7 +37,7 @@ // //icing/query:query-processor_benchmark // // $ blaze-bin/icing/query/query-processor_benchmark -// --benchmarks=all +// --benchmark_filter=all // // Run on an Android device: // Make target //icing/tokenization:language-segmenter depend on @@ -53,7 +53,7 @@ // $ adb push blaze-bin/icing/query/query-processor_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/query-processor_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/query-processor_benchmark --benchmark_filter=all // --adb // Flag to tell the benchmark that it'll be run on an Android device via adb, diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc index eaa0efc..a725213 100644 --- a/icing/query/query-processor_test.cc +++ b/icing/query/query-processor_test.cc @@ -127,22 +127,23 @@ class QueryProcessorTest : public Test { schema_store_.reset(); filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); } - Filesystem filesystem_; const std::string test_dir_; const std::string store_dir_; const std::string schema_store_dir_; + + private: + IcingFilesystem icing_filesystem_; + const std::string index_dir_; + + protected: std::unique_ptr<Index> index_; std::unique_ptr<LanguageSegmenter> language_segmenter_; std::unique_ptr<Normalizer> normalizer_; - std::unique_ptr<SchemaStore> schema_store_; - std::unique_ptr<DocumentStore> document_store_; FakeClock fake_clock_; std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache(); - - private: - IcingFilesystem icing_filesystem_; - const std::string index_dir_; + std::unique_ptr<SchemaStore> schema_store_; + 
std::unique_ptr<DocumentStore> document_store_; }; TEST_F(QueryProcessorTest, CreationWithNullPointerShouldFail) { diff --git a/icing/query/suggestion-processor.cc b/icing/query/suggestion-processor.cc index cfa53f6..b1a5a9e 100644 --- a/icing/query/suggestion-processor.cc +++ b/icing/query/suggestion-processor.cc @@ -93,4 +93,4 @@ SuggestionProcessor::SuggestionProcessor( normalizer_(*normalizer) {} } // namespace lib -} // namespace icing +} // namespace icing
\ No newline at end of file diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc index ba4c90a..b3012e9 100644 --- a/icing/query/suggestion-processor_test.cc +++ b/icing/query/suggestion-processor_test.cc @@ -99,16 +99,18 @@ class SuggestionProcessorTest : public Test { Filesystem filesystem_; const std::string test_dir_; const std::string store_dir_; + + private: + IcingFilesystem icing_filesystem_; + const std::string index_dir_; + + protected: std::unique_ptr<Index> index_; std::unique_ptr<LanguageSegmenter> language_segmenter_; std::unique_ptr<Normalizer> normalizer_; + FakeClock fake_clock_; std::unique_ptr<SchemaStore> schema_store_; std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache(); - FakeClock fake_clock_; - - private: - IcingFilesystem icing_filesystem_; - const std::string index_dir_; }; constexpr DocumentId kDocumentId0 = 0; diff --git a/icing/result/page-result.h b/icing/result/page-result.h new file mode 100644 index 0000000..6645593 --- /dev/null +++ b/icing/result/page-result.h @@ -0,0 +1,46 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_RESULT_PAGE_RESULT_H_ +#define ICING_RESULT_PAGE_RESULT_H_ + +#include <vector> + +#include "icing/proto/search.pb.h" + +namespace icing { +namespace lib { + +// Contains information of the search result of one page. 
+struct PageResult { + PageResult(std::vector<SearchResultProto::ResultProto> results_in, + int num_results_with_snippets_in, int requested_page_size_in) + : results(std::move(results_in)), + num_results_with_snippets(num_results_with_snippets_in), + requested_page_size(requested_page_size_in) {} + + // Results of one page + std::vector<SearchResultProto::ResultProto> results; + + // Number of results with snippets. + int num_results_with_snippets; + + // The page size for this query. This should always be >= results.size(). + int requested_page_size; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_RESULT_PAGE_RESULT_H_ diff --git a/icing/result/projection-tree.h b/icing/result/projection-tree.h index b2e5ffc..8e38aaf 100644 --- a/icing/result/projection-tree.h +++ b/icing/result/projection-tree.h @@ -18,7 +18,6 @@ #include <string_view> #include <vector> -#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/proto/search.pb.h" namespace icing { @@ -31,14 +30,23 @@ class ProjectionTree { struct Node { explicit Node(std::string_view name = "") : name(name) {} + // TODO: change string_view to string std::string_view name; std::vector<Node> children; + + bool operator==(const Node& other) const { + return name == other.name && children == other.children; + } }; explicit ProjectionTree(const TypePropertyMask& type_field_mask); const Node& root() const { return root_; } + bool operator==(const ProjectionTree& other) const { + return root_ == other.root_; + } + private: // Add a child node with property_name to current_children and returns a // pointer to the child node. 
diff --git a/icing/result/result-retriever-v2.cc b/icing/result/result-retriever-v2.cc new file mode 100644 index 0000000..195f641 --- /dev/null +++ b/icing/result/result-retriever-v2.cc @@ -0,0 +1,175 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/result/result-retriever-v2.h" + +#include <memory> +#include <string_view> +#include <unordered_map> +#include <utility> +#include <vector> + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/result/page-result.h" +#include "icing/result/projection-tree.h" +#include "icing/result/projector.h" +#include "icing/result/snippet-context.h" +#include "icing/result/snippet-retriever.h" +#include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-store.h" +#include "icing/store/namespace-id.h" +#include "icing/tokenization/language-segmenter.h" +#include "icing/transform/normalizer.h" +#include "icing/util/status-macros.h" + +namespace icing { +namespace lib { + +bool GroupResultLimiterV2::ShouldBeRemoved( + const ScoredDocumentHit& scored_document_hit, + const std::unordered_map<NamespaceId, int>& namespace_group_id_map, + const DocumentStore& document_store, + std::vector<int>& group_result_limits) const { + auto document_filter_data_optional = + document_store.GetAliveDocumentFilterData( + scored_document_hit.document_id()); + if 
(!document_filter_data_optional) { + // The document doesn't exist. + return true; + } + NamespaceId namespace_id = + document_filter_data_optional.value().namespace_id(); + auto iter = namespace_group_id_map.find(namespace_id); + if (iter == namespace_group_id_map.end()) { + // If a namespace id isn't found in namespace_group_id_map, then there are + // no limits placed on results from this namespace. + return false; + } + int& count = group_result_limits.at(iter->second); + if (count <= 0) { + return true; + } + --count; + return false; +} + +libtextclassifier3::StatusOr<std::unique_ptr<ResultRetrieverV2>> +ResultRetrieverV2::Create( + const DocumentStore* doc_store, const SchemaStore* schema_store, + const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, + std::unique_ptr<const GroupResultLimiterV2> group_result_limiter) { + ICING_RETURN_ERROR_IF_NULL(doc_store); + ICING_RETURN_ERROR_IF_NULL(schema_store); + ICING_RETURN_ERROR_IF_NULL(language_segmenter); + ICING_RETURN_ERROR_IF_NULL(normalizer); + ICING_RETURN_ERROR_IF_NULL(group_result_limiter); + + ICING_ASSIGN_OR_RETURN( + std::unique_ptr<SnippetRetriever> snippet_retriever, + SnippetRetriever::Create(schema_store, language_segmenter, normalizer)); + + return std::unique_ptr<ResultRetrieverV2>( + new ResultRetrieverV2(doc_store, std::move(snippet_retriever), + std::move(group_result_limiter))); +} + +std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage( + ResultStateV2& result_state) const { + absl_ports::unique_lock l(&result_state.mutex); + + // For calculating page + int original_scored_document_hits_ranker_size = + result_state.scored_document_hits_ranker->size(); + int num_results_with_snippets = 0; + + const SnippetContext& snippet_context = result_state.snippet_context(); + const std::unordered_map<std::string, ProjectionTree>& projection_tree_map = + result_state.projection_tree_map(); + auto wildcard_projection_tree_itr = projection_tree_map.find( + 
std::string(ProjectionTree::kSchemaTypeWildcard)); + + // Calculates how many snippets to return for this page. + int remaining_num_to_snippet = + snippet_context.snippet_spec.num_to_snippet() - result_state.num_returned; + if (remaining_num_to_snippet < 0) { + remaining_num_to_snippet = 0; + } + + // Retrieve info + std::vector<SearchResultProto::ResultProto> results; + while (results.size() < result_state.num_per_page() && + !result_state.scored_document_hits_ranker->empty()) { + ScoredDocumentHit next_best_document_hit = + result_state.scored_document_hits_ranker->PopNext(); + if (group_result_limiter_->ShouldBeRemoved( + next_best_document_hit, result_state.namespace_group_id_map(), + doc_store_, result_state.group_result_limits)) { + continue; + } + + libtextclassifier3::StatusOr<DocumentProto> document_or = + doc_store_.Get(next_best_document_hit.document_id()); + if (!document_or.ok()) { + // Skip the document if getting errors. + ICING_LOG(WARNING) << "Fail to fetch document from document store: " + << document_or.status().error_message(); + continue; + } + + DocumentProto document = std::move(document_or).ValueOrDie(); + // Apply projection + auto itr = projection_tree_map.find(document.schema()); + if (itr != projection_tree_map.end()) { + projector::Project(itr->second.root().children, &document); + } else if (wildcard_projection_tree_itr != projection_tree_map.end()) { + projector::Project(wildcard_projection_tree_itr->second.root().children, + &document); + } + + SearchResultProto::ResultProto result; + // Add the snippet if requested. 
+ if (snippet_context.snippet_spec.num_matches_per_property() > 0 && + remaining_num_to_snippet > results.size()) { + SnippetProto snippet_proto = snippet_retriever_->RetrieveSnippet( + snippet_context.query_terms, snippet_context.match_type, + snippet_context.snippet_spec, document, + next_best_document_hit.hit_section_id_mask()); + *result.mutable_snippet() = std::move(snippet_proto); + ++num_results_with_snippets; + } + + // Add the document, itself. + *result.mutable_document() = std::move(document); + result.set_score(next_best_document_hit.score()); + results.push_back(std::move(result)); + } + + // Update numbers in ResultState + result_state.num_returned += results.size(); + result_state.IncrementNumTotalHits( + result_state.scored_document_hits_ranker->size() - + original_scored_document_hits_ranker_size); + + bool has_more_results = !result_state.scored_document_hits_ranker->empty(); + + return std::make_pair( + PageResult(std::move(results), num_results_with_snippets, + result_state.num_per_page()), + has_more_results); +} + +} // namespace lib +} // namespace icing diff --git a/icing/result/result-retriever-v2.h b/icing/result/result-retriever-v2.h new file mode 100644 index 0000000..b481cfc --- /dev/null +++ b/icing/result/result-retriever-v2.h @@ -0,0 +1,108 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ICING_RESULT_RETRIEVER_V2_H_ +#define ICING_RESULT_RETRIEVER_V2_H_ + +#include <memory> +#include <unordered_map> +#include <utility> +#include <vector> + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/result/page-result.h" +#include "icing/result/result-state-v2.h" +#include "icing/result/snippet-retriever.h" +#include "icing/schema/schema-store.h" +#include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-store.h" +#include "icing/store/namespace-id.h" +#include "icing/tokenization/language-segmenter.h" +#include "icing/transform/normalizer.h" + +namespace icing { +namespace lib { + +class GroupResultLimiterV2 { + public: + GroupResultLimiterV2() {} + + virtual ~GroupResultLimiterV2() = default; + + // Returns true if the scored_document_hit should be removed. + virtual bool ShouldBeRemoved( + const ScoredDocumentHit& scored_document_hit, + const std::unordered_map<NamespaceId, int>& namespace_group_id_map, + const DocumentStore& document_store, + std::vector<int>& group_result_limits) const; +}; + +class ResultRetrieverV2 { + public: + // Factory function to create a ResultRetrieverV2 which does not take + // ownership of any input components, and all pointers must refer to valid + // objects that outlive the created ResultRetrieverV2 instance. + // + // Returns: + // A ResultRetrieverV2 on success + // FAILED_PRECONDITION on any null pointer input + static libtextclassifier3::StatusOr<std::unique_ptr<ResultRetrieverV2>> + Create(const DocumentStore* doc_store, const SchemaStore* schema_store, + const LanguageSegmenter* language_segmenter, + const Normalizer* normalizer, + std::unique_ptr<const GroupResultLimiterV2> group_result_limiter = + std::make_unique<const GroupResultLimiterV2>()); + + // Retrieves results (pairs of DocumentProtos and SnippetProtos) with the + // given ResultState which holds document and snippet information. 
It pulls + // out the next top rank documents from ResultState, retrieves the documents + // from storage, updates ResultState, and finally wraps the result + other + // information into PageResult. The expected number of documents to return is + // min(num_per_page, the number of all scored document hits) inside + // ResultState. + // + // The number of snippets to return is based on the total number of snippets + // needed and number of snippets that have already been returned previously + // for the same query. The order of results returned will be sorted by + // scored_document_hit_comparator inside ResultState. + // + // An additional boolean value will be returned, indicating if ResultState has + // remaining documents to be retrieved next round. + // + // All errors will be ignored. It will keep retrieving the next document and + // valid documents will be included in PageResult. + // + // Returns: + // std::pair<PageResult, bool> + std::pair<PageResult, bool> RetrieveNextPage( + ResultStateV2& result_state) const; + + private: + explicit ResultRetrieverV2( + const DocumentStore* doc_store, + std::unique_ptr<SnippetRetriever> snippet_retriever, + std::unique_ptr<const GroupResultLimiterV2> group_result_limiter) + : doc_store_(*doc_store), + snippet_retriever_(std::move(snippet_retriever)), + group_result_limiter_(std::move(group_result_limiter)) {} + + const DocumentStore& doc_store_; + std::unique_ptr<SnippetRetriever> snippet_retriever_; + const std::unique_ptr<const GroupResultLimiterV2> group_result_limiter_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_RESULT_RETRIEVER_V2_H_ diff --git a/icing/result/result-retriever-v2_group-result-limiter-test.cc b/icing/result/result-retriever-v2_group-result-limiter-test.cc new file mode 100644 index 0000000..e4bfe09 --- /dev/null +++ b/icing/result/result-retriever-v2_group-result-limiter-test.cc @@ -0,0 +1,639 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, 
Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <memory> +#include <vector> + +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/result/page-result.h" +#include "icing/result/result-retriever-v2.h" +#include "icing/result/result-state-v2.h" +#include "icing/schema/schema-store.h" +#include "icing/schema/section.h" +#include "icing/scoring/priority-queue-scored-document-hits-ranker.h" +#include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-id.h" +#include "icing/store/namespace-id.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" +#include "icing/tokenization/language-segmenter-factory.h" +#include "icing/transform/normalizer-factory.h" +#include "icing/transform/normalizer.h" +#include "unicode/uloc.h" + +namespace icing { +namespace lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::Pair; +using ::testing::Pointee; +using ::testing::SizeIs; +using ::testing::UnorderedElementsAre; + +class ResultRetrieverV2GroupResultLimiterTest 
: public testing::Test { + protected: + ResultRetrieverV2GroupResultLimiterTest() + : test_dir_(GetTestTempDir() + "/icing") { + filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); + } + + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + ICING_ASSERT_OK( + // File generated via icu_data_file rule in //icing/BUILD. + icu_data_file_helper::SetUpICUDataFile( + GetTestFilePath("icing/icu.dat"))); + } + language_segmenter_factory::SegmenterOptions options(ULOC_US); + ICING_ASSERT_OK_AND_ASSIGN( + language_segmenter_, + language_segmenter_factory::Create(std::move(options))); + + ICING_ASSERT_OK_AND_ASSIGN( + schema_store_, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( + /*max_term_byte_size=*/10000)); + + SchemaProto schema; + schema.add_types()->set_schema_type("Document"); + ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema))); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + document_store_ = std::move(create_result.document_store); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); + } + + const Filesystem filesystem_; + const std::string test_dir_; + std::unique_ptr<LanguageSegmenter> language_segmenter_; + std::unique_ptr<SchemaStore> schema_store_; + std::unique_ptr<Normalizer> normalizer_; + std::unique_ptr<DocumentStore> document_store_; + FakeClock fake_clock_; +}; + +// TODO(sungyc): Refactor helper functions below (builder classes or common test +// utility). 
+ +SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) { + SearchSpecProto search_spec; + search_spec.set_term_match_type(match_type); + return search_spec; +} + +ScoringSpecProto CreateScoringSpec(bool is_descending_order) { + ScoringSpecProto scoring_spec; + scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC + : ScoringSpecProto::Order::ASC); + return scoring_spec; +} + +ResultSpecProto CreateResultSpec(int num_per_page) { + ResultSpecProto result_spec; + result_spec.set_num_per_page(num_per_page); + return result_spec; +} + +TEST_F(ResultRetrieverV2GroupResultLimiterTest, + ResultGroupingShouldLimitResults) { + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Document") + .SetScore(1) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document1)); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Document") + .SetScore(2) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document2)); + + std::vector<ScoredDocumentHit> scored_document_hits = { + ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()), + ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())}; + + // Create a ResultSpec that limits "namespace" to a single result. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace"); + + // Creates a ResultState with 2 ScoredDocumentHits. 
+ ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Only the top ranked document in "namespace" (document2), should be + // returned. + auto [page_result, has_more_results] = + result_retriever->RetrieveNextPage(result_state); + ASSERT_THAT(page_result.results, SizeIs(1)); + EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2)); + // Document1 has not been returned due to GroupResultLimiter, but since it was + // "filtered out", there should be no more results. + EXPECT_FALSE(has_more_results); +} + +TEST_F(ResultRetrieverV2GroupResultLimiterTest, + ResultGroupingDoesNotLimitOtherNamespaceResults) { + // Creates 4 documents and ensures the relationship in terms of document + // score is: document1 < document2 < document3 < document4 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Document") + .SetScore(1) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document1)); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri/2") + .SetSchema("Document") + .SetScore(2) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document2)); + + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "uri/3") + .SetSchema("Document") + .SetScore(3) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + document_store_->Put(document3)); + + 
DocumentProto document4 = DocumentBuilder() + .SetKey("namespace2", "uri/4") + .SetSchema("Document") + .SetScore(4) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, + document_store_->Put(document4)); + + std::vector<ScoredDocumentHit> scored_document_hits = { + ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()), + ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score()), + ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()), + ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score())}; + + // Create a ResultSpec that limits "namespace1" to a single result, but + // doesn't limit "namespace2". + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace1"); + + // Creates a ResultState with 4 ScoredDocumentHits. + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // All documents in "namespace2" should be returned. 
+ PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(3)); + EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document4)); + EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document3)); + EXPECT_THAT(page_result.results.at(2).document(), EqualsProto(document2)); +} + +TEST_F(ResultRetrieverV2GroupResultLimiterTest, + ResultGroupingNonexistentNamespaceShouldBeIgnored) { + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Document") + .SetScore(1) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document1)); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Document") + .SetScore(2) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document2)); + + std::vector<ScoredDocumentHit> scored_document_hits = { + ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()), + ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())}; + + // Create a ResultSpec that limits "namespace"+"nonExistentNamespace" to a + // single result. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace"); + result_grouping->add_namespaces("nonexistentNamespace"); + + // Creates a ResultState with 2 ScoredDocumentHits. 
+ ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Only the top ranked document in "namespace" (document2), should be + // returned. The presence of "nonexistentNamespace" in the same result + // grouping should have no effect. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(1)); + EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2)); +} + +TEST_F(ResultRetrieverV2GroupResultLimiterTest, + ResultGroupingMultiNamespaceGrouping) { + // Creates 6 documents and ensures the relationship in terms of document + // score is: document1 < document2 < document3 < document4 < document5 < + // document6 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Document") + .SetScore(1) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document1)); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri/2") + .SetSchema("Document") + .SetScore(2) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document2)); + + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "uri/3") + .SetSchema("Document") + .SetScore(3) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + document_store_->Put(document3)); + + DocumentProto document4 = DocumentBuilder() + .SetKey("namespace2", 
"uri/4") + .SetSchema("Document") + .SetScore(4) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, + document_store_->Put(document4)); + + DocumentProto document5 = DocumentBuilder() + .SetKey("namespace3", "uri/5") + .SetSchema("Document") + .SetScore(5) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5, + document_store_->Put(document5)); + + DocumentProto document6 = DocumentBuilder() + .SetKey("namespace3", "uri/6") + .SetSchema("Document") + .SetScore(6) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6, + document_store_->Put(document6)); + + std::vector<ScoredDocumentHit> scored_document_hits = { + ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()), + ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score()), + ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()), + ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score()), + ScoredDocumentHit(document_id5, kSectionIdMaskNone, document5.score()), + ScoredDocumentHit(document_id6, kSectionIdMaskNone, document6.score())}; + + // Create a ResultSpec that limits "namespace1" to a single result and limits + // "namespace2"+"namespace3" to a total of two results. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace1"); + result_grouping = result_spec.add_result_groupings(); + result_grouping->set_max_results(2); + result_grouping->add_namespaces("namespace2"); + result_grouping->add_namespaces("namespace3"); + + // Creates a ResultState with 6 ScoredDocumentHits. 
+ ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Only the top-ranked result in "namespace1" (document2) should be returned. + // Only the top-ranked results across "namespace2" and "namespace3" + // (document6, document5) should be returned. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(3)); + EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document6)); + EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document5)); + EXPECT_THAT(page_result.results.at(2).document(), EqualsProto(document2)); +} + +TEST_F(ResultRetrieverV2GroupResultLimiterTest, + ResultGroupingOnlyNonexistentNamespaces) { + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Document") + .SetScore(1) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document1)); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Document") + .SetScore(2) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document2)); + + std::vector<ScoredDocumentHit> scored_document_hits = { + ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()), + ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())}; 
+ + // Create a ResultSpec that limits "nonexistentNamespace" to a single result. + // but doesn't limit "namespace" + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("nonexistentNamespace"); + + // Creates a ResultState with 2 ScoredDocumentHits. + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // All documents in "namespace" should be returned. The presence of + // "nonexistentNamespace" should have no effect. 
+ PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2)); + EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document1)); +} + +TEST_F(ResultRetrieverV2GroupResultLimiterTest, + ShouldUpdateResultStateCorrectlyWithGroupResultLimiter) { + // Creates 5 documents and ensures the relationship in terms of document + // score is: document1 < document2 < document3 < document4 < document5 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace2", "uri/1") + .SetSchema("Document") + .SetScore(1) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document1)); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri/2") + .SetSchema("Document") + .SetScore(2) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document2)); + + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace1", "uri/3") + .SetSchema("Document") + .SetScore(3) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + document_store_->Put(document3)); + + DocumentProto document4 = DocumentBuilder() + .SetKey("namespace2", "uri/4") + .SetSchema("Document") + .SetScore(4) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, + document_store_->Put(document4)); + + DocumentProto document5 = DocumentBuilder() + .SetKey("namespace2", "uri/5") + .SetSchema("Document") + .SetScore(5) + .SetCreationTimestampMs(1000) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5, + document_store_->Put(document5)); + + std::vector<ScoredDocumentHit> scored_document_hits = { + ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()), + ScoredDocumentHit(document_id2, 
kSectionIdMaskNone, document2.score()), + ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()), + ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score()), + ScoredDocumentHit(document_id5, kSectionIdMaskNone, document5.score())}; + + // Create a ResultSpec that limits "namespace1" to 3 results and "namespace2" + // to a single result. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + result_grouping->set_max_results(3); + result_grouping->add_namespaces("namespace1"); + result_grouping = result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + result_grouping->add_namespaces("namespace2"); + + // Get namespace ids. + ICING_ASSERT_OK_AND_ASSIGN(NamespaceId namespace_id1, + document_store_->GetNamespaceId("namespace1")); + ICING_ASSERT_OK_AND_ASSIGN(NamespaceId namespace_id2, + document_store_->GetNamespaceId("namespace2")); + + // Creates a ResultState with 5 ScoredDocumentHits. + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + *document_store_); + { + absl_ports::shared_lock l(&result_state.mutex); + + ASSERT_THAT( + result_state.namespace_group_id_map(), + UnorderedElementsAre(Pair(namespace_id1, 0), Pair(namespace_id2, 1))); + ASSERT_THAT(result_state.group_result_limits, ElementsAre(3, 1)); + } + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // document5, document4, document1 belong to namespace2 (with max_results = + // 1). + // docuemnt3, document2 belong to namespace 1 (with max_results = 3). 
+ // Since num_per_page is 2, we expect to get document5 and document3 in the + // first page. + auto [page_result1, has_more_results1] = + result_retriever->RetrieveNextPage(result_state); + ASSERT_THAT(page_result1.results, SizeIs(2)); + ASSERT_THAT(page_result1.results.at(0).document(), EqualsProto(document5)); + ASSERT_THAT(page_result1.results.at(1).document(), EqualsProto(document3)); + ASSERT_TRUE(has_more_results1); + { + absl_ports::shared_lock l(&result_state.mutex); + + // Should remove document5, document4 and document3 from + // scored_document_hits. It removes more than num_per_page documents because + // document4 is filtered out by GroupResultLimiter and ResultRetriever has + // to fetch the next one until returning num_per_page documents or no + // remaining documents in scored_document_hits. + ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone, + document1.score()); + ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone, + document2.score()); + EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(2))); + + // Even though we removed 3 document hits from scored_document_hits this + // round, num_returned should still be 2, since document4 was "filtered out" + // and should not be counted into num_returned. + EXPECT_THAT(result_state.num_returned, Eq(2)); + // namespace_group_id_map should be unchanged. + EXPECT_THAT( + result_state.namespace_group_id_map(), + UnorderedElementsAre(Pair(namespace_id1, 0), Pair(namespace_id2, 1))); + // GroupResultLimiter should decrement the # in group_result_limits. + EXPECT_THAT(result_state.group_result_limits, ElementsAre(2, 0)); + } + + // Although there are document2 and document1 left, since namespace2 has + // reached its max results, document1 should be excluded from the second page. 
+ auto [page_result2, has_more_results2] = + result_retriever->RetrieveNextPage(result_state); + ASSERT_THAT(page_result2.results, SizeIs(1)); + ASSERT_THAT(page_result2.results.at(0).document(), EqualsProto(document2)); + ASSERT_FALSE(has_more_results2); + { + absl_ports::shared_lock l(&result_state.mutex); + + // Should remove document2 and document1 from scored_document_hits. + EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(IsEmpty())); + // Even though we removed 2 document hits from scored_document_hits this + // round, num_returned should only be incremented by 1 (and thus become 3), + // since document1 was "filtered out" and should not be counted into + // num_returned. + EXPECT_THAT(result_state.num_returned, Eq(3)); + // namespace_group_id_map should be unchanged. + EXPECT_THAT( + result_state.namespace_group_id_map(), + UnorderedElementsAre(Pair(namespace_id1, 0), Pair(namespace_id2, 1))); + // GroupResultLimiter should decrement the # in group_result_limits. + EXPECT_THAT(result_state.group_result_limits, ElementsAre(1, 0)); + } +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/result/result-retriever-v2_projection-test.cc b/icing/result/result-retriever-v2_projection-test.cc new file mode 100644 index 0000000..bdd1715 --- /dev/null +++ b/icing/result/result-retriever-v2_projection-test.cc @@ -0,0 +1,1281 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <memory> +#include <vector> + +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/result/page-result.h" +#include "icing/result/projection-tree.h" +#include "icing/result/result-retriever-v2.h" +#include "icing/result/result-state-v2.h" +#include "icing/schema-builder.h" +#include "icing/schema/schema-store.h" +#include "icing/schema/section.h" +#include "icing/scoring/priority-queue-scored-document-hits-ranker.h" +#include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-id.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" +#include "icing/tokenization/language-segmenter-factory.h" +#include "icing/transform/normalizer-factory.h" +#include "icing/transform/normalizer.h" +#include "unicode/uloc.h" + +namespace icing { +namespace lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::SizeIs; + +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; + +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; + +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; +constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; + +class ResultRetrieverV2ProjectionTest : public testing::Test { + protected: + ResultRetrieverV2ProjectionTest() : test_dir_(GetTestTempDir() + "/icing") { + filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); + } + + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + 
ICING_ASSERT_OK( + // File generated via icu_data_file rule in //icing/BUILD. + icu_data_file_helper::SetUpICUDataFile( + GetTestFilePath("icing/icu.dat"))); + } + language_segmenter_factory::SegmenterOptions options(ULOC_US); + ICING_ASSERT_OK_AND_ASSIGN( + language_segmenter_, + language_segmenter_factory::Create(std::move(options))); + + ICING_ASSERT_OK_AND_ASSIGN( + schema_store_, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( + /*max_term_byte_size=*/10000)); + + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + document_store_ = std::move(create_result.document_store); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); + } + + SectionId GetSectionId(const std::string& type, const std::string& property) { + auto 
type_id_or = schema_store_->GetSchemaTypeId(type); + if (!type_id_or.ok()) { + return kInvalidSectionId; + } + SchemaTypeId type_id = type_id_or.ValueOrDie(); + for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) { + auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id); + if (!metadata_or.ok()) { + break; + } + const SectionMetadata* metadata = metadata_or.ValueOrDie(); + if (metadata->path == property) { + return metadata->id; + } + } + return kInvalidSectionId; + } + + const Filesystem filesystem_; + const std::string test_dir_; + std::unique_ptr<LanguageSegmenter> language_segmenter_; + std::unique_ptr<SchemaStore> schema_store_; + std::unique_ptr<Normalizer> normalizer_; + std::unique_ptr<DocumentStore> document_store_; + FakeClock fake_clock_; +}; + +// TODO(sungyc): Refactor helper functions below (builder classes or common test +// utility). + +SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) { + SectionIdMask mask = 0; + for (SectionId section_id : section_ids) { + mask |= (1u << section_id); + } + return mask; +} + +SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) { + SearchSpecProto search_spec; + search_spec.set_term_match_type(match_type); + return search_spec; +} + +ScoringSpecProto CreateScoringSpec(bool is_descending_order) { + ScoringSpecProto scoring_spec; + scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC + : ScoringSpecProto::Order::ASC); + return scoring_spec; +} + +ResultSpecProto CreateResultSpec(int num_per_page) { + ResultSpecProto result_spec; + result_spec.set_num_per_page(num_per_page); + return result_spec; +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTopLevelLeadNodeFieldPath) { + // 1. 
Add two Email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* type_property_mask = result_spec.add_type_property_masks(); + type_property_mask->set_schema_type("Email"); + type_property_mask->add_paths("name"); + + // 4. Create ResultState with custom ResultSpec. 
+ ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned results only contain the 'name' property. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Goodnight Moon!") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionNestedLeafNodeFieldPath) { + // 1. Add two Email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build()) + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .AddStringProperty("name", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* type_property_mask = result_spec.add_type_property_masks(); + type_property_mask->set_schema_type("Email"); + type_property_mask->add_paths("sender.name"); + + // 4. Create ResultState with custom ResultSpec. 
+ ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned results only contain the 'sender.name' + // property. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .Build()) + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .Build()) + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionIntermediateNodeFieldPath) { + // 1. 
Add two Email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build()) + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .AddStringProperty("name", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* type_property_mask = result_spec.add_type_property_masks(); + type_property_mask->set_schema_type("Email"); + type_property_mask->add_paths("sender"); + + // 4. Create ResultState with custom ResultSpec. 
+ ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned results only contain the 'sender' + // property and all of the subproperties of 'sender'. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build()) + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleNestedFieldPaths) { + // 1. 
Add two Email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build()) + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .AddStringProperty("name", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* type_property_mask = result_spec.add_type_property_masks(); + type_property_mask->set_schema_type("Email"); + type_property_mask->add_paths("sender.name"); + type_property_mask->add_paths("sender.emailAddress"); + + // 4. Create ResultState with custom ResultSpec. 
+ ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned results only contain the 'sender.name' and + // 'sender.address' properties. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build()) + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionEmptyFieldPath) { + // 1. 
Add two Email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* type_property_mask = result_spec.add_type_property_masks(); + type_property_mask->set_schema_type("Email"); + + // 4. Create ResultState with custom ResultSpec. 
+ ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned results contain *no* properties. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionInvalidFieldPath) { + // 1. Add two Email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* type_property_mask = result_spec.add_type_property_masks(); + type_property_mask->set_schema_type("Email"); + type_property_mask->add_paths("nonExistentProperty"); + + // 4. Create ResultState with custom ResultSpec. + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned results contain *no* properties. 
+ PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionValidAndInvalidFieldPath) { + // 1. Add two Email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. 
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* type_property_mask = result_spec.add_type_property_masks(); + type_property_mask->set_schema_type("Email"); + type_property_mask->add_paths("name"); + type_property_mask->add_paths("nonExistentProperty"); + + // 4. Create ResultState with custom ResultSpec. + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned results only contain the 'name' property. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Goodnight Moon!") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesNoWildcards) { + // 1. 
Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* type_property_mask = result_spec.add_type_property_masks(); + type_property_mask->set_schema_type("Email"); + type_property_mask->add_paths("name"); + + // 4. Create ResultState with custom ResultSpec. 
+ ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned Email results only contain the 'name' + // property and the returned Person results have all of their properties. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesWildcard) { + // 1. Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* wildcard_type_property_mask = + result_spec.add_type_property_masks(); + wildcard_type_property_mask->set_schema_type( + std::string(ProjectionTree::kSchemaTypeWildcard)); + wildcard_type_property_mask->add_paths("name"); + + // 4. Create ResultState with custom ResultSpec. + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned Email results only contain the 'name' + // property and the returned Person results only contain the 'name' property. 
+ PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, + ProjectionMultipleTypesWildcardWithOneOverride) { + // 1. Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. 
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* email_type_property_mask = + result_spec.add_type_property_masks(); + email_type_property_mask->set_schema_type("Email"); + email_type_property_mask->add_paths("body"); + TypePropertyMask* wildcard_type_property_mask = + result_spec.add_type_property_masks(); + wildcard_type_property_mask->set_schema_type( + std::string(ProjectionTree::kSchemaTypeWildcard)); + wildcard_type_property_mask->add_paths("name"); + + // 4. Create ResultState with custom ResultSpec. + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned Email results only contain the 'body' + // property and the returned Person results only contain the 'name' property. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty( + "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, + ProjectionSingleTypesWildcardAndOverride) { + // 1. Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Person") + .AddStringProperty("name", "Mr. Body") + .AddStringProperty("emailAddress", "mr.body123@gmail.com") + .Build()) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. 
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* email_type_property_mask = + result_spec.add_type_property_masks(); + email_type_property_mask->set_schema_type("Email"); + email_type_property_mask->add_paths("sender.name"); + TypePropertyMask* wildcard_type_property_mask = + result_spec.add_type_property_masks(); + wildcard_type_property_mask->set_schema_type( + std::string(ProjectionTree::kSchemaTypeWildcard)); + wildcard_type_property_mask->add_paths("name"); + + // 4. Create ResultState with custom ResultSpec. + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned Email results only contain the 'sender.name' + // property and the returned Person results only contain the 'name' property. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Person") + .AddStringProperty("name", "Mr. 
Body") + .Build()) + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, + ProjectionSingleTypesWildcardAndOverrideNestedProperty) { + // 1. Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Person") + .AddStringProperty("name", "Mr. Body") + .AddStringProperty("emailAddress", "mr.body123@gmail.com") + .Build()) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. 
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* email_type_property_mask = + result_spec.add_type_property_masks(); + email_type_property_mask->set_schema_type("Email"); + email_type_property_mask->add_paths("sender.name"); + TypePropertyMask* wildcard_type_property_mask = + result_spec.add_type_property_masks(); + wildcard_type_property_mask->set_schema_type( + std::string(ProjectionTree::kSchemaTypeWildcard)); + wildcard_type_property_mask->add_paths("sender"); + + // 4. Create ResultState with custom ResultSpec. + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned Email results only contain the 'sender.name' + // property and the returned Person results contain no properties. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Person") + .AddStringProperty("name", "Mr. 
Body") + .Build()) + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/result/result-retriever-v2_snippet-test.cc b/icing/result/result-retriever-v2_snippet-test.cc new file mode 100644 index 0000000..afb31cf --- /dev/null +++ b/icing/result/result-retriever-v2_snippet-test.cc @@ -0,0 +1,573 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <limits> +#include <memory> +#include <string_view> +#include <vector> + +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/result/page-result.h" +#include "icing/result/result-retriever-v2.h" +#include "icing/result/result-state-v2.h" +#include "icing/schema-builder.h" +#include "icing/schema/schema-store.h" +#include "icing/schema/section.h" +#include "icing/scoring/priority-queue-scored-document-hits-ranker.h" +#include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-id.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/snippet-helpers.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" +#include "icing/tokenization/language-segmenter-factory.h" +#include "icing/transform/normalizer-factory.h" +#include "icing/transform/normalizer.h" +#include "unicode/uloc.h" + +namespace icing { +namespace lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::SizeIs; + +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; + +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; + +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; +constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; + +class ResultRetrieverV2SnippetTest : public testing::Test { + protected: + ResultRetrieverV2SnippetTest() : test_dir_(GetTestTempDir() + "/icing") { + 
filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); + } + + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + ICING_ASSERT_OK( + // File generated via icu_data_file rule in //icing/BUILD. + icu_data_file_helper::SetUpICUDataFile( + GetTestFilePath("icing/icu.dat"))); + } + language_segmenter_factory::SegmenterOptions options(ULOC_US); + ICING_ASSERT_OK_AND_ASSIGN( + language_segmenter_, + language_segmenter_factory::Create(std::move(options))); + + ICING_ASSERT_OK_AND_ASSIGN( + schema_store_, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( + /*max_term_byte_size=*/10000)); + + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + document_store_ = std::move(create_result.document_store); + } + + void TearDown() override 
{ + filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); + } + + SectionId GetSectionId(const std::string& type, const std::string& property) { + auto type_id_or = schema_store_->GetSchemaTypeId(type); + if (!type_id_or.ok()) { + return kInvalidSectionId; + } + SchemaTypeId type_id = type_id_or.ValueOrDie(); + for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) { + auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id); + if (!metadata_or.ok()) { + break; + } + const SectionMetadata* metadata = metadata_or.ValueOrDie(); + if (metadata->path == property) { + return metadata->id; + } + } + return kInvalidSectionId; + } + + const Filesystem filesystem_; + const std::string test_dir_; + std::unique_ptr<LanguageSegmenter> language_segmenter_; + std::unique_ptr<SchemaStore> schema_store_; + std::unique_ptr<Normalizer> normalizer_; + std::unique_ptr<DocumentStore> document_store_; + FakeClock fake_clock_; +}; + +// TODO(sungyc): Refactor helper functions below (builder classes or common test +// utility). 
+ +ResultSpecProto::SnippetSpecProto CreateSnippetSpec() { + ResultSpecProto::SnippetSpecProto snippet_spec; + snippet_spec.set_num_to_snippet(std::numeric_limits<int>::max()); + snippet_spec.set_num_matches_per_property(std::numeric_limits<int>::max()); + snippet_spec.set_max_window_utf32_length(1024); + return snippet_spec; +} + +DocumentProto CreateDocument(int id) { + return DocumentBuilder() + .SetKey("icing", "Email/" + std::to_string(id)) + .SetSchema("Email") + .AddStringProperty("name", "subject foo " + std::to_string(id)) + .AddStringProperty("body", "body bar " + std::to_string(id)) + .SetCreationTimestampMs(1574365086666 + id) + .Build(); +} + +SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) { + SectionIdMask mask = 0; + for (SectionId section_id : section_ids) { + mask |= (1u << section_id); + } + return mask; +} + +SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) { + SearchSpecProto search_spec; + search_spec.set_term_match_type(match_type); + return search_spec; +} + +ScoringSpecProto CreateScoringSpec(bool is_descending_order) { + ScoringSpecProto scoring_spec; + scoring_spec.set_order_by(is_descending_order ? 
ScoringSpecProto::Order::DESC + : ScoringSpecProto::Order::ASC); + return scoring_spec; +} + +ResultSpecProto CreateResultSpec(int num_per_page) { + ResultSpecProto result_spec; + result_spec.set_num_per_page(num_per_page); + return result_spec; +} + +TEST_F(ResultRetrieverV2SnippetTest, + DefaultSnippetSpecShouldDisableSnippeting) { + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(CreateDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(CreateDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + document_store_->Put(CreateDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + CreateResultSpec(/*num_per_page=*/3), *document_store_); + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(3)); + EXPECT_THAT(page_result.results.at(0).snippet(), + EqualsProto(SnippetProto::default_instance())); + EXPECT_THAT(page_result.results.at(1).snippet(), + EqualsProto(SnippetProto::default_instance())); + EXPECT_THAT(page_result.results.at(2).snippet(), + 
EqualsProto(SnippetProto::default_instance())); + EXPECT_THAT(page_result.num_results_with_snippets, Eq(0)); +} + +TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) { + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(CreateDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(CreateDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + document_store_->Put(CreateDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Create ResultSpec with custom snippet spec. 
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3); + *result_spec.mutable_snippet_spec() = CreateSnippetSpec(); + + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/{{"", {"foo", "bar"}}}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(3)); + EXPECT_THAT(page_result.num_results_with_snippets, Eq(3)); + + const DocumentProto& result_document_one = + page_result.results.at(0).document(); + const SnippetProto& result_snippet_one = page_result.results.at(0).snippet(); + EXPECT_THAT(result_document_one, EqualsProto(CreateDocument(/*id=*/1))); + EXPECT_THAT(result_snippet_one.entries(), SizeIs(2)); + EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body")); + std::string_view content = GetString( + &result_document_one, result_snippet_one.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)), + ElementsAre("body bar 1")); + EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)), + ElementsAre("bar")); + EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("name")); + content = GetString(&result_document_one, + result_snippet_one.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_one.entries(1)), + ElementsAre("subject foo 1")); + EXPECT_THAT(GetMatches(content, result_snippet_one.entries(1)), + ElementsAre("foo")); + + const DocumentProto& result_document_two = + page_result.results.at(1).document(); + const SnippetProto& result_snippet_two = page_result.results.at(1).snippet(); + EXPECT_THAT(result_document_two, EqualsProto(CreateDocument(/*id=*/2))); + EXPECT_THAT(result_snippet_two.entries(), SizeIs(2)); + 
EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body")); + content = GetString(&result_document_two, + result_snippet_two.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)), + ElementsAre("body bar 2")); + EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)), + ElementsAre("bar")); + EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("name")); + content = GetString(&result_document_two, + result_snippet_two.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_two.entries(1)), + ElementsAre("subject foo 2")); + EXPECT_THAT(GetMatches(content, result_snippet_two.entries(1)), + ElementsAre("foo")); + + const DocumentProto& result_document_three = + page_result.results.at(2).document(); + const SnippetProto& result_snippet_three = + page_result.results.at(2).snippet(); + EXPECT_THAT(result_document_three, EqualsProto(CreateDocument(/*id=*/3))); + EXPECT_THAT(result_snippet_three.entries(), SizeIs(2)); + EXPECT_THAT(result_snippet_three.entries(0).property_name(), Eq("body")); + content = GetString(&result_document_three, + result_snippet_three.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_three.entries(0)), + ElementsAre("body bar 3")); + EXPECT_THAT(GetMatches(content, result_snippet_three.entries(0)), + ElementsAre("bar")); + EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("name")); + content = GetString(&result_document_three, + result_snippet_three.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_three.entries(1)), + ElementsAre("subject foo 3")); + EXPECT_THAT(GetMatches(content, result_snippet_three.entries(1)), + ElementsAre("foo")); +} + +TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) { + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(CreateDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + 
document_store_->Put(CreateDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + document_store_->Put(CreateDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Create ResultSpec with custom snippet spec. + ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec(); + snippet_spec.set_num_to_snippet(1); + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3); + *result_spec.mutable_snippet_spec() = std::move(snippet_spec); + + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/{{"", {"foo", "bar"}}}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(3)); + EXPECT_THAT(page_result.num_results_with_snippets, Eq(1)); + + const DocumentProto& result_document = page_result.results.at(0).document(); + const SnippetProto& result_snippet = page_result.results.at(0).snippet(); + EXPECT_THAT(result_document, EqualsProto(CreateDocument(/*id=*/1))); + EXPECT_THAT(result_snippet.entries(), SizeIs(2)); + EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&result_document, 
result_snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet.entries(0)), + ElementsAre("body bar 1")); + EXPECT_THAT(GetMatches(content, result_snippet.entries(0)), + ElementsAre("bar")); + EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("name")); + content = + GetString(&result_document, result_snippet.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet.entries(1)), + ElementsAre("subject foo 1")); + EXPECT_THAT(GetMatches(content, result_snippet.entries(1)), + ElementsAre("foo")); + + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(CreateDocument(/*id=*/2))); + EXPECT_THAT(page_result.results.at(1).snippet(), + EqualsProto(SnippetProto::default_instance())); + + EXPECT_THAT(page_result.results.at(2).document(), + EqualsProto(CreateDocument(/*id=*/3))); + EXPECT_THAT(page_result.results.at(2).snippet(), + EqualsProto(SnippetProto::default_instance())); +} + +TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllResults) { + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(CreateDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(CreateDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + document_store_->Put(CreateDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Create ResultSpec with custom snippet spec. 
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec(); + snippet_spec.set_num_to_snippet(5); + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3); + *result_spec.mutable_snippet_spec() = std::move(snippet_spec); + + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/{{"", {"foo", "bar"}}}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + // num_to_snippet = 5, num_previously_returned_in = 0, + // We can return 5 - 0 = 5 snippets at most. We're able to return all 3 + // snippets here. + ASSERT_THAT(page_result.results, SizeIs(3)); + EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty())); + EXPECT_THAT(page_result.results.at(1).snippet().entries(), Not(IsEmpty())); + EXPECT_THAT(page_result.results.at(2).snippet().entries(), Not(IsEmpty())); + EXPECT_THAT(page_result.num_results_with_snippets, Eq(3)); +} + +TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeResults) { + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(CreateDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(CreateDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + document_store_->Put(CreateDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; + ICING_ASSERT_OK_AND_ASSIGN( + 
std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Create ResultSpec with custom snippet spec. + ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec(); + snippet_spec.set_num_to_snippet(5); + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3); + *result_spec.mutable_snippet_spec() = std::move(snippet_spec); + + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/{{"", {"foo", "bar"}}}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + { + absl_ports::unique_lock l(&result_state.mutex); + + // Set (previously) num_returned = 3 docs + result_state.num_returned = 3; + } + + // num_to_snippet = 5, (previously) num_returned = 3, + // We can return 5 - 3 = 2 snippets. 
+ PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(3)); + EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty())); + EXPECT_THAT(page_result.results.at(1).snippet().entries(), Not(IsEmpty())); + EXPECT_THAT(page_result.results.at(2).snippet().entries(), IsEmpty()); + EXPECT_THAT(page_result.num_results_with_snippets, Eq(2)); +} + +TEST_F(ResultRetrieverV2SnippetTest, ShouldNotSnippetAnyResults) { + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(CreateDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(CreateDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + document_store_->Put(CreateDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Create ResultSpec with custom snippet spec. 
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec(); + snippet_spec.set_num_to_snippet(5); + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3); + *result_spec.mutable_snippet_spec() = std::move(snippet_spec); + + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/false), + /*query_terms=*/{{"", {"foo", "bar"}}}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + *document_store_); + { + absl_ports::unique_lock l(&result_state.mutex); + + // Set (previously) num_returned = 6 docs + result_state.num_returned = 6; + } + + // num_to_snippet = 5, (previously) num_returned = 6, + // We can't return any snippets for this page. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(3)); + EXPECT_THAT(page_result.results.at(0).snippet().entries(), IsEmpty()); + EXPECT_THAT(page_result.results.at(1).snippet().entries(), IsEmpty()); + EXPECT_THAT(page_result.results.at(2).snippet().entries(), IsEmpty()); + EXPECT_THAT(page_result.num_results_with_snippets, Eq(0)); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/result/result-retriever-v2_test.cc b/icing/result/result-retriever-v2_test.cc new file mode 100644 index 0000000..f23a88a --- /dev/null +++ b/icing/result/result-retriever-v2_test.cc @@ -0,0 +1,641 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/result/result-retriever-v2.h" + +#include <atomic> +#include <memory> +#include <unordered_map> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/mock-filesystem.h" +#include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/result/page-result.h" +#include "icing/result/result-state-v2.h" +#include "icing/schema-builder.h" +#include "icing/schema/schema-store.h" +#include "icing/schema/section.h" +#include "icing/scoring/priority-queue-scored-document-hits-ranker.h" +#include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-id.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" +#include "icing/tokenization/language-segmenter-factory.h" +#include "icing/transform/normalizer-factory.h" +#include "icing/transform/normalizer.h" +#include "unicode/uloc.h" + +namespace icing { +namespace lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::DoDefault; +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::Pointee; +using ::testing::Return; +using ::testing::SizeIs; +using NamespaceIdMap = 
    std::unordered_map<NamespaceId, int>;

// Readable aliases for the proto enum constants used when building the test
// schema and search specs below.
constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL =
    PropertyConfigProto::Cardinality::OPTIONAL;

constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN =
    StringIndexingConfig::TokenizerType::PLAIN;

constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY;
constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX;

// Mock the behavior of GroupResultLimiter::ShouldBeRemoved.
// By default no document is removed (ShouldBeRemoved returns false); tests
// that exercise result limiting override this via EXPECT_CALL.
class MockGroupResultLimiter : public GroupResultLimiterV2 {
 public:
  MockGroupResultLimiter() : GroupResultLimiterV2() {
    ON_CALL(*this, ShouldBeRemoved).WillByDefault(Return(false));
  }

  MOCK_METHOD(bool, ShouldBeRemoved,
              (const ScoredDocumentHit&, const NamespaceIdMap&,
               const DocumentStore&, std::vector<int>&),
              (const, override));
};

// Fixture providing the schema store, segmenter and normalizer dependencies
// that ResultRetrieverV2 needs. Each test runs against a scratch directory
// created in the constructor and removed in TearDown.
class ResultRetrieverV2Test : public ::testing::Test {
 protected:
  ResultRetrieverV2Test() : test_dir_(GetTestTempDir() + "/icing") {
    filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
  }

  void SetUp() override {
    // ICU data is only needed when neither the CFString nor the reverse-JNI
    // tokenization variant is in use.
    if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
      ICING_ASSERT_OK(
          // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile( + GetTestFilePath("icing/icu.dat"))); + } + language_segmenter_factory::SegmenterOptions options(ULOC_US); + ICING_ASSERT_OK_AND_ASSIGN( + language_segmenter_, + language_segmenter_factory::Create(std::move(options))); + + ICING_ASSERT_OK_AND_ASSIGN( + schema_store_, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( + /*max_term_byte_size=*/10000)); + + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + + num_total_hits_ = 0; + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); + } + + SectionId GetSectionId(const std::string& type, const std::string& property) { + auto type_id_or = schema_store_->GetSchemaTypeId(type); + if (!type_id_or.ok()) { + return kInvalidSectionId; + } + SchemaTypeId type_id = type_id_or.ValueOrDie(); + for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) { + auto metadata_or = 
schema_store_->GetSectionMetadata(type_id, section_id); + if (!metadata_or.ok()) { + break; + } + const SectionMetadata* metadata = metadata_or.ValueOrDie(); + if (metadata->path == property) { + return metadata->id; + } + } + return kInvalidSectionId; + } + + const Filesystem filesystem_; + const std::string test_dir_; + std::unique_ptr<LanguageSegmenter> language_segmenter_; + std::unique_ptr<SchemaStore> schema_store_; + std::unique_ptr<Normalizer> normalizer_; + std::atomic<int> num_total_hits_; + FakeClock fake_clock_; +}; + +// TODO(sungyc): Refactor helper functions below (builder classes or common test +// utility). + +DocumentProto CreateDocument(int id) { + return DocumentBuilder() + .SetKey("icing", "Email/" + std::to_string(id)) + .SetSchema("Email") + .AddStringProperty("name", "subject foo " + std::to_string(id)) + .AddStringProperty("body", "body bar " + std::to_string(id)) + .SetCreationTimestampMs(1574365086666 + id) + .Build(); +} + +SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) { + SectionIdMask mask = 0; + for (SectionId section_id : section_ids) { + mask |= (1u << section_id); + } + return mask; +} + +SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) { + SearchSpecProto search_spec; + search_spec.set_term_match_type(match_type); + return search_spec; +} + +ScoringSpecProto CreateScoringSpec(bool is_descending_order) { + ScoringSpecProto scoring_spec; + scoring_spec.set_order_by(is_descending_order ? 
ScoringSpecProto::Order::DESC
                                  : ScoringSpecProto::Order::ASC);
  return scoring_spec;
}

// Builds a result spec that only sets the page size.
ResultSpecProto CreateResultSpec(int num_per_page) {
  ResultSpecProto result_spec;
  result_spec.set_num_per_page(num_per_page);
  return result_spec;
}

// Create() must reject a null document store, schema store, language
// segmenter, or normalizer with FAILED_PRECONDITION.
TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) {
  EXPECT_THAT(
      ResultRetrieverV2::Create(/*doc_store=*/nullptr, schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get()),
      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));

  // A real document store is needed to probe the remaining null arguments.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult create_result,
      DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
                            schema_store_.get()));
  std::unique_ptr<DocumentStore> doc_store =
      std::move(create_result.document_store);

  EXPECT_THAT(
      ResultRetrieverV2::Create(doc_store.get(), /*schema_store=*/nullptr,
                                language_segmenter_.get(), normalizer_.get()),
      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
  EXPECT_THAT(ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
                                        /*language_segmenter=*/nullptr,
                                        normalizer_.get()),
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
  EXPECT_THAT(ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
                                        language_segmenter_.get(),
                                        /*normalizer=*/nullptr),
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}

TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult create_result,
      DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
                            schema_store_.get()));
  std::unique_ptr<DocumentStore> doc_store =
      std::move(create_result.document_store);

  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             doc_store->Put(CreateDocument(/*id=*/1)));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             doc_store->Put(CreateDocument(/*id=*/2)));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
                             doc_store->Put(CreateDocument(/*id=*/3)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, + doc_store->Put(CreateDocument(/*id=*/4))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5, + doc_store->Put(CreateDocument(/*id=*/5))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/19}, + {document_id2, hit_section_id_mask, /*score=*/12}, + {document_id3, hit_section_id_mask, /*score=*/8}, + {document_id4, hit_section_id_mask, /*score=*/3}, + {document_id5, hit_section_id_mask, /*score=*/1}}; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + SearchResultProto::ResultProto result1; + *result1.mutable_document() = CreateDocument(/*id=*/1); + result1.set_score(19); + SearchResultProto::ResultProto result2; + *result2.mutable_document() = CreateDocument(/*id=*/2); + result2.set_score(12); + SearchResultProto::ResultProto result3; + *result3.mutable_document() = CreateDocument(/*id=*/3); + result3.set_score(8); + SearchResultProto::ResultProto result4; + *result4.mutable_document() = CreateDocument(/*id=*/4); + result4.set_score(3); + SearchResultProto::ResultProto result5; + *result5.mutable_document() = CreateDocument(/*id=*/5); + result5.set_score(1); + + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + CreateResultSpec(/*num_per_page=*/2), *doc_store); + + // First page, 2 results + auto [page_result1, has_more_results1] = + result_retriever->RetrieveNextPage(result_state); + 
EXPECT_THAT(page_result1.results, + ElementsAre(EqualsProto(result1), EqualsProto(result2))); + // num_results_with_snippets is 0 when there is no snippet. + EXPECT_THAT(page_result1.num_results_with_snippets, Eq(0)); + // Requested page size is same as num_per_page. + EXPECT_THAT(page_result1.requested_page_size, Eq(2)); + // Has more results. + EXPECT_TRUE(has_more_results1); + + // Second page, 2 results + auto [page_result2, has_more_results2] = + result_retriever->RetrieveNextPage(result_state); + EXPECT_THAT(page_result2.results, + ElementsAre(EqualsProto(result3), EqualsProto(result4))); + // num_results_with_snippets is 0 when there is no snippet. + EXPECT_THAT(page_result2.num_results_with_snippets, Eq(0)); + // Requested page size is same as num_per_page. + EXPECT_THAT(page_result2.requested_page_size, Eq(2)); + // Has more results. + EXPECT_TRUE(has_more_results2); + + // Third page, 1 result + auto [page_result3, has_more_results3] = + result_retriever->RetrieveNextPage(result_state); + EXPECT_THAT(page_result3.results, ElementsAre(EqualsProto(result5))); + // num_results_with_snippets is 0 when there is no snippet. + EXPECT_THAT(page_result3.num_results_with_snippets, Eq(0)); + // Requested page size is same as num_per_page. + EXPECT_THAT(page_result3.requested_page_size, Eq(2)); + // No more results. 
+ EXPECT_FALSE(has_more_results3); +} + +TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + doc_store->Put(CreateDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + doc_store->Put(CreateDocument(/*id=*/2))); + + DocumentId invalid_document_id = -1; + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/12}, + {document_id2, hit_section_id_mask, /*score=*/4}, + {invalid_document_id, hit_section_id_mask, /*score=*/0}}; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get(), + std::make_unique<MockGroupResultLimiter>())); + + SearchResultProto::ResultProto result1; + *result1.mutable_document() = CreateDocument(/*id=*/1); + result1.set_score(12); + SearchResultProto::ResultProto result2; + *result2.mutable_document() = CreateDocument(/*id=*/2); + result2.set_score(4); + + ResultStateV2 result_state1( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), + /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + CreateResultSpec(/*num_per_page=*/3), *doc_store); + PageResult page_result1 = + result_retriever->RetrieveNextPage(result_state1).first; + EXPECT_THAT(page_result1.results, + ElementsAre(EqualsProto(result1), 
EqualsProto(result2))); + + DocumentId non_existing_document_id = 4; + scored_document_hits = { + {non_existing_document_id, hit_section_id_mask, /*score=*/15}, + {document_id1, hit_section_id_mask, /*score=*/12}, + {document_id2, hit_section_id_mask, /*score=*/4}}; + ResultStateV2 result_state2( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), + /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + CreateResultSpec(/*num_per_page=*/3), *doc_store); + PageResult page_result2 = + result_retriever->RetrieveNextPage(result_state2).first; + EXPECT_THAT(page_result2.results, + ElementsAre(EqualsProto(result1), EqualsProto(result2))); +} + +TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) { + MockFilesystem mock_filesystem; + EXPECT_CALL(mock_filesystem, + PRead(A<int>(), A<void*>(), A<size_t>(), A<off_t>())) + .WillOnce(Return(false)) + .WillRepeatedly(DoDefault()); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + doc_store->Put(CreateDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + doc_store->Put(CreateDocument(/*id=*/2))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(), + 
language_segmenter_.get(), normalizer_.get(), + std::make_unique<MockGroupResultLimiter>())); + + SearchResultProto::ResultProto result1; + *result1.mutable_document() = CreateDocument(/*id=*/1); + result1.set_score(0); + + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), + /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + CreateResultSpec(/*num_per_page=*/2), *doc_store); + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + // We mocked mock_filesystem to return an internal error when retrieving doc2, + // so doc2 should be skipped and doc1 should still be returned. + EXPECT_THAT(page_result.results, ElementsAre(EqualsProto(result1))); +} + +TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + doc_store->Put(CreateDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + doc_store->Put(CreateDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + doc_store->Put(CreateDocument(/*id=*/3))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, + doc_store->Put(CreateDocument(/*id=*/4))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5, + doc_store->Put(CreateDocument(/*id=*/5))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, 
/*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}, + {document_id4, hit_section_id_mask, /*score=*/0}, + {document_id5, hit_section_id_mask, /*score=*/0}}; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits), + /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + CreateResultSpec(/*num_per_page=*/2), *doc_store); + + // First page, 2 results + PageResult page_result1 = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result1.results, SizeIs(2)); + { + absl_ports::shared_lock l(&result_state.mutex); + + // num_returned = size of first page + EXPECT_THAT(result_state.num_returned, Eq(2)); + // Should remove the 2 returned docs from scored_document_hits and only + // contain the remaining 3. + EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(3))); + } + + // Second page, 2 results + PageResult page_result2 = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result2.results, SizeIs(2)); + { + absl_ports::shared_lock l(&result_state.mutex); + + // num_returned = size of first and second pages + EXPECT_THAT(result_state.num_returned, Eq(4)); + // Should remove the 2 returned docs from scored_document_hits and only + // contain the remaining 1. 
+ EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(1))); + } + + // Third page, 1 result + PageResult page_result3 = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result3.results, SizeIs(1)); + { + absl_ports::shared_lock l(&result_state.mutex); + + // num_returned = size of first, second and third pages + EXPECT_THAT(result_state.num_returned, Eq(5)); + // Should remove the 1 returned doc from scored_document_hits and become + // empty. + EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(IsEmpty())); + } +} + +TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + doc_store->Put(CreateDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + doc_store->Put(CreateDocument(/*id=*/2))); + std::vector<ScoredDocumentHit> scored_document_hits1 = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + std::shared_ptr<ResultStateV2> result_state1 = + std::make_shared<ResultStateV2>( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits1), + /*is_descending=*/true), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + CreateResultSpec(/*num_per_page=*/1), *doc_store); + { + absl_ports::unique_lock l(&result_state1->mutex); + + result_state1->RegisterNumTotalHits(&num_total_hits_); + ASSERT_THAT(num_total_hits_, 
Eq(2)); + } + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, + doc_store->Put(CreateDocument(/*id=*/3))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, + doc_store->Put(CreateDocument(/*id=*/4))); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5, + doc_store->Put(CreateDocument(/*id=*/5))); + std::vector<ScoredDocumentHit> scored_document_hits2 = { + {document_id3, hit_section_id_mask, /*score=*/0}, + {document_id4, hit_section_id_mask, /*score=*/0}, + {document_id5, hit_section_id_mask, /*score=*/0}}; + std::shared_ptr<ResultStateV2> result_state2 = + std::make_shared<ResultStateV2>( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::move(scored_document_hits2), + /*is_descending=*/true), + /*query_terms=*/SectionRestrictQueryTermsMap{}, + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), + CreateResultSpec(/*num_per_page=*/2), *doc_store); + { + absl_ports::unique_lock l(&result_state2->mutex); + + result_state2->RegisterNumTotalHits(&num_total_hits_); + ASSERT_THAT(num_total_hits_, Eq(5)); + } + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Should get 1 doc in the first page of result_state1, and num_total_hits + // should be decremented by 1. + PageResult page_result1 = + result_retriever->RetrieveNextPage(*result_state1).first; + ASSERT_THAT(page_result1.results, SizeIs(1)); + EXPECT_THAT(num_total_hits_, Eq(4)); + + // Should get 2 docs in the first page of result_state2, and num_total_hits + // should be decremented by 2. 
+ PageResult page_result2 = + result_retriever->RetrieveNextPage(*result_state2).first; + ASSERT_THAT(page_result2.results, SizeIs(2)); + EXPECT_THAT(num_total_hits_, Eq(2)); + + // Should get 1 doc in the second page of result_state2 (although num_per_page + // is 2, there is only 1 doc left), and num_total_hits should be decremented + // by 1. + PageResult page_result3 = + result_retriever->RetrieveNextPage(*result_state2).first; + ASSERT_THAT(page_result3.results, SizeIs(1)); + EXPECT_THAT(num_total_hits_, Eq(1)); + + // Destruct result_state1. There is 1 doc left, so num_total_hits should be + // decremented by 1 when destructing it. + result_state1.reset(); + EXPECT_THAT(num_total_hits_, Eq(0)); + + // Destruct result_state2. There is 0 doc left, so num_total_hits should be + // unchanged when destructing it. + result_state1.reset(); + EXPECT_THAT(num_total_hits_, Eq(0)); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc index d606e79..1057f9b 100644 --- a/icing/result/result-state-manager.cc +++ b/icing/result/result-state-manager.cc @@ -23,11 +23,13 @@ namespace icing { namespace lib { ResultStateManager::ResultStateManager(int max_total_hits, - const DocumentStore& document_store) + const DocumentStore& document_store, + const Clock* clock) : document_store_(document_store), max_total_hits_(max_total_hits), num_total_hits_(0), - random_generator_(GetSteadyTimeNanoseconds()) {} + random_generator_(GetSteadyTimeNanoseconds()), + clock_(*clock) {} libtextclassifier3::StatusOr<PageResultState> ResultStateManager::RankAndPaginate(ResultState result_state) { @@ -75,7 +77,8 @@ uint64_t ResultStateManager::Add(ResultState result_state) { num_total_hits_ += result_state.num_remaining(); result_state_map_.emplace(new_token, std::move(result_state)); // Tracks the insertion order - token_queue_.push(new_token); + token_queue_.push( + std::make_pair(new_token, 
clock_.GetSystemTimeMilliseconds())); return new_token; } @@ -134,10 +137,16 @@ void ResultStateManager::InvalidateAllResultStates() { InternalInvalidateAllResultStates(); } +void ResultStateManager::InvalidateExpiredResultStates( + int64_t result_state_ttl) { + absl_ports::unique_lock l(&mutex_); + InternalInvalidateExpiredResultStates(result_state_ttl); +} + void ResultStateManager::InternalInvalidateAllResultStates() { result_state_map_.clear(); invalidated_token_set_.clear(); - token_queue_ = std::queue<uint64_t>(); + token_queue_ = std::queue<std::pair<uint64_t, int64_t>>(); num_total_hits_ = 0; } @@ -170,16 +179,16 @@ void ResultStateManager::RemoveStatesIfNeeded(const ResultState& result_state) { // 2. Remove any tokens that were previously invalidated. while (!token_queue_.empty() && - invalidated_token_set_.find(token_queue_.front()) != + invalidated_token_set_.find(token_queue_.front().first) != invalidated_token_set_.end()) { - invalidated_token_set_.erase(token_queue_.front()); + invalidated_token_set_.erase(token_queue_.front().first); token_queue_.pop(); } // 3. If we're over budget, remove states from oldest to newest until we fit // into our budget. 
while (result_state.num_remaining() + num_total_hits_ > max_total_hits_) { - InternalInvalidateResultState(token_queue_.front()); + InternalInvalidateResultState(token_queue_.front().first); token_queue_.pop(); } invalidated_token_set_.clear(); @@ -198,5 +207,24 @@ void ResultStateManager::InternalInvalidateResultState(uint64_t token) { } } +void ResultStateManager::InternalInvalidateExpiredResultStates( + int64_t result_state_ttl) { + int64_t current_time = clock_.GetSystemTimeMilliseconds(); + while (!token_queue_.empty() && + current_time - token_queue_.front().second >= result_state_ttl) { + auto itr = result_state_map_.find(token_queue_.front().first); + if (itr != result_state_map_.end()) { + num_total_hits_ -= itr->second.num_remaining(); + result_state_map_.erase(itr); + } else { + // Since result_state_map_ and invalidated_token_set_ are mutually + // exclusive, we remove the token from invalidated_token_set_ only if it + // isn't present in result_state_map_. + invalidated_token_set_.erase(token_queue_.front().first); + } + token_queue_.pop(); + } +} + } // namespace lib } // namespace icing diff --git a/icing/result/result-state-manager.h b/icing/result/result-state-manager.h index c04217f..745b0ec 100644 --- a/icing/result/result-state-manager.h +++ b/icing/result/result-state-manager.h @@ -26,6 +26,7 @@ #include "icing/proto/search.pb.h" #include "icing/result/page-result-state.h" #include "icing/result/result-state.h" +#include "icing/util/clock.h" namespace icing { namespace lib { @@ -34,11 +35,16 @@ namespace lib { // SearchResultProto.next_page_token. inline constexpr uint64_t kInvalidNextPageToken = 0; +// 1 hr as the default ttl for a ResultState after being pushed into +// token_queue_. +inline constexpr int64_t kDefaultResultStateTtlInMs = 1LL * 60 * 60 * 1000; + // Used to store and manage ResultState. 
class ResultStateManager { public: explicit ResultStateManager(int max_total_hits, - const DocumentStore& document_store); + const DocumentStore& document_store, + const Clock* clock); ResultStateManager(const ResultStateManager&) = delete; ResultStateManager& operator=(const ResultStateManager&) = delete; @@ -75,6 +81,12 @@ class ResultStateManager { // Invalidates all result states / tokens currently in ResultStateManager. void InvalidateAllResultStates() ICING_LOCKS_EXCLUDED(mutex_); + // Invalidates expired result states / tokens currently in ResultStateManager + // that were created before current_time - result_state_ttl. + void InvalidateExpiredResultStates( + int64_t result_state_ttl = kDefaultResultStateTtlInMs) + ICING_LOCKS_EXCLUDED(mutex_); + private: absl_ports::shared_mutex mutex_; @@ -94,8 +106,9 @@ class ResultStateManager { std::unordered_map<uint64_t, ResultState> result_state_map_ ICING_GUARDED_BY(mutex_); - // A queue used to track the insertion order of tokens - std::queue<uint64_t> token_queue_ ICING_GUARDED_BY(mutex_); + // A queue used to track the insertion order of tokens with pushed timestamps. + std::queue<std::pair<uint64_t, int64_t>> token_queue_ + ICING_GUARDED_BY(mutex_); // A set to temporarily store the invalidated tokens before they're finally // removed from token_queue_. We store the invalidated tokens to ensure the @@ -105,6 +118,8 @@ class ResultStateManager { // A random 64-bit number generator std::mt19937_64 random_generator_ ICING_GUARDED_BY(mutex_); + const Clock& clock_; // Does not own. + // Puts a new result state into the internal storage and returns a next-page // token associated with it. The token is guaranteed to be unique among all // currently valid tokens. 
When the maximum number of result states is @@ -126,12 +141,18 @@ class ResultStateManager { void InternalInvalidateResultState(uint64_t token) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - // Internal method to invalidates all result states / tokens currently in + // Internal method to invalidate all result states / tokens currently in // ResultStateManager. We need this separate method so that other public // methods don't need to call InvalidateAllResultStates(). Public methods // calling each other may cause deadlock issues. void InternalInvalidateAllResultStates() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Internal method to invalidate and remove expired result states / tokens + // currently in ResultStateManager that were created before + // current_time - result_state_ttl. + void InternalInvalidateExpiredResultStates(int64_t result_state_ttl) + ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); }; } // namespace lib diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc index 8a9005d..251a736 100644 --- a/icing/result/result-state-manager_test.cc +++ b/icing/result/result-state-manager_test.cc @@ -21,6 +21,7 @@ #include "icing/schema/schema-store.h" #include "icing/store/document-store.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" #include "icing/testing/tmp-directory.h" #include "icing/util/clock.h" @@ -52,11 +53,13 @@ ScoredDocumentHit CreateScoredHit(DocumentId document_id) { class ResultStateManagerTest : public testing::Test { protected: void SetUp() override { + clock_ = std::make_unique<FakeClock>(); + schema_store_base_dir_ = GetTestTempDir() + "/schema_store"; filesystem_.CreateDirectoryRecursively(schema_store_base_dir_.c_str()); ICING_ASSERT_OK_AND_ASSIGN( - schema_store_, - SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_)); + schema_store_, SchemaStore::Create(&filesystem_, schema_store_base_dir_, + clock_.get())); SchemaProto schema; 
schema.add_types()->set_schema_type("Document"); ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema))); @@ -65,7 +68,7 @@ class ResultStateManagerTest : public testing::Test { filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str()); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult result, - DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_, + DocumentStore::Create(&filesystem_, doc_store_base_dir_, clock_.get(), schema_store_.get())); document_store_ = std::move(result.document_store); } @@ -73,6 +76,7 @@ class ResultStateManagerTest : public testing::Test { void TearDown() override { filesystem_.DeleteDirectoryRecursively(doc_store_base_dir_.c_str()); filesystem_.DeleteDirectoryRecursively(schema_store_base_dir_.c_str()); + clock_.reset(); } ResultState CreateResultState( @@ -92,13 +96,16 @@ class ResultStateManagerTest : public testing::Test { return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1); } + FakeClock* clock() { return clock_.get(); } + const FakeClock* clock() const { return clock_.get(); } + const DocumentStore& document_store() const { return *document_store_; } private: Filesystem filesystem_; + std::unique_ptr<FakeClock> clock_; std::string doc_store_base_dir_; std::string schema_store_base_dir_; - Clock clock_; std::unique_ptr<DocumentStore> document_store_; std::unique_ptr<SchemaStore> schema_store_; }; @@ -111,7 +118,8 @@ TEST_F(ResultStateManagerTest, ShouldRankAndPaginateOnePage) { /*num_per_page=*/10); ResultStateManager result_state_manager( - /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(), + clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state, result_state_manager.RankAndPaginate(std::move(original_result_state))); @@ -136,7 +144,8 @@ TEST_F(ResultStateManagerTest, ShouldRankAndPaginateMultiplePages) { /*num_per_page=*/2); ResultStateManager result_state_manager( - 
/*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(), + clock()); // First page, 2 results ICING_ASSERT_OK_AND_ASSIGN( @@ -173,7 +182,8 @@ TEST_F(ResultStateManagerTest, EmptyStateShouldReturnError) { ResultState empty_result_state = CreateResultState({}, /*num_per_page=*/1); ResultStateManager result_state_manager( - /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(), + clock()); EXPECT_THAT( result_state_manager.RankAndPaginate(std::move(empty_result_state)), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); @@ -192,7 +202,8 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateOneToken) { /*num_per_page=*/1); ResultStateManager result_state_manager( - /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(), + clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -230,7 +241,8 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) { /*num_per_page=*/1); ResultStateManager result_state_manager( - /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(), + clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -251,6 +263,50 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } +TEST_F(ResultStateManagerTest, ShouldInvalidateOldTokens) { + ResultState result_state1 = + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1), + AddScoredDocument(/*document_id=*/2)}, + /*num_per_page=*/1); + ResultState result_state2 = + 
CreateResultState({AddScoredDocument(/*document_id=*/3), + AddScoredDocument(/*document_id=*/4), + AddScoredDocument(/*document_id=*/5)}, + /*num_per_page=*/1); + + ResultStateManager result_state_manager( + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(), + clock()); + // Set time as 1s and add state 1. + clock()->SetSystemTimeMilliseconds(1000); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state1, + result_state_manager.RankAndPaginate(std::move(result_state1))); + // Set time as 1hr2s and add state 2. + clock()->SetSystemTimeMilliseconds(kDefaultResultStateTtlInMs + 2000); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state2, + result_state_manager.RankAndPaginate(std::move(result_state2))); + + // Invalidates expired states with default ttl (1 hr). This should only + // invalidate state 1. + result_state_manager.InvalidateExpiredResultStates(); + + // page_result_state1.next_page_token() shouldn't be found + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state1.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + // page_result_state2.next_page_token() should be found + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state2, + result_state_manager.GetNextPage(page_result_state2.next_page_token)); + EXPECT_THAT(page_result_state2.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/4)))); +} + TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) { ResultState result_state1 = CreateResultState({AddScoredDocument(/*document_id=*/0), @@ -266,7 +322,7 @@ TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) { /*num_per_page=*/1); ResultStateManager result_state_manager(/*max_total_hits=*/2, - document_store()); + document_store(), clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -317,7 +373,7 @@ TEST_F(ResultStateManagerTest, // Each 
result state has a page size of 1 and a result set of 2 hits. So each // result will take up one hit of our three hit budget. ResultStateManager result_state_manager(/*max_total_hits=*/3, - document_store()); + document_store(), clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -390,7 +446,7 @@ TEST_F(ResultStateManagerTest, // Each result state has a page size of 1 and a result set of 2 hits. So each // result will take up one hit of our three hit budget. ResultStateManager result_state_manager(/*max_total_hits=*/3, - document_store()); + document_store(), clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -463,6 +519,99 @@ TEST_F(ResultStateManagerTest, /*document_id=*/10)))); } +TEST_F(ResultStateManagerTest, + InvalidatedOldResultStatesShouldDecreaseCurrentHitsCount) { + ResultState result_state1 = + CreateResultState({AddScoredDocument(/*document_id=*/0), + AddScoredDocument(/*document_id=*/1), + AddScoredDocument(/*document_id=*/2), + AddScoredDocument(/*document_id=*/3)}, + /*num_per_page=*/1); + ResultState result_state2 = + CreateResultState({AddScoredDocument(/*document_id=*/4), + AddScoredDocument(/*document_id=*/5)}, + /*num_per_page=*/1); + ResultState result_state3 = + CreateResultState({AddScoredDocument(/*document_id=*/6), + AddScoredDocument(/*document_id=*/7)}, + /*num_per_page=*/1); + ResultState result_state4 = + CreateResultState({AddScoredDocument(/*document_id=*/8), + AddScoredDocument(/*document_id=*/9)}, + /*num_per_page=*/1); + + // Add the first three states. Remember, the first page for each result state + // won't be cached (since it is returned immediately from RankAndPaginate). + // So state 1 ~ state 4 will take up 6 hits in total. 
+ ResultStateManager result_state_manager(/*max_total_hits=*/6, + document_store(), clock()); + // Set time as 1000ms and add state 1. + clock()->SetSystemTimeMilliseconds(1000); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state1, + result_state_manager.RankAndPaginate(std::move(result_state1))); + // Set time as 1001ms and add state 2. + clock()->SetSystemTimeMilliseconds(1001); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state2, + result_state_manager.RankAndPaginate(std::move(result_state2))); + // Set time as 1002ms and add state 3. + clock()->SetSystemTimeMilliseconds(1002); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state3, + result_state_manager.RankAndPaginate(std::move(result_state3))); + // Set time as 1003ms and add state 4. + clock()->SetSystemTimeMilliseconds(1003); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state4, + result_state_manager.RankAndPaginate(std::move(result_state4))); + + // Set time as kDefaultResultStateTtlInMs + 1001ms and invalidate expired + // states with default ttl (1 hr). This should invalidate state 1 and state 2. + clock()->SetSystemTimeMilliseconds(kDefaultResultStateTtlInMs + 1001); + result_state_manager.InvalidateExpiredResultStates(); + + // page_result_state1.next_page_token() shouldn't be found + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state1.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + // page_result_state2.next_page_token() shouldn't be found + EXPECT_THAT( + result_state_manager.GetNextPage(page_result_state2.next_page_token), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + // If invalidating state 1 and state 2 correctly decremented the current hit + // count by 4 (to 2), then adding state 5 should still be within our budget + // and no other result states should be evicted. 
+ ResultState result_state5 = + CreateResultState({AddScoredDocument(/*document_id=*/10), + AddScoredDocument(/*document_id=*/11), + AddScoredDocument(/*document_id=*/12), + AddScoredDocument(/*document_id=*/13), + AddScoredDocument(/*document_id=*/14)}, + /*num_per_page=*/1); + ICING_ASSERT_OK_AND_ASSIGN( + PageResultState page_result_state5, + result_state_manager.RankAndPaginate(std::move(result_state5))); + + // page_result_state3.next_page_token() should be found since there is no + // eviction. + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state3, + result_state_manager.GetNextPage(page_result_state3.next_page_token)); + EXPECT_THAT(page_result_state3.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/6)))); + // page_result_state4.next_page_token() should be found since there is no + // eviction. + ICING_ASSERT_OK_AND_ASSIGN( + page_result_state4, + result_state_manager.GetNextPage(page_result_state4.next_page_token)); + EXPECT_THAT(page_result_state4.scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(CreateScoredHit( + /*document_id=*/8)))); +} + TEST_F( ResultStateManagerTest, InvalidatedResultStateShouldDecreaseCurrentHitsCountByExactStateHitCount) { @@ -484,7 +633,7 @@ TEST_F( // Each result state has a page size of 1 and a result set of 2 hits. So each // result will take up one hit of our three hit budget. ResultStateManager result_state_manager(/*max_total_hits=*/3, - document_store()); + document_store(), clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -571,7 +720,7 @@ TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) { // Each result state has a page size of 1 and a result set of 2 hits. So each // result will take up one hit of our three hit budget. 
ResultStateManager result_state_manager(/*max_total_hits=*/3, - document_store()); + document_store(), clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -648,7 +797,7 @@ TEST_F(ResultStateManagerTest, // Each result state has a page size of 1 and a result set of 2 hits. So each // result will take up one hit of our three hit budget. ResultStateManager result_state_manager(/*max_total_hits=*/3, - document_store()); + document_store(), clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -736,7 +885,7 @@ TEST_F(ResultStateManagerTest, // won't be cached (since it is returned immediately from RankAndPaginate). // Each result state has a page size of 1. So 3 hits will remain cached. ResultStateManager result_state_manager(/*max_total_hits=*/4, - document_store()); + document_store(), clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state1, result_state_manager.RankAndPaginate(std::move(result_state1))); @@ -807,7 +956,7 @@ TEST_F(ResultStateManagerTest, TEST_F(ResultStateManagerTest, AddingResultStateShouldEvictOverBudgetResultState) { ResultStateManager result_state_manager(/*max_total_hits=*/4, - document_store()); + document_store(), clock()); // Add a result state that is larger than the entire budget. 
The entire result // state will still be cached ResultState result_state1 = @@ -864,7 +1013,8 @@ TEST_F(ResultStateManagerTest, ShouldGetSnippetContext) { document_store()); ResultStateManager result_state_manager( - /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(), + clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state, result_state_manager.RankAndPaginate(std::move(original_result_state))); @@ -899,7 +1049,8 @@ TEST_F(ResultStateManagerTest, ShouldGetDefaultSnippetContext) { document_store()); ResultStateManager result_state_manager( - /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(), + clock()); ICING_ASSERT_OK_AND_ASSIGN( PageResultState page_result_state, result_state_manager.RankAndPaginate(std::move(original_result_state))); @@ -924,7 +1075,8 @@ TEST_F(ResultStateManagerTest, ShouldGetCorrectNumPreviouslyReturned) { /*num_per_page=*/2); ResultStateManager result_state_manager( - /*max_total_hits=*/std::numeric_limits<int>::max(), document_store()); + /*max_total_hits=*/std::numeric_limits<int>::max(), document_store(), + clock()); // First page, 2 results ICING_ASSERT_OK_AND_ASSIGN( @@ -970,7 +1122,7 @@ TEST_F(ResultStateManagerTest, ShouldStoreAllHits) { /*num_per_page=*/2); ResultStateManager result_state_manager(/*max_total_hits=*/4, - document_store()); + document_store(), clock()); // The 5 input scored document hits will not be truncated. 
The first page of // two hits will be returned immediately and the other three hits will fit diff --git a/icing/result/result-state-v2.cc b/icing/result/result-state-v2.cc new file mode 100644 index 0000000..dde50e3 --- /dev/null +++ b/icing/result/result-state-v2.cc @@ -0,0 +1,94 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/result/result-state-v2.h" + +#include <atomic> +#include <memory> + +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/result/projection-tree.h" +#include "icing/result/snippet-context.h" +#include "icing/scoring/scored-document-hits-ranker.h" + +namespace icing { +namespace lib { + +namespace { +SnippetContext CreateSnippetContext(SectionRestrictQueryTermsMap query_terms, + const SearchSpecProto& search_spec, + const ResultSpecProto& result_spec) { + if (result_spec.snippet_spec().num_to_snippet() > 0 && + result_spec.snippet_spec().num_matches_per_property() > 0) { + // Needs snippeting + return SnippetContext(std::move(query_terms), result_spec.snippet_spec(), + search_spec.term_match_type()); + } + return SnippetContext(/*query_terms_in=*/{}, + ResultSpecProto::SnippetSpecProto::default_instance(), + TermMatchType::UNKNOWN); +} +} // namespace + +ResultStateV2::ResultStateV2( + std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker_in, + SectionRestrictQueryTermsMap query_terms, + const SearchSpecProto& search_spec, const 
// Constructs a ResultStateV2 for one query.
//
// - snippet_context_ is derived from query_terms/search_spec/result_spec.
// - projection_tree_map_ gets one entry per type property mask in result_spec.
// - Each ResultGrouping in result_spec becomes one group: its index in
//   group_result_limits is the group id, and every namespace it names that
//   exists in document_store is mapped to that group id.
// NOTE(review): scoring_spec is not referenced in this constructor —
// presumably kept for interface symmetry with ResultState v1; confirm.
ResultStateV2::ResultStateV2(
    std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker_in,
    SectionRestrictQueryTermsMap query_terms,
    const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
    const ResultSpecProto& result_spec, const DocumentStore& document_store)
    : scored_document_hits_ranker(std::move(scored_document_hits_ranker_in)),
      num_returned(0),
      snippet_context_(CreateSnippetContext(std::move(query_terms), search_spec,
                                            result_spec)),
      num_per_page_(result_spec.num_per_page()),
      // Not registered with a global counter until RegisterNumTotalHits().
      num_total_hits_(nullptr) {
  for (const TypePropertyMask& type_field_mask :
       result_spec.type_property_masks()) {
    projection_tree_map_.insert(
        {type_field_mask.schema_type(), ProjectionTree(type_field_mask)});
  }

  for (const ResultSpecProto::ResultGrouping& result_grouping :
       result_spec.result_groupings()) {
    // Group ids are assigned in the order groupings appear in the spec, so a
    // grouping with no valid namespaces still occupies a slot in
    // group_result_limits.
    int group_id = group_result_limits.size();
    group_result_limits.push_back(result_grouping.max_results());
    for (const std::string& name_space : result_grouping.namespaces()) {
      auto namespace_id_or = document_store.GetNamespaceId(name_space);
      if (!namespace_id_or.ok()) {
        // Unknown namespace: skip it; it cannot match any document.
        continue;
      }
      namespace_group_id_map_.insert({namespace_id_or.ValueOrDie(), group_id});
    }
  }
}

// Removes this state's remaining hits from the registered global counter (a
// no-op if RegisterNumTotalHits was never called).
ResultStateV2::~ResultStateV2() {
  IncrementNumTotalHits(-1 * scored_document_hits_ranker->size());
}
+ IncrementNumTotalHits(-1 * scored_document_hits_ranker->size()); + num_total_hits_ = num_total_hits; + IncrementNumTotalHits(scored_document_hits_ranker->size()); +} + +void ResultStateV2::IncrementNumTotalHits(int increment_by) { + if (num_total_hits_ != nullptr) { + *num_total_hits_ += increment_by; + } +} + +} // namespace lib +} // namespace icing diff --git a/icing/result/result-state-v2.h b/icing/result/result-state-v2.h new file mode 100644 index 0000000..fc56936 --- /dev/null +++ b/icing/result/result-state-v2.h @@ -0,0 +1,125 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_RESULT_RESULT_STATE_V2_H_ +#define ICING_RESULT_RESULT_STATE_V2_H_ + +#include <atomic> +#include <memory> +#include <unordered_map> +#include <vector> + +#include "icing/absl_ports/mutex.h" +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/result/projection-tree.h" +#include "icing/result/snippet-context.h" +#include "icing/scoring/scored-document-hits-ranker.h" +#include "icing/store/document-store.h" +#include "icing/store/namespace-id.h" + +namespace icing { +namespace lib { + +// Used to hold information needed across multiple pagination requests of the +// same query. Stored in ResultStateManager. 
// Used to hold information needed across multiple pagination requests of the
// same query. Stored in ResultStateManager.
//
// Thread-safety: all members are guarded by the public `mutex`; callers must
// hold it in the mode stated by each member function's annotation.
class ResultStateV2 {
 public:
  explicit ResultStateV2(
      std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker_in,
      SectionRestrictQueryTermsMap query_terms,
      const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
      const ResultSpecProto& result_spec, const DocumentStore& document_store);

  // Subtracts the ranker's remaining size from the registered global counter.
  ~ResultStateV2();

  // Register num_total_hits_ and add current scored_document_hits_ranker.size()
  // to it. When re-registering, it will subtract
  // scored_document_hits_ranker.size() from the original counter.
  void RegisterNumTotalHits(std::atomic<int>* num_total_hits)
      ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex);

  // Increment the global counter num_total_hits_ by increment_by, if
  // num_total_hits_ has been registered (is not nullptr).
  // Note that providing a negative value for increment_by is a valid usage,
  // which will actually decrement num_total_hits_.
  //
  // It has to be called when we change scored_document_hits_ranker.
  void IncrementNumTotalHits(int increment_by)
      ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex);

  const SnippetContext& snippet_context() const
      ICING_SHARED_LOCKS_REQUIRED(mutex) {
    return snippet_context_;
  }

  const std::unordered_map<std::string, ProjectionTree>& projection_tree_map()
      const ICING_SHARED_LOCKS_REQUIRED(mutex) {
    return projection_tree_map_;
  }

  const std::unordered_map<NamespaceId, int>& namespace_group_id_map() const
      ICING_SHARED_LOCKS_REQUIRED(mutex) {
    return namespace_group_id_map_;
  }

  int num_per_page() const ICING_SHARED_LOCKS_REQUIRED(mutex) {
    return num_per_page_;
  }

  // Guards every member of this class; public so callers can lock across
  // multiple accesses.
  absl_ports::shared_mutex mutex;

  // When evaluating the next top K hits from scored_document_hits_ranker, some
  // of them may be filtered out by group_result_limits and won't return to the
  // client, so they shouldn't be counted into num_returned. Also the logic of
  // group result limiting depends on retrieval, so it is impossible for
  // ResultState itself to correctly modify these fields. Thus, we make them
  // public, so users of this class can modify them directly.

  // The scored document hits ranker.
  std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker
      ICING_GUARDED_BY(mutex);

  // The count of remaining results to return for a group where group id is the
  // index.
  std::vector<int> group_result_limits ICING_GUARDED_BY(mutex);

  // Number of results that have already been returned.
  int num_returned ICING_GUARDED_BY(mutex);

 private:
  // Information needed for snippeting.
  SnippetContext snippet_context_ ICING_GUARDED_BY(mutex);

  // Information needed for projection.
  std::unordered_map<std::string, ProjectionTree> projection_tree_map_
      ICING_GUARDED_BY(mutex);

  // A map between namespace id and the id of the group that it appears in.
  std::unordered_map<NamespaceId, int> namespace_group_id_map_
      ICING_GUARDED_BY(mutex);

  // Number of results to return in each page.
  int num_per_page_ ICING_GUARDED_BY(mutex);

  // Pointer to a global counter to sum up the size of
  // scored_document_hits_ranker in all ResultStates.
  // Does not own.
  std::atomic<int>* num_total_hits_ ICING_GUARDED_BY(mutex);
};
// Creates a SearchSpecProto with the given term match type.
SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
  SearchSpecProto search_spec;
  search_spec.set_term_match_type(match_type);
  return search_spec;
}

// Creates a ScoringSpecProto ordered descending or ascending by score.
ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
  ScoringSpecProto scoring_spec;
  scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
                                                : ScoringSpecProto::Order::ASC);
  return scoring_spec;
}

// Creates a ResultSpecProto with the given page size.
ResultSpecProto CreateResultSpec(int num_per_page) {
  ResultSpecProto result_spec;
  result_spec.set_num_per_page(num_per_page);
  return result_spec;
}

// Fixture that sets up a SchemaStore holding a single "Document" type and a
// DocumentStore, both in temp directories, plus a global num_total_hits_
// counter reset to 0 before each test.
class ResultStateV2Test : public ::testing::Test {
 protected:
  void SetUp() override {
    schema_store_base_dir_ = GetTestTempDir() + "/schema_store";
    filesystem_.CreateDirectoryRecursively(schema_store_base_dir_.c_str());
    ICING_ASSERT_OK_AND_ASSIGN(
        schema_store_,
        SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_));
    SchemaProto schema;
    schema.add_types()->set_schema_type("Document");
    ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));

    doc_store_base_dir_ = GetTestTempDir() + "/document_store";
    filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str());
    ICING_ASSERT_OK_AND_ASSIGN(
        DocumentStore::CreateResult result,
        DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_,
                              schema_store_.get()));
    document_store_ = std::move(result.document_store);

    num_total_hits_ = 0;
  }

  void TearDown() override {
    filesystem_.DeleteDirectoryRecursively(doc_store_base_dir_.c_str());
    filesystem_.DeleteDirectoryRecursively(schema_store_base_dir_.c_str());
  }

  // Puts a "Document" with the given id into the document store (namespace
  // "namespace", uri = document id) and returns a ScoredDocumentHit for it
  // with score 1 and no section mask.
  ScoredDocumentHit AddScoredDocument(DocumentId document_id) {
    DocumentProto document;
    document.set_namespace_("namespace");
    document.set_uri(std::to_string(document_id));
    document.set_schema("Document");
    document_store_->Put(std::move(document));
    return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1);
  }

  DocumentStore& document_store() { return *document_store_; }

  std::atomic<int>& num_total_hits() { return num_total_hits_; }

  const std::atomic<int>& num_total_hits() const { return num_total_hits_; }

 private:
  Filesystem filesystem_;
  std::string doc_store_base_dir_;
  std::string schema_store_base_dir_;
  Clock clock_;
  std::unique_ptr<DocumentStore> document_store_;
  std::unique_ptr<SchemaStore> schema_store_;
  // Global counter shared across ResultStates in these tests.
  std::atomic<int> num_total_hits_;
};

// When both num_to_snippet and num_matches_per_property are positive, the
// snippet context must capture the query terms, snippet spec and match type.
TEST_F(ResultStateV2Test, ShouldReturnSnippetContextAccordingToSpecs) {
  ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
  result_spec.mutable_snippet_spec()->set_num_to_snippet(5);
  result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
  result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5);

  SectionRestrictQueryTermsMap query_terms_map;
  query_terms_map.emplace("term1", std::unordered_set<std::string>());

  ResultStateV2 result_state(
      std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
          std::vector<ScoredDocumentHit>(),
          /*is_descending=*/true),
      query_terms_map, CreateSearchSpec(TermMatchType::EXACT_ONLY),
      CreateScoringSpec(/*is_descending_order=*/true), result_spec,
      document_store());

  absl_ports::shared_lock l(&result_state.mutex);

  const SnippetContext snippet_context = result_state.snippet_context();

  // Snippet context should be derived from the specs above.
  EXPECT_TRUE(snippet_context.query_terms.find("term1") !=
              snippet_context.query_terms.end());
  EXPECT_THAT(snippet_context.snippet_spec,
              EqualsProto(result_spec.snippet_spec()));
  EXPECT_THAT(snippet_context.match_type, Eq(TermMatchType::EXACT_ONLY));

  // The same copy can be fetched multiple times.
  const SnippetContext snippet_context2 = result_state.snippet_context();
  EXPECT_TRUE(snippet_context2.query_terms.find("term1") !=
              snippet_context2.query_terms.end());
  EXPECT_THAT(snippet_context2.snippet_spec,
              EqualsProto(result_spec.snippet_spec()));
  EXPECT_THAT(snippet_context2.match_type, Eq(TermMatchType::EXACT_ONLY));
}
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(0); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5); + + SectionRestrictQueryTermsMap query_terms_map; + query_terms_map.emplace("term1", std::unordered_set<std::string>()); + + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::vector<ScoredDocumentHit>(), + /*is_descending=*/true), + query_terms_map, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + document_store()); + + absl_ports::shared_lock l(&result_state.mutex); + + const SnippetContext snippet_context = result_state.snippet_context(); + EXPECT_THAT(snippet_context.query_terms, IsEmpty()); + EXPECT_THAT( + snippet_context.snippet_spec, + EqualsProto(ResultSpecProto::SnippetSpecProto::default_instance())); + EXPECT_THAT(snippet_context.match_type, TermMatchType::UNKNOWN); +} + +TEST_F(ResultStateV2Test, ShouldConstructProjectionTreeMapAccordingToSpecs) { + // Create a ResultSpec with type property mask. 
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* email_type_property_mask = + result_spec.add_type_property_masks(); + email_type_property_mask->set_schema_type("Email"); + email_type_property_mask->add_paths("sender.name"); + email_type_property_mask->add_paths("sender.emailAddress"); + TypePropertyMask* phone_type_property_mask = + result_spec.add_type_property_masks(); + phone_type_property_mask->set_schema_type("Phone"); + phone_type_property_mask->add_paths("caller"); + TypePropertyMask* wildcard_type_property_mask = + result_spec.add_type_property_masks(); + wildcard_type_property_mask->set_schema_type( + std::string(ProjectionTree::kSchemaTypeWildcard)); + wildcard_type_property_mask->add_paths("wild.card"); + + ResultStateV2 result_state( + std::make_unique<PriorityQueueScoredDocumentHitsRanker>( + std::vector<ScoredDocumentHit>(), + /*is_descending=*/true), + /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + document_store()); + + absl_ports::shared_lock l(&result_state.mutex); + + const std::unordered_map<std::string, ProjectionTree>& projection_tree_map = + result_state.projection_tree_map(); + EXPECT_THAT(projection_tree_map, + UnorderedElementsAre( + Pair("Email", ProjectionTree(*email_type_property_mask)), + Pair("Phone", ProjectionTree(*phone_type_property_mask)), + Pair(std::string(ProjectionTree::kSchemaTypeWildcard), + ProjectionTree(*wildcard_type_property_mask)))); +} + +TEST_F(ResultStateV2Test, + ShouldConstructNamespaceGroupIdMapAndGroupResultLimitsAccordingToSpecs) { + // Create 3 docs under namespace1, namespace2, namespace3. 
// Result groupings must map each existing namespace to the group id given by
// the grouping's position in the spec; unknown namespaces are skipped but
// their grouping still occupies a limit slot.
TEST_F(ResultStateV2Test,
       ShouldConstructNamespaceGroupIdMapAndGroupResultLimitsAccordingToSpecs) {
  // Create 3 docs under namespace1, namespace2, namespace3.
  DocumentProto document1;
  document1.set_namespace_("namespace1");
  document1.set_uri("uri/1");
  document1.set_schema("Document");
  ICING_ASSERT_OK(document_store().Put(std::move(document1)));

  DocumentProto document2;
  document2.set_namespace_("namespace2");
  document2.set_uri("uri/2");
  document2.set_schema("Document");
  ICING_ASSERT_OK(document_store().Put(std::move(document2)));

  DocumentProto document3;
  document3.set_namespace_("namespace3");
  document3.set_uri("uri/3");
  document3.set_schema("Document");
  ICING_ASSERT_OK(document_store().Put(std::move(document3)));

  // Create a ResultSpec that limits "namespace1" to 3 results and limits
  // "namespace2"+"namespace3" to a total of 2 results. Also add
  // "nonexistentNamespace1" and "nonexistentNamespace2" to test the behavior.
  ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/5);
  ResultSpecProto::ResultGrouping* result_grouping =
      result_spec.add_result_groupings();
  result_grouping->set_max_results(3);
  result_grouping->add_namespaces("namespace1");
  result_grouping = result_spec.add_result_groupings();
  result_grouping->set_max_results(5);
  result_grouping->add_namespaces("nonexistentNamespace2");
  result_grouping = result_spec.add_result_groupings();
  result_grouping->set_max_results(2);
  result_grouping->add_namespaces("namespace2");
  result_grouping->add_namespaces("namespace3");
  result_grouping->add_namespaces("nonexistentNamespace1");

  // Get namespace ids.
  ICING_ASSERT_OK_AND_ASSIGN(NamespaceId namespace_id1,
                             document_store().GetNamespaceId("namespace1"));
  ICING_ASSERT_OK_AND_ASSIGN(NamespaceId namespace_id2,
                             document_store().GetNamespaceId("namespace2"));
  ICING_ASSERT_OK_AND_ASSIGN(NamespaceId namespace_id3,
                             document_store().GetNamespaceId("namespace3"));

  ResultStateV2 result_state(
      std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
          std::vector<ScoredDocumentHit>(),
          /*is_descending=*/true),
      /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
      CreateScoringSpec(/*is_descending_order=*/true), result_spec,
      document_store());

  absl_ports::shared_lock l(&result_state.mutex);

  // "namespace1" should be in group 0, and "namespace2"+"namespace3" should be
  // in group 2.
  // "nonexistentNamespace1" and "nonexistentNamespace2" shouldn't exist.
  EXPECT_THAT(
      result_state.namespace_group_id_map(),
      UnorderedElementsAre(Pair(namespace_id1, 0), Pair(namespace_id2, 2),
                           Pair(namespace_id3, 2)));

  // group_result_limits should contain 3 (at index 0 for group 0), 5 (at index
  // 1 for group 1), 2 (at index 2 for group 2), even though there is no valid
  // namespace in group 1.
  EXPECT_THAT(result_state.group_result_limits, ElementsAre(3, 5, 2));
}
// Registering must add the ranker's size to the global counter, and
// IncrementNumTotalHits must then be forwarded to it.
TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHits) {
  std::vector<ScoredDocumentHit> scored_document_hits = {
      AddScoredDocument(/*document_id=*/1),
      AddScoredDocument(/*document_id=*/0),
      AddScoredDocument(/*document_id=*/2),
      AddScoredDocument(/*document_id=*/4),
      AddScoredDocument(/*document_id=*/3)};

  // Creates a ResultState with 5 ScoredDocumentHits.
  ResultStateV2 result_state(
      std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
          std::move(scored_document_hits),
          /*is_descending=*/true),
      /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
      CreateScoringSpec(/*is_descending_order=*/true),
      CreateResultSpec(/*num_per_page=*/5), document_store());

  absl_ports::unique_lock l(&result_state.mutex);

  EXPECT_THAT(num_total_hits(), Eq(0));
  result_state.RegisterNumTotalHits(&num_total_hits());
  EXPECT_THAT(num_total_hits(), Eq(5));
  result_state.IncrementNumTotalHits(500);
  EXPECT_THAT(num_total_hits(), Eq(505));
}

// Destroying a registered ResultState must subtract its remaining hits from
// the global counter, restoring the pre-registration value.
TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHitsWhenDestructed) {
  std::vector<ScoredDocumentHit> scored_document_hits1 = {
      AddScoredDocument(/*document_id=*/1),
      AddScoredDocument(/*document_id=*/0),
      AddScoredDocument(/*document_id=*/2),
      AddScoredDocument(/*document_id=*/4),
      AddScoredDocument(/*document_id=*/3)};

  std::vector<ScoredDocumentHit> scored_document_hits2 = {
      AddScoredDocument(/*document_id=*/6),
      AddScoredDocument(/*document_id=*/5)};

  // Start from a non-zero count to verify only the deltas are applied.
  num_total_hits() = 2;
  {
    // Creates a ResultState with 5 ScoredDocumentHits.
    ResultStateV2 result_state1(
        std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
            std::move(scored_document_hits1),
            /*is_descending=*/true),
        /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
        CreateScoringSpec(/*is_descending_order=*/true),
        CreateResultSpec(/*num_per_page=*/5), document_store());

    absl_ports::unique_lock l(&result_state1.mutex);

    result_state1.RegisterNumTotalHits(&num_total_hits());
    ASSERT_THAT(num_total_hits(), Eq(7));

    {
      // Creates another ResultState with 2 ScoredDocumentHits.
      ResultStateV2 result_state2(
          std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
              std::move(scored_document_hits2),
              /*is_descending=*/true),
          /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
          CreateScoringSpec(/*is_descending_order=*/true),
          CreateResultSpec(/*num_per_page=*/5), document_store());

      absl_ports::unique_lock l(&result_state2.mutex);

      result_state2.RegisterNumTotalHits(&num_total_hits());
      ASSERT_THAT(num_total_hits(), Eq(9));
    }

    // result_state2 destroyed: its 2 hits are removed.
    EXPECT_THAT(num_total_hits(), Eq(7));
  }
  // result_state1 destroyed: its 5 hits are removed, back to the initial 2.
  EXPECT_THAT(num_total_hits(), Eq(2));
}

// Without registration, neither IncrementNumTotalHits nor destruction may
// touch the global counter.
TEST_F(ResultStateV2Test, ShouldNotUpdateNumTotalHitsWhenNotRegistered) {
  std::vector<ScoredDocumentHit> scored_document_hits = {
      AddScoredDocument(/*document_id=*/1),
      AddScoredDocument(/*document_id=*/0),
      AddScoredDocument(/*document_id=*/2),
      AddScoredDocument(/*document_id=*/4),
      AddScoredDocument(/*document_id=*/3)};

  // Creates a ResultState with 5 ScoredDocumentHits.
  {
    ResultStateV2 result_state(
        std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
            std::move(scored_document_hits),
            /*is_descending=*/true),
        /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
        CreateScoringSpec(/*is_descending_order=*/true),
        CreateResultSpec(/*num_per_page=*/5), document_store());

    {
      absl_ports::unique_lock l(&result_state.mutex);

      EXPECT_THAT(num_total_hits(), Eq(0));
      result_state.IncrementNumTotalHits(500);
      EXPECT_THAT(num_total_hits(), Eq(0));
    }
  }
  EXPECT_THAT(num_total_hits(), Eq(0));
}
// Re-registering with a different counter must move this state's contribution
// from the old counter to the new one, and later increments must go only to
// the newly registered counter.
TEST_F(ResultStateV2Test, ShouldDecrementOriginalNumTotalHitsWhenReregister) {
  std::atomic<int> another_num_total_hits = 11;

  std::vector<ScoredDocumentHit> scored_document_hits = {
      AddScoredDocument(/*document_id=*/1),
      AddScoredDocument(/*document_id=*/0),
      AddScoredDocument(/*document_id=*/2),
      AddScoredDocument(/*document_id=*/4),
      AddScoredDocument(/*document_id=*/3)};

  // Creates a ResultState with 5 ScoredDocumentHits.
  ResultStateV2 result_state(
      std::make_unique<PriorityQueueScoredDocumentHitsRanker>(
          std::move(scored_document_hits),
          /*is_descending=*/true),
      /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY),
      CreateScoringSpec(/*is_descending_order=*/true),
      CreateResultSpec(/*num_per_page=*/5), document_store());

  absl_ports::unique_lock l(&result_state.mutex);

  num_total_hits() = 7;
  result_state.RegisterNumTotalHits(&num_total_hits());
  EXPECT_THAT(num_total_hits(), Eq(12));

  result_state.RegisterNumTotalHits(&another_num_total_hits);
  // The original num_total_hits should be decremented after re-registration.
  EXPECT_THAT(num_total_hits(), Eq(7));
  // another_num_total_hits should be incremented after re-registration.
  EXPECT_THAT(another_num_total_hits, Eq(16));

  result_state.IncrementNumTotalHits(500);
  // The original num_total_hits should be unchanged.
  EXPECT_THAT(num_total_hits(), Eq(7));
  // Increment should be done on another_num_total_hits.
  EXPECT_THAT(another_num_total_hits, Eq(516));
}
return true; } NamespaceId namespace_id = - document_filter_data_or.ValueOrDie().namespace_id(); + document_filter_data_optional.value().namespace_id(); auto iter = namespace_group_id_map_.find(namespace_id); if (iter == namespace_group_id_map_.end()) { return false; diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc index fc50ea6..653f34f 100644 --- a/icing/schema/schema-store.cc +++ b/icing/schema/schema-store.cc @@ -27,6 +27,7 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" +#include "icing/file/destructible-directory.h" #include "icing/file/file-backed-proto.h" #include "icing/file/filesystem.h" #include "icing/proto/document.pb.h" @@ -35,7 +36,7 @@ #include "icing/schema/section-manager.h" #include "icing/schema/section.h" #include "icing/store/document-filter-data.h" -#include "icing/store/key-mapper.h" +#include "icing/store/dynamic-trie-key-mapper.h" #include "icing/util/crc32.h" #include "icing/util/logging.h" #include "icing/util/status-macros.h" @@ -49,8 +50,9 @@ constexpr char kSchemaStoreHeaderFilename[] = "schema_store_header"; constexpr char kSchemaFilename[] = "schema.pb"; constexpr char kSchemaTypeMapperFilename[] = "schema_type_mapper"; -// A KeyMapper stores its data across 3 arrays internally. Giving each array -// 128KiB for storage means the entire KeyMapper requires 384KiB. +// A DynamicTrieKeyMapper stores its data across 3 arrays internally. Giving +// each array 128KiB for storage means the entire DynamicTrieKeyMapper requires +// 384KiB. 
constexpr int32_t kSchemaTypeMapperMaxSize = 3 * 128 * 1024; // 384 KiB const std::string MakeHeaderFilename(const std::string& base_dir) { @@ -196,8 +198,8 @@ libtextclassifier3::Status SchemaStore::InitializeInternal( if (initialize_stats != nullptr) { initialize_stats->set_num_schema_types(type_config_map_.size()); } - has_schema_successfully_set_ = true; + return libtextclassifier3::Status::OK; } @@ -222,9 +224,9 @@ libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() { ICING_ASSIGN_OR_RETURN( schema_type_mapper_, - KeyMapper<SchemaTypeId>::Create(*filesystem_, - MakeSchemaTypeMapperFilename(base_dir_), - kSchemaTypeMapperMaxSize)); + DynamicTrieKeyMapper<SchemaTypeId>::Create( + *filesystem_, MakeSchemaTypeMapperFilename(base_dir_), + kSchemaTypeMapperMaxSize)); ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum()); if (checksum.Get() != header.checksum) { @@ -307,8 +309,9 @@ libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() { schema_type_mapper_.reset(); // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. 
- libtextclassifier3::Status status = KeyMapper<SchemaTypeId>::Delete( - *filesystem_, MakeSchemaTypeMapperFilename(base_dir_)); + libtextclassifier3::Status status = + DynamicTrieKeyMapper<SchemaTypeId>::Delete( + *filesystem_, MakeSchemaTypeMapperFilename(base_dir_)); if (!status.ok()) { ICING_LOG(ERROR) << status.error_message() << "Failed to delete old schema_type mapper"; @@ -316,9 +319,9 @@ libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() { } ICING_ASSIGN_OR_RETURN( schema_type_mapper_, - KeyMapper<SchemaTypeId>::Create(*filesystem_, - MakeSchemaTypeMapperFilename(base_dir_), - kSchemaTypeMapperMaxSize)); + DynamicTrieKeyMapper<SchemaTypeId>::Create( + *filesystem_, MakeSchemaTypeMapperFilename(base_dir_), + kSchemaTypeMapperMaxSize)); return libtextclassifier3::Status::OK; } @@ -447,46 +450,29 @@ libtextclassifier3::Status SchemaStore::ApplySchemaChange( std::string temp_schema_store_dir_path = base_dir_ + "_temp"; if (!filesystem_->DeleteDirectoryRecursively( temp_schema_store_dir_path.c_str())) { - ICING_LOG(WARNING) << "Failed to recursively delete " + ICING_LOG(ERROR) << "Recursively deleting " << temp_schema_store_dir_path.c_str(); return absl_ports::InternalError( "Unable to delete temp directory to prepare to build new schema " "store."); } - if (!filesystem_->CreateDirectoryRecursively( - temp_schema_store_dir_path.c_str())) { + DestructibleDirectory temp_schema_store_dir( + filesystem_, std::move(temp_schema_store_dir_path)); + if (!temp_schema_store_dir.is_valid()) { return absl_ports::InternalError( "Unable to create temp directory to build new schema store."); } // Then we create our new schema store with the new schema. - auto new_schema_store_or = - SchemaStore::Create(filesystem_, temp_schema_store_dir_path, clock_, - std::move(new_schema)); - if (!new_schema_store_or.ok()) { - // Attempt to clean up the temp directory. 
- if (!filesystem_->DeleteDirectoryRecursively( - temp_schema_store_dir_path.c_str())) { - // Nothing to do here. Just log an error. - ICING_LOG(WARNING) << "Failed to recursively delete " - << temp_schema_store_dir_path.c_str(); - } - return new_schema_store_or.status(); - } - std::unique_ptr<SchemaStore> new_schema_store = - std::move(new_schema_store_or).ValueOrDie(); + ICING_ASSIGN_OR_RETURN( + std::unique_ptr<SchemaStore> new_schema_store, + SchemaStore::Create(filesystem_, temp_schema_store_dir.dir(), clock_, + std::move(new_schema))); // Then we swap the new schema file + new derived files with the old files. if (!filesystem_->SwapFiles(base_dir_.c_str(), - temp_schema_store_dir_path.c_str())) { - // Attempt to clean up the temp directory. - if (!filesystem_->DeleteDirectoryRecursively( - temp_schema_store_dir_path.c_str())) { - // Nothing to do here. Just log an error. - ICING_LOG(WARNING) << "Failed to recursively delete " - << temp_schema_store_dir_path.c_str(); - } + temp_schema_store_dir.dir().c_str())) { return absl_ports::InternalError( "Unable to apply new schema due to failed swap!"); } diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h index 58e5477..82f4ffa 100644 --- a/icing/schema/schema-store.h +++ b/icing/schema/schema-store.h @@ -130,7 +130,7 @@ class SchemaStore { static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create( const Filesystem* filesystem, const std::string& base_dir, const Clock* clock, InitializeStatsProto* initialize_stats = nullptr); - + SchemaStore(SchemaStore&&) = default; SchemaStore& operator=(SchemaStore&&) = default; @@ -282,7 +282,6 @@ class SchemaStore { const Filesystem* filesystem, const std::string& base_dir, const Clock* clock, SchemaProto schema); - // Use SchemaStore::Create instead. 
explicit SchemaStore(const Filesystem* filesystem, std::string base_dir, const Clock* clock); diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc index 3fd41c4..ffd1292 100644 --- a/icing/schema/schema-store_test.cc +++ b/icing/schema/schema-store_test.cc @@ -18,6 +18,7 @@ #include <string> #include <vector> +#include "icing/text_classifier/lib3/utils/base/status.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/absl_ports/str_cat.h" @@ -35,7 +36,6 @@ #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" #include "icing/testing/tmp-directory.h" -#include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/util/crc32.h" namespace icing { @@ -73,8 +73,8 @@ constexpr PropertyConfigProto::DataType::Code TYPE_DOUBLE = class SchemaStoreTest : public ::testing::Test { protected: void SetUp() override { - temp_dir_ = GetTestTempDir() + "/icing"; - schema_store_dir_ = temp_dir_ + "/schema_store"; + test_dir_ = GetTestTempDir() + "/icing"; + schema_store_dir_ = test_dir_ + "/schema_store"; filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()); schema_ = @@ -93,24 +93,24 @@ class SchemaStoreTest : public ::testing::Test { // schema_store_dir_. IOW, ensure that all temporary directories have been // properly cleaned up. std::vector<std::string> sub_dirs; - ASSERT_TRUE(filesystem_.ListDirectory(temp_dir_.c_str(), &sub_dirs)); + ASSERT_TRUE(filesystem_.ListDirectory(test_dir_.c_str(), &sub_dirs)); ASSERT_THAT(sub_dirs, ElementsAre("schema_store")); // Finally, clean everything up. 
- ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(temp_dir_.c_str())); + ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str())); } Filesystem filesystem_; - std::string temp_dir_; + std::string test_dir_; std::string schema_store_dir_; SchemaProto schema_; FakeClock fake_clock_; }; TEST_F(SchemaStoreTest, CreationWithNullPointerShouldFail) { - EXPECT_THAT( - SchemaStore::Create(/*filesystem=*/nullptr, schema_store_dir_, &fake_clock_), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(SchemaStore::Create(/*filesystem=*/nullptr, schema_store_dir_, + &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) { @@ -215,15 +215,17 @@ TEST_F(SchemaStoreTest, CorruptSchemaError) { .AddType(SchemaTypeConfigBuilder().SetType("corrupted")) .Build(); - const std::string schema_file = absl_ports::StrCat(schema_store_dir_, "/schema.pb"); + const std::string schema_file = + absl_ports::StrCat(schema_store_dir_, "/schema.pb"); const std::string serialized_schema = corrupt_schema.SerializeAsString(); filesystem_.Write(schema_file.c_str(), serialized_schema.data(), serialized_schema.size()); // If ground truth was corrupted, we won't know what to do - EXPECT_THAT(SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + EXPECT_THAT( + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) { @@ -350,8 +352,9 @@ TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) { IsOkAndHolds(EqualsSetSchemaResult(result))); schema_store.reset(); - EXPECT_THAT(SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_), - IsOk()); + EXPECT_THAT( + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_), + IsOk()); } TEST_F(SchemaStoreTest, MultipleCreateOk) { @@ -383,7 
+386,8 @@ TEST_F(SchemaStoreTest, MultipleCreateOk) { schema_store.reset(); ICING_ASSERT_OK_AND_ASSIGN( - schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); // Verify that our in-memory structures are ok EXPECT_THAT(schema_store->GetSchemaTypeConfig("email"), @@ -1017,7 +1021,8 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameAcrossInstances) { schema_store.reset(); ICING_ASSERT_OK_AND_ASSIGN( - schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(checksum)); } @@ -1082,7 +1087,8 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) { // And we get the same schema back on reinitialization ICING_ASSERT_OK_AND_ASSIGN( - schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(schema)); } diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc index 3dcc5a9..cb7c561 100644 --- a/icing/schema/section-manager_test.cc +++ b/icing/schema/section-manager_test.cc @@ -23,6 +23,7 @@ #include "icing/proto/schema.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/schema-util.h" +#include "icing/store/dynamic-trie-key-mapper.h" #include "icing/store/key-mapper.h" #include "icing/testing/common-matchers.h" #include "icing/testing/tmp-directory.h" @@ -78,11 +79,11 @@ class SectionManagerTest : public ::testing::Test { } void SetUp() override { - // KeyMapper uses 3 internal arrays for bookkeeping. 
Give each one 128KiB so - // the total KeyMapper should get 384KiB + // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each + // one 128KiB so the total DynamicTrieKeyMapper should get 384KiB int key_mapper_size = 3 * 128 * 1024; ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_, - KeyMapper<SchemaTypeId>::Create( + DynamicTrieKeyMapper<SchemaTypeId>::Create( filesystem_, test_dir_, key_mapper_size)); ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0)); ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1)); @@ -397,13 +398,14 @@ TEST_F(SectionManagerTest, type_with_non_string_properties); type_config_map.emplace(empty_type.schema_type(), empty_type); - // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so - // the total KeyMapper should get 384KiB + // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each one + // 128KiB so the total DynamicTrieKeyMapper should get 384KiB int key_mapper_size = 3 * 128 * 1024; std::string dir = GetTestTempDir() + "/non_string_fields"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper, - KeyMapper<SchemaTypeId>::Create(filesystem_, dir, key_mapper_size)); + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, dir, + key_mapper_size)); ICING_ASSERT_OK(schema_type_mapper->Put( type_with_non_string_properties.schema_type(), /*schema_type_id=*/0)); ICING_ASSERT_OK(schema_type_mapper->Put(empty_type.schema_type(), @@ -486,13 +488,14 @@ TEST_F(SectionManagerTest, AssignSectionsRecursivelyForDocumentFields) { type_config_map.emplace(type.schema_type(), type); type_config_map.emplace(document_type.schema_type(), document_type); - // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so - // the total KeyMapper should get 384KiB + // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. 
Give each one + // 128KiB so the total DynamicTrieKeyMapper should get 384KiB int key_mapper_size = 3 * 128 * 1024; std::string dir = GetTestTempDir() + "/recurse_into_document"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper, - KeyMapper<SchemaTypeId>::Create(filesystem_, dir, key_mapper_size)); + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, dir, + key_mapper_size)); int type_schema_type_id = 0; int document_type_schema_type_id = 1; ICING_ASSERT_OK( @@ -560,13 +563,14 @@ TEST_F(SectionManagerTest, DontAssignSectionsRecursivelyForDocumentFields) { type_config_map.emplace(type.schema_type(), type); type_config_map.emplace(document_type.schema_type(), document_type); - // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so - // the total KeyMapper should get 384KiB + // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each one + // 128KiB so the total DynamicTrieKeyMapper should get 384KiB int key_mapper_size = 3 * 128 * 1024; std::string dir = GetTestTempDir() + "/recurse_into_document"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper, - KeyMapper<SchemaTypeId>::Create(filesystem_, dir, key_mapper_size)); + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, dir, + key_mapper_size)); int type_schema_type_id = 0; int document_type_schema_type_id = 1; ICING_ASSERT_OK( diff --git a/icing/scoring/bm25f-calculator.cc b/icing/scoring/bm25f-calculator.cc index 28d385e..4b426a9 100644 --- a/icing/scoring/bm25f-calculator.cc +++ b/icing/scoring/bm25f-calculator.cc @@ -233,8 +233,9 @@ float Bm25fCalculator::ComputeTermFrequencyForMatchedSections( } SchemaTypeId Bm25fCalculator::GetSchemaTypeId(DocumentId document_id) const { - auto filter_data_or = document_store_->GetDocumentFilterData(document_id); - if (!filter_data_or.ok()) { + auto filter_data_optional = + document_store_->GetAliveDocumentFilterData(document_id); + if 
(!filter_data_optional) { // This should never happen. The only failure case for // GetDocumentFilterData is if the document_id is outside of the range of // allocated document_ids, which shouldn't be possible since we're getting @@ -243,8 +244,7 @@ SchemaTypeId Bm25fCalculator::GetSchemaTypeId(DocumentId document_id) const { "No document filter data for document [%d]", document_id); return kInvalidSchemaTypeId; } - DocumentFilterData data = filter_data_or.ValueOrDie(); - return data.schema_type_id(); + return filter_data_optional.value().schema_type_id(); } } // namespace lib diff --git a/icing/scoring/priority-queue-scored-document-hits-ranker.cc b/icing/scoring/priority-queue-scored-document-hits-ranker.cc new file mode 100644 index 0000000..13da0ae --- /dev/null +++ b/icing/scoring/priority-queue-scored-document-hits-ranker.cc @@ -0,0 +1,55 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/scoring/priority-queue-scored-document-hits-ranker.h" + +#include <queue> +#include <vector> + +#include "icing/scoring/scored-document-hit.h" + +namespace icing { +namespace lib { + +PriorityQueueScoredDocumentHitsRanker::PriorityQueueScoredDocumentHitsRanker( + const std::vector<ScoredDocumentHit>& scored_document_hits, + bool is_descending) + : comparator_(/*is_ascending=*/!is_descending), + scored_document_hits_pq_(scored_document_hits.begin(), + scored_document_hits.end(), comparator_) {} + +ScoredDocumentHit PriorityQueueScoredDocumentHitsRanker::PopNext() { + ScoredDocumentHit ret = scored_document_hits_pq_.top(); + scored_document_hits_pq_.pop(); + return ret; +} + +void PriorityQueueScoredDocumentHitsRanker::TruncateHitsTo(int new_size) { + if (new_size < 0 || scored_document_hits_pq_.size() <= new_size) { + return; + } + + // Copying the best new_size results. + std::priority_queue<ScoredDocumentHit, std::vector<ScoredDocumentHit>, + Comparator> + new_pq(comparator_); + for (int i = 0; i < new_size; ++i) { + new_pq.push(scored_document_hits_pq_.top()); + scored_document_hits_pq_.pop(); + } + scored_document_hits_pq_ = std::move(new_pq); +} + +} // namespace lib +} // namespace icing diff --git a/icing/scoring/priority-queue-scored-document-hits-ranker.h b/icing/scoring/priority-queue-scored-document-hits-ranker.h new file mode 100644 index 0000000..c104585 --- /dev/null +++ b/icing/scoring/priority-queue-scored-document-hits-ranker.h @@ -0,0 +1,72 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_ +#define ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_ + +#include <queue> +#include <vector> + +#include "icing/scoring/scored-document-hit.h" +#include "icing/scoring/scored-document-hits-ranker.h" + +namespace icing { +namespace lib { + +// ScoredDocumentHitsRanker interface implementation, based on +// std::priority_queue. We can get next top hit in O(lgN) time. +class PriorityQueueScoredDocumentHitsRanker : public ScoredDocumentHitsRanker { + public: + explicit PriorityQueueScoredDocumentHitsRanker( + const std::vector<ScoredDocumentHit>& scored_document_hits, + bool is_descending = true); + + ~PriorityQueueScoredDocumentHitsRanker() override = default; + + ScoredDocumentHit PopNext() override; + + void TruncateHitsTo(int new_size) override; + + int size() const override { return scored_document_hits_pq_.size(); } + + bool empty() const override { return scored_document_hits_pq_.empty(); } + + private: + // Comparator for std::priority_queue. Since std::priority is a max heap + // (descending order), reverse it if we want ascending order. + class Comparator { + public: + explicit Comparator(bool is_ascending) : is_ascending_(is_ascending) {} + + bool operator()(const ScoredDocumentHit& lhs, + const ScoredDocumentHit& rhs) const { + return is_ascending_ == !(lhs < rhs); + } + + private: + bool is_ascending_; + }; + + Comparator comparator_; + + // Use priority queue to get top K hits in O(KlgN) time. 
+ std::priority_queue<ScoredDocumentHit, std::vector<ScoredDocumentHit>, + Comparator> + scored_document_hits_pq_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_ diff --git a/icing/scoring/priority-queue-scored-document-hits-ranker_test.cc b/icing/scoring/priority-queue-scored-document-hits-ranker_test.cc new file mode 100644 index 0000000..a575eaf --- /dev/null +++ b/icing/scoring/priority-queue-scored-document-hits-ranker_test.cc @@ -0,0 +1,239 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/scoring/priority-queue-scored-document-hits-ranker.h" + +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/scoring/scored-document-hit.h" +#include "icing/testing/common-matchers.h" + +namespace icing { +namespace lib { + +namespace { + +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::SizeIs; + +std::vector<ScoredDocumentHit> PopAll( + PriorityQueueScoredDocumentHitsRanker& ranker) { + std::vector<ScoredDocumentHit> hits; + while (!ranker.empty()) { + hits.push_back(ranker.PopNext()); + } + return hits; +} + +TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldGetCorrectSizeAndEmpty) { + ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone, + /*score=*/1); + + PriorityQueueScoredDocumentHitsRanker ranker( + {scored_hit_1, scored_hit_0, scored_hit_2}, + /*is_descending=*/true); + EXPECT_THAT(ranker.size(), Eq(3)); + EXPECT_FALSE(ranker.empty()); + + ranker.PopNext(); + EXPECT_THAT(ranker.size(), Eq(2)); + EXPECT_FALSE(ranker.empty()); + + ranker.PopNext(); + EXPECT_THAT(ranker.size(), Eq(1)); + EXPECT_FALSE(ranker.empty()); + + ranker.PopNext(); + EXPECT_THAT(ranker.size(), Eq(0)); + EXPECT_TRUE(ranker.empty()); +} + +TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldRankInDescendingOrder) { + ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone, + /*score=*/1); + + PriorityQueueScoredDocumentHitsRanker 
ranker( + {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3}, + /*is_descending=*/true); + + EXPECT_THAT(ranker, SizeIs(5)); + std::vector<ScoredDocumentHit> scored_document_hits = PopAll(ranker); + EXPECT_THAT(scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(scored_hit_4), + EqualsScoredDocumentHit(scored_hit_3), + EqualsScoredDocumentHit(scored_hit_2), + EqualsScoredDocumentHit(scored_hit_1), + EqualsScoredDocumentHit(scored_hit_0))); +} + +TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldRankInAscendingOrder) { + ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone, + /*score=*/1); + + PriorityQueueScoredDocumentHitsRanker ranker( + {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3}, + /*is_descending=*/false); + + EXPECT_THAT(ranker, SizeIs(5)); + std::vector<ScoredDocumentHit> scored_document_hits = PopAll(ranker); + EXPECT_THAT(scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(scored_hit_0), + EqualsScoredDocumentHit(scored_hit_1), + EqualsScoredDocumentHit(scored_hit_2), + EqualsScoredDocumentHit(scored_hit_3), + EqualsScoredDocumentHit(scored_hit_4))); +} + +TEST(PriorityQueueScoredDocumentHitsRankerTest, + ShouldRankDuplicateScoredDocumentHits) { + ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit 
scored_hit_4(/*document_id=*/4, kSectionIdMaskNone, + /*score=*/1); + + PriorityQueueScoredDocumentHitsRanker ranker( + {scored_hit_2, scored_hit_4, scored_hit_1, scored_hit_0, scored_hit_2, + scored_hit_2, scored_hit_4, scored_hit_3}, + /*is_descending=*/true); + + EXPECT_THAT(ranker, SizeIs(8)); + std::vector<ScoredDocumentHit> scored_document_hits = PopAll(ranker); + EXPECT_THAT(scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(scored_hit_4), + EqualsScoredDocumentHit(scored_hit_4), + EqualsScoredDocumentHit(scored_hit_3), + EqualsScoredDocumentHit(scored_hit_2), + EqualsScoredDocumentHit(scored_hit_2), + EqualsScoredDocumentHit(scored_hit_2), + EqualsScoredDocumentHit(scored_hit_1), + EqualsScoredDocumentHit(scored_hit_0))); +} + +TEST(PriorityQueueScoredDocumentHitsRankerTest, + ShouldRankEmptyScoredDocumentHits) { + PriorityQueueScoredDocumentHitsRanker ranker(/*scored_document_hits=*/{}, + /*is_descending=*/true); + EXPECT_THAT(ranker, IsEmpty()); +} + +TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldTruncateToNewSize) { + ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone, + /*score=*/1); + + PriorityQueueScoredDocumentHitsRanker ranker( + {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3}, + /*is_descending=*/true); + ASSERT_THAT(ranker, SizeIs(5)); + + ranker.TruncateHitsTo(/*new_size=*/3); + EXPECT_THAT(ranker, SizeIs(3)); + std::vector<ScoredDocumentHit> scored_document_hits = PopAll(ranker); + EXPECT_THAT(scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(scored_hit_4), + EqualsScoredDocumentHit(scored_hit_3), + 
EqualsScoredDocumentHit(scored_hit_2))); +} + +TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldTruncateToZero) { + ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone, + /*score=*/1); + + PriorityQueueScoredDocumentHitsRanker ranker( + {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3}, + /*is_descending=*/true); + ASSERT_THAT(ranker, SizeIs(5)); + + ranker.TruncateHitsTo(/*new_size=*/0); + EXPECT_THAT(ranker, IsEmpty()); +} + +TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldNotTruncateToNegative) { + ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone, + /*score=*/1); + ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone, + /*score=*/1); + + PriorityQueueScoredDocumentHitsRanker ranker( + {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3}, + /*is_descending=*/true); + ASSERT_THAT(ranker, SizeIs(Eq(5))); + + ranker.TruncateHitsTo(/*new_size=*/-1); + EXPECT_THAT(ranker, SizeIs(Eq(5))); + // Contents are not affected. 
+ std::vector<ScoredDocumentHit> scored_document_hits = PopAll(ranker); + EXPECT_THAT(scored_document_hits, + ElementsAre(EqualsScoredDocumentHit(scored_hit_4), + EqualsScoredDocumentHit(scored_hit_3), + EqualsScoredDocumentHit(scored_hit_2), + EqualsScoredDocumentHit(scored_hit_1), + EqualsScoredDocumentHit(scored_hit_0))); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/scoring/ranker.cc b/icing/scoring/ranker.cc index 117f44c..ad971d3 100644 --- a/icing/scoring/ranker.cc +++ b/icing/scoring/ranker.cc @@ -103,8 +103,7 @@ void HeapifyTermDown(std::vector<TermMetadata>& scored_terms, // If the minimum is not the subtree root, swap and continue heapifying the // lower level subtree. if (min != target_subtree_root_index) { - std::swap(scored_terms.at(min), - scored_terms.at(target_subtree_root_index)); + std::swap(scored_terms.at(min), scored_terms.at(target_subtree_root_index)); HeapifyTermDown(scored_terms, min); } } @@ -146,35 +145,6 @@ TermMetadata PopRootTerm(std::vector<TermMetadata>& scored_terms) { return root; } -// Helper function to extract the root from the heap. The heap structure will be -// maintained. -// -// Returns: -// The current root element on success -// RESOURCE_EXHAUSTED_ERROR if heap is empty -libtextclassifier3::StatusOr<ScoredDocumentHit> PopRoot( - std::vector<ScoredDocumentHit>* scored_document_hits_heap, - const ScoredDocumentHitComparator& scored_document_hit_comparator) { - if (scored_document_hits_heap->empty()) { - // An invalid ScoredDocumentHit - return absl_ports::ResourceExhaustedError("Heap is empty"); - } - - // Steps to extract root from heap: - // 1. copy out root - ScoredDocumentHit root = scored_document_hits_heap->at(0); - const size_t last_node_index = scored_document_hits_heap->size() - 1; - // 2. swap root and the last node - std::swap(scored_document_hits_heap->at(0), - scored_document_hits_heap->at(last_node_index)); - // 3. 
remove last node - scored_document_hits_heap->pop_back(); - // 4. heapify root - Heapify(scored_document_hits_heap, /*target_subtree_root_index=*/0, - scored_document_hit_comparator); - return root; -} - } // namespace void BuildHeapInPlace( @@ -203,6 +173,29 @@ void PushToTermHeap(TermMetadata term, int number_to_return, } } +libtextclassifier3::StatusOr<ScoredDocumentHit> PopNextTopResultFromHeap( + std::vector<ScoredDocumentHit>* scored_document_hits_heap, + const ScoredDocumentHitComparator& scored_document_hit_comparator) { + if (scored_document_hits_heap->empty()) { + // An invalid ScoredDocumentHit + return absl_ports::ResourceExhaustedError("Heap is empty"); + } + + // Steps to extract root from heap: + // 1. copy out root + ScoredDocumentHit root = scored_document_hits_heap->at(0); + const size_t last_node_index = scored_document_hits_heap->size() - 1; + // 2. swap root and the last node + std::swap(scored_document_hits_heap->at(0), + scored_document_hits_heap->at(last_node_index)); + // 3. remove last node + scored_document_hits_heap->pop_back(); + // 4. 
heapify root + Heapify(scored_document_hits_heap, /*target_subtree_root_index=*/0, + scored_document_hit_comparator); + return root; +} + std::vector<ScoredDocumentHit> PopTopResultsFromHeap( std::vector<ScoredDocumentHit>* scored_document_hits_heap, int num_results, const ScoredDocumentHitComparator& scored_document_hit_comparator) { @@ -211,7 +204,8 @@ std::vector<ScoredDocumentHit> PopTopResultsFromHeap( num_results, static_cast<int>(scored_document_hits_heap->size())); while (result_size-- > 0) { libtextclassifier3::StatusOr<ScoredDocumentHit> next_best_document_hit_or = - PopRoot(scored_document_hits_heap, scored_document_hit_comparator); + PopNextTopResultFromHeap(scored_document_hits_heap, + scored_document_hit_comparator); if (next_best_document_hit_or.ok()) { scored_document_hit_result.push_back( std::move(next_best_document_hit_or).ValueOrDie()); diff --git a/icing/scoring/ranker.h b/icing/scoring/ranker.h index 81838f3..bfe1077 100644 --- a/icing/scoring/ranker.h +++ b/icing/scoring/ranker.h @@ -17,6 +17,7 @@ #include <vector> +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/index/term-metadata.h" #include "icing/scoring/scored-document-hit.h" @@ -32,6 +33,17 @@ void BuildHeapInPlace( std::vector<ScoredDocumentHit>* scored_document_hits, const ScoredDocumentHitComparator& scored_document_hit_comparator); +// Returns the single next top result (i.e. the current root element) from the +// given heap and remove it from the heap. The heap structure will be +// maintained. +// +// Returns: +// The next top result element on success +// RESOURCE_EXHAUSTED_ERROR if heap is empty +libtextclassifier3::StatusOr<ScoredDocumentHit> PopNextTopResultFromHeap( + std::vector<ScoredDocumentHit>* scored_document_hits_heap, + const ScoredDocumentHitComparator& scored_document_hit_comparator); + // Returns the top num_results results from the given heap and remove those // results from the heap. An empty vector will be returned if heap is empty. 
// diff --git a/icing/scoring/ranker_benchmark.cc b/icing/scoring/ranker_benchmark.cc index 8983dd9..c2f13de 100644 --- a/icing/scoring/ranker_benchmark.cc +++ b/icing/scoring/ranker_benchmark.cc @@ -27,7 +27,7 @@ namespace { // $ blaze build -c opt --dynamic_mode=off --copt=-gmlt // //icing/scoring:ranker_benchmark // -// $ blaze-bin/icing/scoring/ranker_benchmark --benchmarks=all +// $ blaze-bin/icing/scoring/ranker_benchmark --benchmark_filter=all // --benchmark_memory_usage // // Run on an Android device: @@ -38,7 +38,7 @@ namespace { // $ adb push blaze-bin/icing/scoring/ranker_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/ranker_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/ranker_benchmark --benchmark_filter=all void BM_GetTopN(benchmark::State& state) { int num_to_score = state.range(0); diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc index cc1d995..44dda3c 100644 --- a/icing/scoring/score-and-rank_benchmark.cc +++ b/icing/scoring/score-and-rank_benchmark.cc @@ -49,7 +49,7 @@ // //icing/scoring:score-and-rank_benchmark // // $ blaze-bin/icing/scoring/score-and-rank_benchmark -// --benchmarks=all --benchmark_memory_usage +// --benchmark_filter=all --benchmark_memory_usage // // Run on an Android device: // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" @@ -59,7 +59,7 @@ // $ adb push blaze-bin/icing/scoring/score-and-rank_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/score-and-rank_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/score-and-rank_benchmark --benchmark_filter=all namespace icing { namespace lib { diff --git a/icing/scoring/scored-document-hits-ranker.h b/icing/scoring/scored-document-hits-ranker.h new file mode 100644 index 0000000..0287452 --- /dev/null +++ b/icing/scoring/scored-document-hits-ranker.h @@ -0,0 +1,53 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the 
"License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_SCORING_SCORED_DOCUMENT_HITS_RANKER_H_ +#define ICING_SCORING_SCORED_DOCUMENT_HITS_RANKER_H_ + +#include "icing/scoring/scored-document-hit.h" + +namespace icing { +namespace lib { + +// TODO(sungyc): re-evaluate other similar implementations (e.g. std::sort + +// std::queue/std::vector). Also revisit the capacity shrinking +// issue for PopNext(). + +// ScoredDocumentHitsRanker is an interface class for ranking +// ScoredDocumentHits. +class ScoredDocumentHitsRanker { + public: + virtual ~ScoredDocumentHitsRanker() = default; + + // Pop the next top ScoredDocumentHit and return. It is undefined to call + // PopNext on an empty ranker, so the caller should check if it is not empty + // before calling. + virtual ScoredDocumentHit PopNext() = 0; + + // Truncates the remaining ScoredDocumentHits to the given size. The best + // ScoredDocumentHits (according to the ranking policy) should be kept. + // If new_size is invalid (< 0), or greater or equal to # of remaining + // ScoredDocumentHits, then no action will be taken. Otherwise truncates the + // the remaining ScoredDocumentHits to the given size. 
+ virtual void TruncateHitsTo(int new_size) = 0; + + virtual int size() const = 0; + + virtual bool empty() const = 0; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_SCORING_SCORED_DOCUMENT_HITS_RANKER_H_ diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc index 5e23a8e..1739a50 100644 --- a/icing/store/document-log-creator.cc +++ b/icing/store/document-log-creator.cc @@ -18,7 +18,6 @@ #include <string> #include <utility> -#include "icing/text_classifier/lib3/utils/base/logging.h" #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/annotate.h" diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 8c8369c..aa3122b 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -46,13 +46,14 @@ #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" #include "icing/store/document-log-creator.h" -#include "icing/store/key-mapper.h" +#include "icing/store/dynamic-trie-key-mapper.h" #include "icing/store/namespace-id.h" #include "icing/store/usage-store.h" #include "icing/tokenization/language-segmenter.h" #include "icing/util/clock.h" #include "icing/util/crc32.h" #include "icing/util/data-loss.h" +#include "icing/util/fingerprint-util.h" #include "icing/util/logging.h" #include "icing/util/status-macros.h" #include "icing/util/tokenized-document.h" @@ -77,8 +78,8 @@ constexpr char kCorpusIdMapperFilename[] = "corpus_mapper"; // because we allow up to 1 million DocumentIds. constexpr int32_t kUriMapperMaxSize = 36 * 1024 * 1024; // 36 MiB -// 384 KiB for a KeyMapper would allow each internal array to have a max of -// 128 KiB for storage. +// 384 KiB for a DynamicTrieKeyMapper would allow each internal array to have a +// max of 128 KiB for storage. 
constexpr int32_t kNamespaceMapperMaxSize = 3 * 128 * 1024; // 384 KiB constexpr int32_t kCorpusMapperMaxSize = 3 * 128 * 1024; // 384 KiB @@ -125,22 +126,13 @@ std::string MakeCorpusMapperFilename(const std::string& base_dir) { // overhead per key. As we know that these fingerprints are always 8-bytes in // length and that they're random, we might be able to store them more // compactly. -std::string MakeFingerprint(std::string_view name_space, std::string_view uri) { +std::string MakeFingerprint(std::string_view field1, std::string_view field2) { // Using a 64-bit fingerprint to represent the key could lead to collisions. // But, even with 200K unique keys, the probability of collision is about // one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack). uint64_t fprint = - tc3farmhash::Fingerprint64(absl_ports::StrCat(name_space, uri)); - - std::string encoded_fprint; - // DynamicTrie cannot handle keys with '0' as bytes. So, we encode it in - // base128 and add 1 to make sure that no byte is '0'. This increases the - // size of the encoded_fprint from 8-bytes to 10-bytes. 
- while (fprint) { - encoded_fprint.push_back((fprint & 0x7F) + 1); - fprint >>= 7; - } - return encoded_fprint; + tc3farmhash::Fingerprint64(absl_ports::StrCat(field1, field2)); + return fingerprint_util::GetFingerprintString(fprint); } int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms, @@ -266,12 +258,13 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( GetRecoveryCause(create_result, force_recovery_and_revalidate_documents); if (recovery_cause != InitializeStatsProto::NONE || create_result.new_file) { - ICING_LOG(WARNING) << "Starting Document Store Recovery with cause=" - << recovery_cause << ", and create result { new_file=" - << create_result.new_file << ", preeisting_file_version=" - << create_result.preexisting_file_version << ", data_loss=" - << create_result.log_create_result.data_loss << "} and kCurrentVersion=" - << DocumentLogCreator::kCurrentVersion; + ICING_LOG(INFO) << "Starting Document Store Recovery with cause=" + << recovery_cause << ", and create result { new_file=" + << create_result.new_file << ", preeisting_file_version=" + << create_result.preexisting_file_version << ", data_loss=" + << create_result.log_create_result.data_loss + << "} and kCurrentVersion=" + << DocumentLogCreator::kCurrentVersion; // We can't rely on any existing derived files. Recreate them from scratch. // Currently happens if: // 1) This is a new log and we don't have derived files yet @@ -348,8 +341,11 @@ libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() { // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. 
- auto document_key_mapper_or = - KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize); + auto document_key_mapper_or = DynamicTrieKeyMapper< + DocumentId, + fingerprint_util::FingerprintStringFormatter>::Create(*filesystem_, + base_dir_, + kUriMapperMaxSize); if (!document_key_mapper_or.ok()) { ICING_LOG(ERROR) << document_key_mapper_or.status().error_message() << "Failed to initialize KeyMapper"; @@ -381,18 +377,23 @@ libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() { ICING_ASSIGN_OR_RETURN( namespace_mapper_, - KeyMapper<NamespaceId>::Create(*filesystem_, - MakeNamespaceMapperFilename(base_dir_), - kNamespaceMapperMaxSize)); + DynamicTrieKeyMapper<NamespaceId>::Create( + *filesystem_, MakeNamespaceMapperFilename(base_dir_), + kNamespaceMapperMaxSize)); ICING_ASSIGN_OR_RETURN( usage_store_, UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_))); - ICING_ASSIGN_OR_RETURN(corpus_mapper_, - KeyMapper<CorpusId>::Create( - *filesystem_, MakeCorpusMapperFilename(base_dir_), - kCorpusMapperMaxSize)); + auto corpus_mapper_or = + DynamicTrieKeyMapper<CorpusId, + fingerprint_util::FingerprintStringFormatter>:: + Create(*filesystem_, MakeCorpusMapperFilename(base_dir_), + kCorpusMapperMaxSize); + if (!corpus_mapper_or.ok()) { + return std::move(corpus_mapper_or).status(); + } + corpus_mapper_ = std::move(corpus_mapper_or).ValueOrDie(); ICING_ASSIGN_OR_RETURN(corpus_score_cache_, FileBackedVector<CorpusAssociatedScoreData>::Create( @@ -561,7 +562,7 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() { // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. 
libtextclassifier3::Status status = - KeyMapper<DocumentId>::Delete(*filesystem_, base_dir_); + DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem_, base_dir_); if (!status.ok()) { ICING_LOG(ERROR) << status.error_message() << "Failed to delete old key mapper"; @@ -570,8 +571,11 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() { // TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. - auto document_key_mapper_or = - KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize); + auto document_key_mapper_or = DynamicTrieKeyMapper< + DocumentId, + fingerprint_util::FingerprintStringFormatter>::Create(*filesystem_, + base_dir_, + kUriMapperMaxSize); if (!document_key_mapper_or.ok()) { ICING_LOG(ERROR) << document_key_mapper_or.status().error_message() << "Failed to re-init key mapper"; @@ -648,7 +652,7 @@ libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() { namespace_mapper_.reset(); // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. 
- libtextclassifier3::Status status = KeyMapper<NamespaceId>::Delete( + libtextclassifier3::Status status = DynamicTrieKeyMapper<NamespaceId>::Delete( *filesystem_, MakeNamespaceMapperFilename(base_dir_)); if (!status.ok()) { ICING_LOG(ERROR) << status.error_message() @@ -657,9 +661,9 @@ libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() { } ICING_ASSIGN_OR_RETURN( namespace_mapper_, - KeyMapper<NamespaceId>::Create(*filesystem_, - MakeNamespaceMapperFilename(base_dir_), - kNamespaceMapperMaxSize)); + DynamicTrieKeyMapper<NamespaceId>::Create( + *filesystem_, MakeNamespaceMapperFilename(base_dir_), + kNamespaceMapperMaxSize)); return libtextclassifier3::Status::OK; } @@ -668,17 +672,22 @@ libtextclassifier3::Status DocumentStore::ResetCorpusMapper() { corpus_mapper_.reset(); // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. - libtextclassifier3::Status status = KeyMapper<CorpusId>::Delete( + libtextclassifier3::Status status = DynamicTrieKeyMapper<CorpusId>::Delete( *filesystem_, MakeCorpusMapperFilename(base_dir_)); if (!status.ok()) { ICING_LOG(ERROR) << status.error_message() << "Failed to delete old corpus_id mapper"; return status; } - ICING_ASSIGN_OR_RETURN(corpus_mapper_, - KeyMapper<CorpusId>::Create( - *filesystem_, MakeCorpusMapperFilename(base_dir_), - kCorpusMapperMaxSize)); + auto corpus_mapper_or = + DynamicTrieKeyMapper<CorpusId, + fingerprint_util::FingerprintStringFormatter>:: + Create(*filesystem_, MakeCorpusMapperFilename(base_dir_), + kCorpusMapperMaxSize); + if (!corpus_mapper_or.ok()) { + return std::move(corpus_mapper_or).status(); + } + corpus_mapper_ = std::move(corpus_mapper_or).ValueOrDie(); return libtextclassifier3::Status::OK; } @@ -931,7 +940,18 @@ libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get( libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get( DocumentId document_id, bool clear_internal_fields) const { - 
ICING_RETURN_IF_ERROR(DoesDocumentExistWithStatus(document_id)); + auto document_filter_data_optional_ = GetAliveDocumentFilterData(document_id); + if (!document_filter_data_optional_) { + // The document doesn't exist. Let's check if the document id is invalid, we + // will return InvalidArgumentError. Otherwise we should return NOT_FOUND + // error. + if (!IsDocumentIdValid(document_id)) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Document id '%d' invalid.", document_id)); + } + return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( + "Document id '%d' doesn't exist", document_id)); + } auto document_log_offset_or = document_id_mapper_->Get(document_id); if (!document_log_offset_or.ok()) { @@ -991,7 +1011,7 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const { } const DocumentFilterData* data = status_or_data.ValueOrDie(); - if (InternalDoesDocumentExist(document_id)) { + if (GetAliveDocumentFilterData(document_id)) { existing_namespace_ids.insert(data->namespace_id()); } } @@ -1004,43 +1024,15 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const { return existing_namespaces; } -bool DocumentStore::DoesDocumentExist(DocumentId document_id) const { - if (!IsDocumentIdValid(document_id)) { - return false; - } - - if (document_id >= document_id_mapper_->num_elements()) { - // Somehow got an validly constructed document_id that the document store - // doesn't know about - return false; - } - - return InternalDoesDocumentExist(document_id); -} - -libtextclassifier3::Status DocumentStore::DoesDocumentExistWithStatus( +std::optional<DocumentFilterData> DocumentStore::GetAliveDocumentFilterData( DocumentId document_id) const { if (!IsDocumentIdValid(document_id)) { - return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( - "Document id '%d' invalid.", document_id)); + return std::nullopt; } - - if (document_id >= document_id_mapper_->num_elements()) { - // Somehow got a validly 
constructed document_id that the document store - // doesn't know about. - return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( - "Unknown document id '%d'.", document_id)); + if (IsDeleted(document_id)) { + return std::nullopt; } - - if (!InternalDoesDocumentExist(document_id)) { - return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( - "Document id '%d' doesn't exist", document_id)); - }; - return libtextclassifier3::Status::OK; -} - -bool DocumentStore::InternalDoesDocumentExist(DocumentId document_id) const { - return !IsDeleted(document_id) && !IsExpired(document_id); + return GetNonExpiredDocumentFilterData(document_id); } bool DocumentStore::IsDeleted(DocumentId document_id) const { @@ -1057,21 +1049,27 @@ bool DocumentStore::IsDeleted(DocumentId document_id) const { return file_offset == kDocDeletedFlag; } -bool DocumentStore::IsExpired(DocumentId document_id) const { - auto filter_data_or = filter_cache_->Get(document_id); +// Returns DocumentFilterData if the document is not expired. Otherwise, +// std::nullopt. +std::optional<DocumentFilterData> +DocumentStore::GetNonExpiredDocumentFilterData(DocumentId document_id) const { + auto filter_data_or = filter_cache_->GetCopy(document_id); if (!filter_data_or.ok()) { // This would only happen if document_id is out of range of the // filter_cache, meaning we got some invalid document_id. Callers should // already have checked that their document_id is valid or used // DoesDocumentExist(WithStatus). Regardless, return true since the // document doesn't exist. 
- return true; + return std::nullopt; } - const DocumentFilterData* filter_data = filter_data_or.ValueOrDie(); + DocumentFilterData document_filter_data = filter_data_or.ValueOrDie(); // Check if it's past the expiration time - return clock_.GetSystemTimeMilliseconds() >= - filter_data->expiration_timestamp_ms(); + if (clock_.GetSystemTimeMilliseconds() >= + document_filter_data.expiration_timestamp_ms()) { + return std::nullopt; + } + return document_filter_data; } libtextclassifier3::Status DocumentStore::Delete( @@ -1088,7 +1086,17 @@ libtextclassifier3::Status DocumentStore::Delete( } libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id) { - ICING_RETURN_IF_ERROR(DoesDocumentExistWithStatus(document_id)); + auto document_filter_data_optional_ = GetAliveDocumentFilterData(document_id); + if (!document_filter_data_optional_) { + // The document doesn't exist. We should return InvalidArgumentError if the + // document id is invalid. Otherwise we should return NOT_FOUND error. 
+ if (!IsDocumentIdValid(document_id)) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Document id '%d' invalid.", document_id)); + } + return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( + "Document id '%d' doesn't exist", document_id)); + } auto document_log_offset_or = document_id_mapper_->Get(document_id); if (!document_log_offset_or.ok()) { @@ -1113,7 +1121,7 @@ libtextclassifier3::StatusOr<CorpusId> DocumentStore::GetCorpusId( libtextclassifier3::StatusOr<DocumentAssociatedScoreData> DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const { - if (!DoesDocumentExist(document_id)) { + if (!GetAliveDocumentFilterData(document_id)) { return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( "Can't get usage scores, document id '%d' doesn't exist", document_id)); } @@ -1162,27 +1170,9 @@ DocumentStore::GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const { return corpus_scoring_data_or.status(); } -libtextclassifier3::StatusOr<DocumentFilterData> -DocumentStore::GetDocumentFilterData(DocumentId document_id) const { - if (!DoesDocumentExist(document_id)) { - return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( - "Can't get filter data, document id '%d' doesn't exist", document_id)); - } - - auto filter_data_or = filter_cache_->GetCopy(document_id); - if (!filter_data_or.ok()) { - ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id - << " from filter_cache_"; - return filter_data_or.status(); - } - DocumentFilterData document_filter_data = - std::move(filter_data_or).ValueOrDie(); - return document_filter_data; -} - libtextclassifier3::StatusOr<UsageStore::UsageScores> DocumentStore::GetUsageScores(DocumentId document_id) const { - if (!DoesDocumentExist(document_id)) { + if (!GetAliveDocumentFilterData(document_id)) { return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( "Can't get usage scores, document id '%d' doesn't exist", document_id)); } @@ 
-1197,7 +1187,7 @@ libtextclassifier3::Status DocumentStore::ReportUsage( // We can use the internal version here because we got our document_id from // our internal data structures. We would have thrown some error if the // namespace and/or uri were incorrect. - if (!InternalDoesDocumentExist(document_id)) { + if (!GetAliveDocumentFilterData(document_id)) { // Document was probably deleted or expired. return absl_ports::NotFoundError(absl_ports::StrCat( "Couldn't report usage on a nonexistent document: (namespace: '", @@ -1415,7 +1405,7 @@ DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts( UsageStore::UsageScores usage_scores = usage_scores_or.ValueOrDie(); // Update our stats - if (IsExpired(document_id)) { + if (!GetNonExpiredDocumentFilterData(document_id)) { ++total_num_expired; namespace_storage_info.set_num_expired_documents( namespace_storage_info.num_expired_documents() + 1); @@ -1529,7 +1519,7 @@ libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore( int size = document_id_mapper_->num_elements(); for (DocumentId document_id = 0; document_id < size; document_id++) { - if (!InternalDoesDocumentExist(document_id)) { + if (!GetAliveDocumentFilterData(document_id)) { // Skip nonexistent documents continue; } @@ -1611,7 +1601,7 @@ libtextclassifier3::Status DocumentStore::OptimizeInto( if (absl_ports::IsNotFound(document_or.status())) { if (IsDeleted(document_id)) { ++num_deleted; - } else if (IsExpired(document_id)) { + } else if (!GetNonExpiredDocumentFilterData(document_id)) { ++num_expired; } continue; @@ -1680,7 +1670,7 @@ DocumentStore::GetOptimizeInfo() const { int32_t num_documents = document_id_mapper_->num_elements(); for (DocumentId document_id = kMinDocumentId; document_id < num_documents; ++document_id) { - if (!InternalDoesDocumentExist(document_id)) { + if (!GetAliveDocumentFilterData(document_id)) { ++optimize_info.optimizable_docs; } @@ -1713,8 +1703,8 @@ DocumentStore::GetOptimizeInfo() const { 
ICING_ASSIGN_OR_RETURN(const int64_t usage_store_file_size, usage_store_->GetElementsFileSize()); - // We use a combined disk usage and file size for the KeyMapper because it's - // backed by a trie, which has some sparse property bitmaps. + // We use a combined disk usage and file size for the DynamicTrieKeyMapper + // because it's backed by a trie, which has some sparse property bitmaps. ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_size, document_key_mapper_->GetElementsSize()); @@ -1794,7 +1784,7 @@ DocumentStore::CollectCorpusInfo() const { const SchemaProto* schema_proto = schema_proto_or.ValueOrDie(); for (DocumentId document_id = 0; document_id < filter_cache_->num_elements(); ++document_id) { - if (!InternalDoesDocumentExist(document_id)) { + if (!GetAliveDocumentFilterData(document_id)) { continue; } ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data, diff --git a/icing/store/document-store.h b/icing/store/document-store.h index e6d2e5c..450b1b9 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -48,6 +48,7 @@ #include "icing/util/crc32.h" #include "icing/util/data-loss.h" #include "icing/util/document-validator.h" +#include "icing/util/fingerprint-util.h" namespace icing { namespace lib { @@ -198,19 +199,6 @@ class DocumentStore { // or expired). Order of namespaces is undefined. std::vector<std::string> GetAllNamespaces() const; - // Check if a document exists. Existence means it hasn't been deleted and it - // hasn't expired yet. - // - // NOTE: This should be used when callers don't care about error messages, - // expect documents to be deleted/not found, or in frequently called code - // paths that could cause performance issues. A signficant amount of CPU - // cycles can be saved if we don't construct strings and create new Status - // objects on the heap. See b/185822483. 
- // - // Returns: - // boolean whether a document exists or not - bool DoesDocumentExist(DocumentId document_id) const; - // Deletes the document identified by the given namespace and uri. The // document proto will be erased immediately. // @@ -280,14 +268,15 @@ class DocumentStore { libtextclassifier3::StatusOr<CorpusAssociatedScoreData> GetCorpusAssociatedScoreData(CorpusId corpus_id) const; - // Returns the DocumentFilterData of the document specified by the DocumentId. + // Gets the document filter data if a document exists. Otherwise, will get a + // false optional. + // + // Existence means it hasn't been deleted and it hasn't expired yet. // // Returns: - // DocumentFilterData on success - // OUT_OF_RANGE if document_id is negative or exceeds previously seen - // DocumentIds - // NOT_FOUND if the document or the filter data is not found - libtextclassifier3::StatusOr<DocumentFilterData> GetDocumentFilterData( + // True:DocumentFilterData if the given document exists. + // False if the given document doesn't exist. + std::optional<DocumentFilterData> GetAliveDocumentFilterData( DocumentId document_id) const; // Gets the usage scores of a document. @@ -455,7 +444,9 @@ class DocumentStore { std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_; // Key (namespace + uri) to DocumentId mapping - std::unique_ptr<KeyMapper<DocumentId>> document_key_mapper_; + std::unique_ptr< + KeyMapper<DocumentId, fingerprint_util::FingerprintStringFormatter>> + document_key_mapper_; // DocumentId to file offset mapping std::unique_ptr<FileBackedVector<int64_t>> document_id_mapper_; @@ -491,7 +482,9 @@ class DocumentStore { // unique id. A coprus is assigned an // id when the first document belonging to that corpus is added to the // DocumentStore. Corpus ids may be removed from the mapper during compaction. 
- std::unique_ptr<KeyMapper<CorpusId>> corpus_mapper_; + std::unique_ptr< + KeyMapper<CorpusId, fingerprint_util::FingerprintStringFormatter>> + corpus_mapper_; // A storage class that caches all usage scores. Usage scores are not // considered as ground truth. Usage scores are associated with document ids @@ -648,18 +641,6 @@ class DocumentStore { libtextclassifier3::Status DoesDocumentExistWithStatus( DocumentId document_id) const; - // Check if a document exists. Existence means it hasn't been deleted and it - // hasn't expired yet. - // - // This is for internal-use only because we assume that the document_id is - // already valid. If you're unsure if the document_id is valid, use - // DoesDocumentExist(document_id) instead, which will perform those additional - // checks. - // - // Returns: - // boolean whether a document exists or not - bool InternalDoesDocumentExist(DocumentId document_id) const; - // Checks if a document has been deleted // // This is for internal-use only because we assume that the document_id is @@ -674,7 +655,12 @@ class DocumentStore { // already valid. If you're unsure if the document_id is valid, use // DoesDocumentExist(document_id) instead, which will perform those additional // checks. - bool IsExpired(DocumentId document_id) const; + + // Returns: + // True:DocumentFilterData if the given document isn't expired. + // False if the given doesn't document is expired. + std::optional<DocumentFilterData> GetNonExpiredDocumentFilterData( + DocumentId document_id) const; // Updates the entry in the score cache for document_id. 
libtextclassifier3::Status UpdateDocumentAssociatedScoreCache( diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc index fc3fd9d..c4d2346 100644 --- a/icing/store/document-store_benchmark.cc +++ b/icing/store/document-store_benchmark.cc @@ -46,7 +46,7 @@ // //icing/store:document-store_benchmark // // $ blaze-bin/icing/store/document-store_benchmark -// --benchmarks=all --benchmark_memory_usage +// --benchmark_filter=all --benchmark_memory_usage // // Run on an Android device: // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" @@ -57,7 +57,7 @@ // /data/local/tmp/ // // $ adb shell /data/local/tmp/document-store_benchmark -// --benchmarks=all +// --benchmark_filter=all namespace icing { namespace lib { @@ -164,7 +164,8 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) { // Check random document ids to see if they exist. Hopefully to simulate // page faulting in different sections of our mmapped derived files. int document_id = dist(random); - benchmark::DoNotOptimize(document_store->DoesDocumentExist(document_id)); + benchmark::DoNotOptimize( + document_store->GetAliveDocumentFilterData(document_id)); } } BENCHMARK(BM_DoesDocumentExistBenchmark); diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index a30b4e4..59e5d74 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -358,23 +358,22 @@ TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store->Put(DocumentProto(test_document2_))); - EXPECT_THAT(doc_store->DoesDocumentExist(document_id1), IsTrue()); - EXPECT_THAT(doc_store->DoesDocumentExist(document_id2), IsTrue()); + EXPECT_TRUE(doc_store->GetAliveDocumentFilterData(document_id1)); + EXPECT_TRUE(doc_store->GetAliveDocumentFilterData(document_id2)); DocumentId invalid_document_id_negative = -1; - 
EXPECT_THAT(doc_store->DoesDocumentExist(invalid_document_id_negative), - IsFalse()); + EXPECT_FALSE( + doc_store->GetAliveDocumentFilterData(invalid_document_id_negative)); DocumentId invalid_document_id_greater_than_max = kMaxDocumentId + 2; - EXPECT_THAT( - doc_store->DoesDocumentExist(invalid_document_id_greater_than_max), - IsFalse()); + EXPECT_FALSE(doc_store->GetAliveDocumentFilterData( + invalid_document_id_greater_than_max)); - EXPECT_THAT(doc_store->DoesDocumentExist(kInvalidDocumentId), IsFalse()); + EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(kInvalidDocumentId)); DocumentId invalid_document_id_out_of_range = document_id2 + 1; - EXPECT_THAT(doc_store->DoesDocumentExist(invalid_document_id_out_of_range), - IsFalse()); + EXPECT_FALSE( + doc_store->GetAliveDocumentFilterData(invalid_document_id_out_of_range)); } TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) { @@ -485,6 +484,35 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) { EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } +TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + // Validates that deleting something non-existing won't append anything to + // ground truth + int64_t document_log_size_before = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); + + libtextclassifier3::Status status = + document_store->Delete("android$contacts/", "661"); + EXPECT_THAT(status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + for (char c : status.error_message()) { + EXPECT_THAT(std::isprint(c), IsTrue()); + } + + int64_t document_log_size_after = filesystem_.GetFileSize( + 
absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); + EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); +} + TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -1130,12 +1158,15 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(doc_store->Get(document_id2), IsOkAndHolds(EqualsProto(test_document2_))); - // Checks derived filter cache - EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2), - IsOkAndHolds(DocumentFilterData( + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData doc_filter_data, + doc_store->GetAliveDocumentFilterData(document_id2)); + EXPECT_THAT(doc_filter_data, + Eq(DocumentFilterData( /*namespace_id=*/0, /*schema_type_id=*/0, document2_expiration_timestamp_))); + // Checks derived score cache EXPECT_THAT( doc_store->GetDocumentAssociatedScoreData(document_id2), @@ -1220,10 +1251,14 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) { IsOkAndHolds(EqualsProto(test_document2_))); // Checks derived filter cache - EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2), - IsOkAndHolds(DocumentFilterData( + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData doc_filter_data, + doc_store->GetAliveDocumentFilterData(document_id2)); + EXPECT_THAT(doc_filter_data, + Eq(DocumentFilterData( /*namespace_id=*/0, /*schema_type_id=*/0, document2_expiration_timestamp_))); + // Checks derived score cache - note that they aren't regenerated from // scratch. 
EXPECT_THAT( @@ -1293,8 +1328,11 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) { IsOkAndHolds(EqualsProto(test_document2_))); // Checks derived filter cache - EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2), - IsOkAndHolds(DocumentFilterData( + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData doc_filter_data, + doc_store->GetAliveDocumentFilterData(document_id2)); + EXPECT_THAT(doc_filter_data, + Eq(DocumentFilterData( /*namespace_id=*/0, /*schema_type_id=*/0, document2_expiration_timestamp_))); // Checks derived score cache @@ -1704,8 +1742,7 @@ TEST_F(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); - EXPECT_THAT(doc_store->GetDocumentFilterData(/*document_id=*/0), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(/*document_id=*/0)); } TEST_F(DocumentStoreTest, DeleteClearsFilterCache) { @@ -1719,17 +1756,17 @@ TEST_F(DocumentStoreTest, DeleteClearsFilterCache) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(test_document1_)); - EXPECT_THAT( - doc_store->GetDocumentFilterData(document_id), - IsOkAndHolds(DocumentFilterData( - /*namespace_id=*/0, - /*schema_type_id=*/0, - /*expiration_timestamp_ms=*/document1_expiration_timestamp_))); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData doc_filter_data, + doc_store->GetAliveDocumentFilterData(document_id)); + EXPECT_THAT(doc_filter_data, + Eq(DocumentFilterData( + /*namespace_id=*/0, + /*schema_type_id=*/0, document1_expiration_timestamp_))); ICING_ASSERT_OK(doc_store->Delete("icing", "email/1")); // Associated entry of the deleted document is removed. 
- EXPECT_THAT(doc_store->GetDocumentFilterData(document_id), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(document_id)); } TEST_F(DocumentStoreTest, DeleteClearsScoreCache) { @@ -1857,12 +1894,13 @@ TEST_F(DocumentStoreTest, std::move(create_result.document_store); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document)); - - EXPECT_THAT( - doc_store->GetDocumentFilterData(document_id), - IsOkAndHolds(DocumentFilterData(/*namespace_id=*/0, - /*schema_type_id=*/0, - /*expiration_timestamp_ms=*/1100))); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData doc_filter_data, + doc_store->GetAliveDocumentFilterData(document_id)); + EXPECT_THAT(doc_filter_data, Eq(DocumentFilterData( + /*namespace_id=*/0, + /*schema_type_id=*/0, + /*expiration_timestamp_ms=*/1100))); } TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) { @@ -1882,9 +1920,13 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document)); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData doc_filter_data, + doc_store->GetAliveDocumentFilterData(document_id)); + EXPECT_THAT( - doc_store->GetDocumentFilterData(document_id), - IsOkAndHolds(DocumentFilterData( + doc_filter_data, + Eq(DocumentFilterData( /*namespace_id=*/0, /*schema_type_id=*/0, /*expiration_timestamp_ms=*/std::numeric_limits<int64_t>::max()))); @@ -1908,9 +1950,13 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document)); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData doc_filter_data, + doc_store->GetAliveDocumentFilterData(document_id)); + EXPECT_THAT( - doc_store->GetDocumentFilterData(document_id), - IsOkAndHolds(DocumentFilterData( + doc_filter_data, + Eq(DocumentFilterData( /*namespace_id=*/0, /*schema_type_id=*/0, 
/*expiration_timestamp_ms=*/std::numeric_limits<int64_t>::max()))); @@ -2108,9 +2154,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { email_document_id, document_store->Put(DocumentProto(email_document))); EXPECT_THAT(document_store->Get(email_document_id), IsOkAndHolds(EqualsProto(email_document))); - ICING_ASSERT_OK_AND_ASSIGN( + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( DocumentFilterData email_data, - document_store->GetDocumentFilterData(email_document_id)); + document_store->GetAliveDocumentFilterData(email_document_id)); EXPECT_THAT(email_data.schema_type_id(), Eq(email_schema_type_id)); email_namespace_id = email_data.namespace_id(); email_expiration_timestamp = email_data.expiration_timestamp_ms(); @@ -2121,9 +2167,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { document_store->Put(DocumentProto(message_document))); EXPECT_THAT(document_store->Get(message_document_id), IsOkAndHolds(EqualsProto(message_document))); - ICING_ASSERT_OK_AND_ASSIGN( + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( DocumentFilterData message_data, - document_store->GetDocumentFilterData(message_document_id)); + document_store->GetAliveDocumentFilterData(message_document_id)); EXPECT_THAT(message_data.schema_type_id(), Eq(message_schema_type_id)); message_namespace_id = message_data.namespace_id(); message_expiration_timestamp = message_data.expiration_timestamp_ms(); @@ -2161,9 +2207,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { // "email" document is fine EXPECT_THAT(document_store->Get(email_document_id), IsOkAndHolds(EqualsProto(email_document))); - ICING_ASSERT_OK_AND_ASSIGN( + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( DocumentFilterData email_data, - document_store->GetDocumentFilterData(email_document_id)); + document_store->GetAliveDocumentFilterData(email_document_id)); EXPECT_THAT(email_data.schema_type_id(), Eq(email_schema_type_id)); // Make sure that all the other fields are stll valid/the 
same EXPECT_THAT(email_data.namespace_id(), Eq(email_namespace_id)); @@ -2173,9 +2219,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { // "message" document has an invalid SchemaTypeId EXPECT_THAT(document_store->Get(message_document_id), IsOkAndHolds(EqualsProto(message_document))); - ICING_ASSERT_OK_AND_ASSIGN( + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( DocumentFilterData message_data, - document_store->GetDocumentFilterData(message_document_id)); + document_store->GetAliveDocumentFilterData(message_document_id)); EXPECT_THAT(message_data.schema_type_id(), Eq(-1)); // Make sure that all the other fields are stll valid/the same EXPECT_THAT(message_data.namespace_id(), Eq(message_namespace_id)); @@ -2227,16 +2273,16 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id, document_store->Put(email_document)); - ICING_ASSERT_OK_AND_ASSIGN( + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( DocumentFilterData email_data, - document_store->GetDocumentFilterData(email_document_id)); + document_store->GetAliveDocumentFilterData(email_document_id)); EXPECT_THAT(email_data.schema_type_id(), Eq(old_email_schema_type_id)); ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id, document_store->Put(message_document)); - ICING_ASSERT_OK_AND_ASSIGN( + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( DocumentFilterData message_data, - document_store->GetDocumentFilterData(message_document_id)); + document_store->GetAliveDocumentFilterData(message_document_id)); EXPECT_THAT(message_data.schema_type_id(), Eq(old_message_schema_type_id)); // Rearrange the schema types. 
Since SchemaTypeId is assigned based on order, @@ -2260,12 +2306,14 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) { ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get())); // Check that the FilterCache holds the new SchemaTypeIds - ICING_ASSERT_OK_AND_ASSIGN( - email_data, document_store->GetDocumentFilterData(email_document_id)); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + email_data, + document_store->GetAliveDocumentFilterData(email_document_id)); EXPECT_THAT(email_data.schema_type_id(), Eq(new_email_schema_type_id)); - ICING_ASSERT_OK_AND_ASSIGN( - message_data, document_store->GetDocumentFilterData(message_document_id)); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + message_data, + document_store->GetAliveDocumentFilterData(message_document_id)); EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id)); } @@ -2457,16 +2505,16 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id, document_store->Put(email_document)); - ICING_ASSERT_OK_AND_ASSIGN( + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( DocumentFilterData email_data, - document_store->GetDocumentFilterData(email_document_id)); + document_store->GetAliveDocumentFilterData(email_document_id)); EXPECT_THAT(email_data.schema_type_id(), Eq(old_email_schema_type_id)); ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id, document_store->Put(message_document)); - ICING_ASSERT_OK_AND_ASSIGN( + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( DocumentFilterData message_data, - document_store->GetDocumentFilterData(message_document_id)); + document_store->GetAliveDocumentFilterData(message_document_id)); EXPECT_THAT(message_data.schema_type_id(), Eq(old_message_schema_type_id)); // Rearrange the schema types. 
Since SchemaTypeId is assigned based on order, @@ -2492,12 +2540,14 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) { schema_store.get(), set_schema_result)); // Check that the FilterCache holds the new SchemaTypeIds - ICING_ASSERT_OK_AND_ASSIGN( - email_data, document_store->GetDocumentFilterData(email_document_id)); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + email_data, + document_store->GetAliveDocumentFilterData(email_document_id)); EXPECT_THAT(email_data.schema_type_id(), Eq(new_email_schema_type_id)); - ICING_ASSERT_OK_AND_ASSIGN( - message_data, document_store->GetDocumentFilterData(message_document_id)); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + message_data, + document_store->GetAliveDocumentFilterData(message_document_id)); EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id)); } @@ -3379,8 +3429,9 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) { .SetTtlMs(document1_ttl_) .Build(); ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc)); - ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data, - doc_store->GetDocumentFilterData(docid)); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData filter_data, + doc_store->GetAliveDocumentFilterData(docid)); ASSERT_THAT(filter_data.schema_type_id(), Eq(0)); } @@ -3420,8 +3471,9 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) { std::move(create_result.document_store); // Ensure that the type id of the email document has been correctly updated. 
- ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data, - doc_store->GetDocumentFilterData(docid)); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData filter_data, + doc_store->GetAliveDocumentFilterData(docid)); EXPECT_THAT(filter_data.schema_type_id(), Eq(1)); EXPECT_THAT(initialize_stats.document_store_recovery_cause(), Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); @@ -3477,8 +3529,9 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) { .SetTtlMs(document1_ttl_) .Build(); ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc)); - ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data, - doc_store->GetDocumentFilterData(docid)); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData filter_data, + doc_store->GetAliveDocumentFilterData(docid)); ASSERT_THAT(filter_data.schema_type_id(), Eq(0)); } @@ -3516,8 +3569,9 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) { std::move(create_result.document_store); // Check that the type id of the email document has not been updated. - ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data, - doc_store->GetDocumentFilterData(docid)); + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData filter_data, + doc_store->GetAliveDocumentFilterData(docid)); ASSERT_THAT(filter_data.schema_type_id(), Eq(0)); } } @@ -3733,7 +3787,6 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { } } -#ifndef DISABLE_BACKWARDS_COMPAT_TEST TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) { // Set up schema. 
SchemaProto schema = @@ -3854,7 +3907,6 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) { EXPECT_THAT(document_store->Get(/*document_id=*/2), IsOkAndHolds(EqualsProto(document3))); } -#endif // DISABLE_BACKWARDS_COMPAT_TEST TEST_F(DocumentStoreTest, GetDebugInfo) { SchemaProto schema = @@ -3928,8 +3980,9 @@ TEST_F(DocumentStoreTest, GetDebugInfo) { .Build(); ICING_ASSERT_OK(document_store->Put(document4, 2)); - ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out1, - document_store->GetDebugInfo(/*verbosity=*/1)); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentDebugInfoProto out1, + document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED)); EXPECT_THAT(out1.crc(), Gt(0)); EXPECT_THAT(out1.document_storage_info().num_alive_documents(), Eq(4)); EXPECT_THAT(out1.document_storage_info().num_deleted_documents(), Eq(0)); @@ -3957,8 +4010,9 @@ TEST_F(DocumentStoreTest, GetDebugInfo) { // Delete document3. ICING_ASSERT_OK(document_store->Delete("namespace2", "email/3")); - ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out2, - document_store->GetDebugInfo(/*verbosity=*/1)); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentDebugInfoProto out2, + document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED)); EXPECT_THAT(out2.crc(), Gt(0)); EXPECT_THAT(out2.crc(), Not(Eq(out1.crc()))); EXPECT_THAT(out2.document_storage_info().num_alive_documents(), Eq(3)); @@ -3970,8 +4024,9 @@ TEST_F(DocumentStoreTest, GetDebugInfo) { UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2), EqualsProto(info3))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out3, - document_store->GetDebugInfo(/*verbosity=*/0)); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentDebugInfoProto out3, + document_store->GetDebugInfo(DebugInfoVerbosity::BASIC)); EXPECT_THAT(out3.corpus_info(), IsEmpty()); } @@ -3989,8 +4044,9 @@ TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) { schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); - 
ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out, - document_store->GetDebugInfo(/*verbosity=*/1)); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentDebugInfoProto out, + document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED)); EXPECT_THAT(out.crc(), Gt(0)); EXPECT_THAT(out.document_storage_info().num_alive_documents(), Eq(0)); EXPECT_THAT(out.document_storage_info().num_deleted_documents(), Eq(0)); @@ -4005,8 +4061,9 @@ TEST_F(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) { schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); - ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out, - document_store->GetDebugInfo(/*verbosity=*/1)); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentDebugInfoProto out, + document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED)); EXPECT_THAT(out.crc(), Gt(0)); EXPECT_THAT(out.document_storage_info().num_alive_documents(), Eq(0)); EXPECT_THAT(out.document_storage_info().num_deleted_documents(), Eq(0)); diff --git a/icing/store/dynamic-trie-key-mapper.h b/icing/store/dynamic-trie-key-mapper.h new file mode 100644 index 0000000..dedd7b9 --- /dev/null +++ b/icing/store/dynamic-trie-key-mapper.h @@ -0,0 +1,299 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_ +#define ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_ + +#include <cstdint> +#include <cstring> +#include <memory> +#include <string> +#include <string_view> +#include <type_traits> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/absl_ports/str_join.h" +#include "icing/file/filesystem.h" +#include "icing/legacy/index/icing-dynamic-trie.h" +#include "icing/legacy/index/icing-filesystem.h" +#include "icing/store/key-mapper.h" +#include "icing/util/crc32.h" +#include "icing/util/status-macros.h" + +namespace icing { +namespace lib { + +// File-backed mapping between the string key and a trivially copyable value +// type. +// +// DynamicTrieKeyMapper is thread-compatible +template <typename T, typename Formatter = absl_ports::DefaultFormatter> +class DynamicTrieKeyMapper : public KeyMapper<T, Formatter> { + public: + // Returns an initialized instance of DynamicTrieKeyMapper that can + // immediately handle read/write operations. + // Returns any encountered IO errors. + // + // base_dir : Base directory used to save all the files required to persist + // DynamicTrieKeyMapper. If this base_dir was previously used to + // create a DynamicTrieKeyMapper, then this existing data would be + // loaded. Otherwise, an empty DynamicTrieKeyMapper would be + // created. + // maximum_size_bytes : The maximum allowable size of the key mapper storage. + static libtextclassifier3::StatusOr< + std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>> + Create(const Filesystem& filesystem, std::string_view base_dir, + int maximum_size_bytes); + + // Deletes all the files associated with the DynamicTrieKeyMapper. Returns + // success or any encountered IO errors + // + // base_dir : Base directory used to save all the files required to persist + // DynamicTrieKeyMapper. 
Should be the same as passed into + // Create(). + static libtextclassifier3::Status Delete(const Filesystem& filesystem, + std::string_view base_dir); + + ~DynamicTrieKeyMapper() override = default; + + libtextclassifier3::Status Put(std::string_view key, T value) override; + + libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key, + T next_value) override; + + libtextclassifier3::StatusOr<T> Get(std::string_view key) const override; + + bool Delete(std::string_view key) override; + + std::unordered_map<T, std::string> GetValuesToKeys() const override; + + int32_t num_keys() const override { return trie_.size(); } + + libtextclassifier3::Status PersistToDisk() override; + + libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const override; + + libtextclassifier3::StatusOr<int64_t> GetElementsSize() const override; + + Crc32 ComputeChecksum() override; + + private: + static constexpr char kDynamicTrieKeyMapperDir[] = "key_mapper_dir"; + static constexpr char kDynamicTrieKeyMapperPrefix[] = "key_mapper"; + + // Use DynamicTrieKeyMapper::Create() to instantiate. + explicit DynamicTrieKeyMapper(std::string_view key_mapper_dir); + + // Load any existing DynamicTrieKeyMapper data from disk, or creates a new + // instance of DynamicTrieKeyMapper on disk and gets ready to process + // read/write operations. + // + // Returns any encountered IO errors. + libtextclassifier3::Status Initialize(int maximum_size_bytes); + + const std::string file_prefix_; + + // TODO(adorokhine) Filesystem is a forked class that's available both in + // icing and icing namespaces. We will need icing::Filesystem in order + // to use IcingDynamicTrie. Filesystem class should be fully refactored + // to have a single definition across both namespaces. Such a class should + // use icing (and general google3) coding conventions and behave like + // a proper C++ class. 
+ const IcingFilesystem icing_filesystem_; + IcingDynamicTrie trie_; + + static_assert(std::is_trivially_copyable<T>::value, + "T must be trivially copyable"); +}; + +template <typename T, typename Formatter> +libtextclassifier3::StatusOr< + std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>> +DynamicTrieKeyMapper<T, Formatter>::Create(const Filesystem& filesystem, + std::string_view base_dir, + int maximum_size_bytes) { + // We create a subdirectory since the trie creates and stores multiple files. + // This makes it easier to isolate the trie files away from other files that + // could potentially be in the same base_dir, and makes it easier to delete. + const std::string key_mapper_dir = + absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir); + if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) { + return absl_ports::InternalError(absl_ports::StrCat( + "Failed to create DynamicTrieKeyMapper directory: ", key_mapper_dir)); + } + auto mapper = std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>( + new DynamicTrieKeyMapper<T, Formatter>(key_mapper_dir)); + ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes)); + return mapper; +} + +template <typename T, typename Formatter> +libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Delete( + const Filesystem& filesystem, std::string_view base_dir) { + std::string key_mapper_dir = + absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir); + if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) { + return absl_ports::InternalError(absl_ports::StrCat( + "Failed to delete DynamicTrieKeyMapper directory: ", key_mapper_dir)); + } + return libtextclassifier3::Status::OK; +} + +template <typename T, typename Formatter> +DynamicTrieKeyMapper<T, Formatter>::DynamicTrieKeyMapper( + std::string_view key_mapper_dir) + : file_prefix_( + absl_ports::StrCat(key_mapper_dir, "/", kDynamicTrieKeyMapperPrefix)), + trie_(file_prefix_, + 
IcingDynamicTrie::RuntimeOptions().set_storage_policy( + IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc), + &icing_filesystem_) {} + +template <typename T, typename Formatter> +libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Initialize( + int maximum_size_bytes) { + IcingDynamicTrie::Options options; + // Divide the max space between the three internal arrays: nodes, nexts and + // suffixes. MaxNodes and MaxNexts are in units of their own data structures. + // MaxSuffixesSize is in units of bytes. + options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node)); + options.max_nexts = options.max_nodes; + options.max_suffixes_size = + sizeof(IcingDynamicTrie::Node) * options.max_nodes; + options.value_size = sizeof(T); + + if (!trie_.CreateIfNotExist(options)) { + return absl_ports::InternalError(absl_ports::StrCat( + "Failed to create DynamicTrieKeyMapper file: ", file_prefix_)); + } + if (!trie_.Init()) { + return absl_ports::InternalError(absl_ports::StrCat( + "Failed to init DynamicTrieKeyMapper file: ", file_prefix_)); + } + return libtextclassifier3::Status::OK; +} + +template <typename T, typename Formatter> +libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::GetOrPut( + std::string_view key, T next_value) { + std::string string_key(key); + uint32_t value_index; + if (!trie_.Insert(string_key.c_str(), &next_value, &value_index, + /*replace=*/false)) { + return absl_ports::InternalError( + absl_ports::StrCat("Unable to insert key ", Formatter()(string_key), + " into DynamicTrieKeyMapper ", file_prefix_, ".")); + } + // This memory address could be unaligned since we're just grabbing the value + // from somewhere in the trie's suffix array. The suffix array is filled with + // chars, so the address might not be aligned to T values. 
+ const T* unaligned_value = + static_cast<const T*>(trie_.GetValueAtIndex(value_index)); + + // memcpy the value to ensure that the returned value here is in a T-aligned + // address + T aligned_value; + memcpy(&aligned_value, unaligned_value, sizeof(T)); + return aligned_value; +} + +template <typename T, typename Formatter> +libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Put( + std::string_view key, T value) { + std::string string_key(key); + if (!trie_.Insert(string_key.c_str(), &value)) { + return absl_ports::InternalError( + absl_ports::StrCat("Unable to insert key ", Formatter()(string_key), + " into DynamicTrieKeyMapper ", file_prefix_, ".")); + } + return libtextclassifier3::Status::OK; +} + +template <typename T, typename Formatter> +libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::Get( + std::string_view key) const { + std::string string_key(key); + T value; + if (!trie_.Find(string_key.c_str(), &value)) { + return absl_ports::NotFoundError( + absl_ports::StrCat("Key not found ", Formatter()(string_key), + " in DynamicTrieKeyMapper ", file_prefix_, ".")); + } + return value; +} + +template <typename T, typename Formatter> +bool DynamicTrieKeyMapper<T, Formatter>::Delete(std::string_view key) { + return trie_.Delete(key); +} + +template <typename T, typename Formatter> +std::unordered_map<T, std::string> +DynamicTrieKeyMapper<T, Formatter>::GetValuesToKeys() const { + std::unordered_map<T, std::string> values_to_keys; + for (IcingDynamicTrie::Iterator itr(trie_, /*prefix=*/""); itr.IsValid(); + itr.Advance()) { + if (itr.IsValid()) { + T value; + memcpy(&value, itr.GetValue(), sizeof(T)); + values_to_keys.insert({value, itr.GetKey()}); + } + } + + return values_to_keys; +} + +template <typename T, typename Formatter> +libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::PersistToDisk() { + if (!trie_.Sync()) { + return absl_ports::InternalError(absl_ports::StrCat( + "Failed to sync DynamicTrieKeyMapper file: ", 
file_prefix_)); + } + + return libtextclassifier3::Status::OK; +} + +template <typename T, typename Formatter> +libtextclassifier3::StatusOr<int64_t> +DynamicTrieKeyMapper<T, Formatter>::GetDiskUsage() const { + int64_t size = trie_.GetDiskUsage(); + if (size == IcingFilesystem::kBadFileSize || size < 0) { + return absl_ports::InternalError("Failed to get disk usage of key mapper"); + } + return size; +} + +template <typename T, typename Formatter> +libtextclassifier3::StatusOr<int64_t> +DynamicTrieKeyMapper<T, Formatter>::GetElementsSize() const { + int64_t size = trie_.GetElementsSize(); + if (size == IcingFilesystem::kBadFileSize || size < 0) { + return absl_ports::InternalError( + "Failed to get disk usage of elements in the key mapper"); + } + return size; +} + +template <typename T, typename Formatter> +Crc32 DynamicTrieKeyMapper<T, Formatter>::ComputeChecksum() { + return Crc32(trie_.UpdateCrc()); +} + +} // namespace lib +} // namespace icing + +#endif // ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_ diff --git a/icing/store/key-mapper_test.cc b/icing/store/dynamic-trie-key-mapper_test.cc index 4e3dd8a..03ba5f2 100644 --- a/icing/store/key-mapper_test.cc +++ b/icing/store/dynamic-trie-key-mapper_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "icing/store/key-mapper.h" +#include "icing/store/dynamic-trie-key-mapper.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -29,9 +29,9 @@ using ::testing::UnorderedElementsAre; namespace icing { namespace lib { namespace { -constexpr int kMaxKeyMapperSize = 3 * 1024 * 1024; // 3 MiB +constexpr int kMaxDynamicTrieKeyMapperSize = 3 * 1024 * 1024; // 3 MiB -class KeyMapperTest : public testing::Test { +class DynamicTrieKeyMapperTest : public testing::Test { protected: void SetUp() override { base_dir_ = GetTestTempDir() + "/key_mapper"; } @@ -43,36 +43,39 @@ class KeyMapperTest : public testing::Test { Filesystem filesystem_; }; -TEST_F(KeyMapperTest, InvalidBaseDir) { - ASSERT_THAT( - KeyMapper<DocumentId>::Create(filesystem_, "/dev/null", kMaxKeyMapperSize) - .status() - .error_message(), - HasSubstr("Failed to create KeyMapper")); +TEST_F(DynamicTrieKeyMapperTest, InvalidBaseDir) { + ASSERT_THAT(DynamicTrieKeyMapper<DocumentId>::Create( + filesystem_, "/dev/null", kMaxDynamicTrieKeyMapperSize) + .status() + .error_message(), + HasSubstr("Failed to create DynamicTrieKeyMapper")); } -TEST_F(KeyMapperTest, NegativeMaxKeyMapperSizeReturnsInternalError) { - ASSERT_THAT(KeyMapper<DocumentId>::Create(filesystem_, base_dir_, -1), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); +TEST_F(DynamicTrieKeyMapperTest, NegativeMaxKeyMapperSizeReturnsInternalError) { + ASSERT_THAT( + DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_, -1), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } -TEST_F(KeyMapperTest, TooLargeMaxKeyMapperSizeReturnsInternalError) { - ASSERT_THAT(KeyMapper<DocumentId>::Create(filesystem_, base_dir_, - std::numeric_limits<int>::max()), +TEST_F(DynamicTrieKeyMapperTest, TooLargeMaxKeyMapperSizeReturnsInternalError) { + ASSERT_THAT(DynamicTrieKeyMapper<DocumentId>::Create( + filesystem_, base_dir_, std::numeric_limits<int>::max()), StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } -TEST_F(KeyMapperTest, 
CreateNewKeyMapper) { +TEST_F(DynamicTrieKeyMapperTest, CreateNewKeyMapper) { ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize)); + std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper, + DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_, + kMaxDynamicTrieKeyMapperSize)); EXPECT_THAT(key_mapper->num_keys(), 0); } -TEST_F(KeyMapperTest, CanUpdateSameKeyMultipleTimes) { +TEST_F(DynamicTrieKeyMapperTest, CanUpdateSameKeyMultipleTimes) { ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize)); + std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper, + DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_, + kMaxDynamicTrieKeyMapperSize)); ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100)); ICING_EXPECT_OK(key_mapper->Put("default-youtube.com", 50)); @@ -88,10 +91,11 @@ TEST_F(KeyMapperTest, CanUpdateSameKeyMultipleTimes) { EXPECT_THAT(key_mapper->num_keys(), 2); } -TEST_F(KeyMapperTest, GetOrPutOk) { +TEST_F(DynamicTrieKeyMapperTest, GetOrPutOk) { ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize)); + std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper, + DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_, + kMaxDynamicTrieKeyMapperSize)); EXPECT_THAT(key_mapper->Get("foo"), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -99,15 +103,16 @@ TEST_F(KeyMapperTest, GetOrPutOk) { EXPECT_THAT(key_mapper->Get("foo"), IsOkAndHolds(1)); } -TEST_F(KeyMapperTest, CanPersistToDiskRegularly) { +TEST_F(DynamicTrieKeyMapperTest, CanPersistToDiskRegularly) { ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - KeyMapper<DocumentId>::Create(filesystem_, base_dir_, 
kMaxKeyMapperSize)); - // Can persist an empty KeyMapper. + std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper, + DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_, + kMaxDynamicTrieKeyMapperSize)); + // Can persist an empty DynamicTrieKeyMapper. ICING_EXPECT_OK(key_mapper->PersistToDisk()); EXPECT_THAT(key_mapper->num_keys(), 0); - // Can persist the smallest KeyMapper. + // Can persist the smallest DynamicTrieKeyMapper. ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100)); ICING_EXPECT_OK(key_mapper->PersistToDisk()); EXPECT_THAT(key_mapper->num_keys(), 1); @@ -124,17 +129,18 @@ TEST_F(KeyMapperTest, CanPersistToDiskRegularly) { EXPECT_THAT(key_mapper->num_keys(), 2); } -TEST_F(KeyMapperTest, CanUseAcrossMultipleInstances) { +TEST_F(DynamicTrieKeyMapperTest, CanUseAcrossMultipleInstances) { ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize)); + std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper, + DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_, + kMaxDynamicTrieKeyMapperSize)); ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100)); ICING_EXPECT_OK(key_mapper->PersistToDisk()); key_mapper.reset(); ICING_ASSERT_OK_AND_ASSIGN( - key_mapper, - KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize)); + key_mapper, DynamicTrieKeyMapper<DocumentId>::Create( + filesystem_, base_dir_, kMaxDynamicTrieKeyMapperSize)); EXPECT_THAT(key_mapper->num_keys(), 1); EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(100)); @@ -146,30 +152,34 @@ TEST_F(KeyMapperTest, CanUseAcrossMultipleInstances) { EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(300)); } -TEST_F(KeyMapperTest, CanDeleteAndRestartKeyMapping) { +TEST_F(DynamicTrieKeyMapperTest, CanDeleteAndRestartKeyMapping) { // Can delete even if there's nothing there - 
ICING_EXPECT_OK(KeyMapper<DocumentId>::Delete(filesystem_, base_dir_)); + ICING_EXPECT_OK( + DynamicTrieKeyMapper<DocumentId>::Delete(filesystem_, base_dir_)); ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize)); + std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper, + DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_, + kMaxDynamicTrieKeyMapperSize)); ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100)); ICING_EXPECT_OK(key_mapper->PersistToDisk()); - ICING_EXPECT_OK(KeyMapper<DocumentId>::Delete(filesystem_, base_dir_)); + ICING_EXPECT_OK( + DynamicTrieKeyMapper<DocumentId>::Delete(filesystem_, base_dir_)); key_mapper.reset(); ICING_ASSERT_OK_AND_ASSIGN( - key_mapper, - KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize)); + key_mapper, DynamicTrieKeyMapper<DocumentId>::Create( + filesystem_, base_dir_, kMaxDynamicTrieKeyMapperSize)); EXPECT_THAT(key_mapper->num_keys(), 0); ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100)); EXPECT_THAT(key_mapper->num_keys(), 1); } -TEST_F(KeyMapperTest, GetValuesToKeys) { +TEST_F(DynamicTrieKeyMapperTest, GetValuesToKeys) { ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize)); + std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> key_mapper, + DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_, + kMaxDynamicTrieKeyMapperSize)); EXPECT_THAT(key_mapper->GetValuesToKeys(), IsEmpty()); ICING_EXPECT_OK(key_mapper->Put("foo", /*value=*/1)); diff --git a/icing/store/key-mapper.h b/icing/store/key-mapper.h index 23c7b69..e05d1b7 100644 --- a/icing/store/key-mapper.h +++ b/icing/store/key-mapper.h @@ -1,4 +1,4 @@ -// Copyright (C) 2019 Google LLC +// Copyright (C) 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may 
not use this file except in compliance with the License. @@ -17,81 +17,56 @@ #include <cstdint> #include <cstring> -#include <memory> #include <string> #include <string_view> #include <type_traits> +#include <unordered_map> #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/absl_ports/canonical_errors.h" -#include "icing/absl_ports/str_cat.h" -#include "icing/file/filesystem.h" -#include "icing/legacy/index/icing-dynamic-trie.h" -#include "icing/legacy/index/icing-filesystem.h" +#include "icing/absl_ports/str_join.h" #include "icing/util/crc32.h" -#include "icing/util/status-macros.h" namespace icing { namespace lib { -// File-backed mapping between the string key and a trivially copyable value -// type. +// An interface for file-backed mapping between the string key and a trivially +// copyable value type. // -// KeyMapper is thread-compatible -template <typename T> +// The implementation for KeyMapper should be thread-compatible +template <typename T, typename Formatter = absl_ports::DefaultFormatter> class KeyMapper { public: - // Returns an initialized instance of KeyMapper that can immediately handle - // read/write operations. - // Returns any encountered IO errors. - // - // base_dir : Base directory used to save all the files required to persist - // KeyMapper. If this base_dir was previously used to create a - // KeyMapper, then this existing data would be loaded. Otherwise, - // an empty KeyMapper would be created. - // maximum_size_bytes : The maximum allowable size of the key mapper storage. - static libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>> Create( - const Filesystem& filesystem, std::string_view base_dir, - int maximum_size_bytes); - - // Deletes all the files associated with the KeyMapper. Returns success or any - // encountered IO errors - // - // base_dir : Base directory used to save all the files required to persist - // KeyMapper. 
Should be the same as passed into Create(). - static libtextclassifier3::Status Delete(const Filesystem& filesystem, - std::string_view base_dir); - - ~KeyMapper() = default; + virtual ~KeyMapper() = default; // Inserts/Updates value for key. // Returns any encountered IO errors. // // NOTE: Put() doesn't automatically flush changes to disk and relies on // either explicit calls to PersistToDisk() or a clean shutdown of the class. - libtextclassifier3::Status Put(std::string_view key, T value); + virtual libtextclassifier3::Status Put(std::string_view key, T value) = 0; // Finds the current value for key and returns it. If key is not present, it // is inserted with next_value and next_value is returned. // // Returns any IO errors that may occur during Put. - libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key, T next_value); + virtual libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key, + T next_value) = 0; // Returns the value corresponding to the key. // // Returns NOT_FOUND error if the key was missing. // Returns any encountered IO errors. - libtextclassifier3::StatusOr<T> Get(std::string_view key) const; + virtual libtextclassifier3::StatusOr<T> Get(std::string_view key) const = 0; // Deletes data related to the given key. Returns true on success. - bool Delete(std::string_view key); + virtual bool Delete(std::string_view key) = 0; // Returns a map of values to keys. Empty map if the mapper is empty. - std::unordered_map<T, std::string> GetValuesToKeys() const; + virtual std::unordered_map<T, std::string> GetValuesToKeys() const = 0; // Count of unique keys stored in the KeyMapper. - int32_t num_keys() const { return trie_.size(); } + virtual int32_t num_keys() const = 0; // Syncs all the changes made to the KeyMapper to disk. // Returns any encountered IO errors. 
@@ -103,7 +78,7 @@ class KeyMapper { // Returns: // OK on success // INTERNAL on I/O error - libtextclassifier3::Status PersistToDisk(); + virtual libtextclassifier3::Status PersistToDisk() = 0; // Calculates and returns the disk usage in bytes. Rounds up to the nearest // block size. @@ -111,7 +86,7 @@ class KeyMapper { // Returns: // Disk usage on success // INTERNAL_ERROR on IO error - libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const; + virtual libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const = 0; // Returns the size of the elements held in the key mapper. This excludes the // size of any internal metadata of the key mapper, e.g. the key mapper's @@ -120,197 +95,16 @@ class KeyMapper { // Returns: // File size on success // INTERNAL_ERROR on IO error - libtextclassifier3::StatusOr<int64_t> GetElementsSize() const; + virtual libtextclassifier3::StatusOr<int64_t> GetElementsSize() const = 0; // Computes and returns the checksum of the header and contents. - Crc32 ComputeChecksum(); + virtual Crc32 ComputeChecksum() = 0; private: - static constexpr char kKeyMapperDir[] = "key_mapper_dir"; - static constexpr char kKeyMapperPrefix[] = "key_mapper"; - - // Use KeyMapper::Create() to instantiate. - explicit KeyMapper(std::string_view key_mapper_dir); - - // Load any existing KeyMapper data from disk, or creates a new instance - // of KeyMapper on disk and gets ready to process read/write operations. - // - // Returns any encountered IO errors. - libtextclassifier3::Status Initialize(int maximum_size_bytes); - - const std::string file_prefix_; - - // TODO(adorokhine) Filesystem is a forked class that's available both in - // icing and icing namespaces. We will need icing::Filesystem in order - // to use IcingDynamicTrie. Filesystem class should be fully refactored - // to have a single definition across both namespaces. Such a class should - // use icing (and general google3) coding conventions and behave like - // a proper C++ class. 
- const IcingFilesystem icing_filesystem_; - IcingDynamicTrie trie_; - static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable"); }; -template <typename T> -libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>> -KeyMapper<T>::Create(const Filesystem& filesystem, std::string_view base_dir, - int maximum_size_bytes) { - // We create a subdirectory since the trie creates and stores multiple files. - // This makes it easier to isolate the trie files away from other files that - // could potentially be in the same base_dir, and makes it easier to delete. - const std::string key_mapper_dir = - absl_ports::StrCat(base_dir, "/", kKeyMapperDir); - if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) { - return absl_ports::InternalError(absl_ports::StrCat( - "Failed to create KeyMapper directory: ", key_mapper_dir)); - } - auto mapper = std::unique_ptr<KeyMapper<T>>(new KeyMapper<T>(key_mapper_dir)); - ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes)); - return mapper; -} - -template <typename T> -libtextclassifier3::Status KeyMapper<T>::Delete(const Filesystem& filesystem, - std::string_view base_dir) { - std::string key_mapper_dir = absl_ports::StrCat(base_dir, "/", kKeyMapperDir); - if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) { - return absl_ports::InternalError(absl_ports::StrCat( - "Failed to delete KeyMapper directory: ", key_mapper_dir)); - } - return libtextclassifier3::Status::OK; -} - -template <typename T> -KeyMapper<T>::KeyMapper(std::string_view key_mapper_dir) - : file_prefix_(absl_ports::StrCat(key_mapper_dir, "/", kKeyMapperPrefix)), - trie_(file_prefix_, - IcingDynamicTrie::RuntimeOptions().set_storage_policy( - IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc), - &icing_filesystem_) {} - -template <typename T> -libtextclassifier3::Status KeyMapper<T>::Initialize(int maximum_size_bytes) { - IcingDynamicTrie::Options options; - // Divide the max space between the three 
internal arrays: nodes, nexts and - // suffixes. MaxNodes and MaxNexts are in units of their own data structures. - // MaxSuffixesSize is in units of bytes. - options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node)); - options.max_nexts = options.max_nodes; - options.max_suffixes_size = - sizeof(IcingDynamicTrie::Node) * options.max_nodes; - options.value_size = sizeof(T); - - if (!trie_.CreateIfNotExist(options)) { - return absl_ports::InternalError( - absl_ports::StrCat("Failed to create KeyMapper file: ", file_prefix_)); - } - if (!trie_.Init()) { - return absl_ports::InternalError( - absl_ports::StrCat("Failed to init KeyMapper file: ", file_prefix_)); - } - return libtextclassifier3::Status::OK; -} - -template <typename T> -libtextclassifier3::StatusOr<T> KeyMapper<T>::GetOrPut(std::string_view key, - T next_value) { - std::string string_key(key); - uint32_t value_index; - if (!trie_.Insert(string_key.c_str(), &next_value, &value_index, - /*replace=*/false)) { - return absl_ports::InternalError(absl_ports::StrCat( - "Unable to insert key ", key, " into KeyMapper ", file_prefix_, ".")); - } - // This memory address could be unaligned since we're just grabbing the value - // from somewhere in the trie's suffix array. The suffix array is filled with - // chars, so the address might not be aligned to T values. 
- const T* unaligned_value = - static_cast<const T*>(trie_.GetValueAtIndex(value_index)); - - // memcpy the value to ensure that the returned value here is in a T-aligned - // address - T aligned_value; - memcpy(&aligned_value, unaligned_value, sizeof(T)); - return aligned_value; -} - -template <typename T> -libtextclassifier3::Status KeyMapper<T>::Put(std::string_view key, T value) { - std::string string_key(key); - if (!trie_.Insert(string_key.c_str(), &value)) { - return absl_ports::InternalError(absl_ports::StrCat( - "Unable to insert key ", key, " into KeyMapper ", file_prefix_, ".")); - } - return libtextclassifier3::Status::OK; -} - -template <typename T> -libtextclassifier3::StatusOr<T> KeyMapper<T>::Get(std::string_view key) const { - std::string string_key(key); - T value; - if (!trie_.Find(string_key.c_str(), &value)) { - return absl_ports::NotFoundError(absl_ports::StrCat( - "Key not found ", key, " in KeyMapper ", file_prefix_, ".")); - } - return value; -} - -template <typename T> -bool KeyMapper<T>::Delete(std::string_view key) { - return trie_.Delete(key); -} - -template <typename T> -std::unordered_map<T, std::string> KeyMapper<T>::GetValuesToKeys() const { - std::unordered_map<T, std::string> values_to_keys; - for (IcingDynamicTrie::Iterator itr(trie_, /*prefix=*/""); itr.IsValid(); - itr.Advance()) { - if (itr.IsValid()) { - T value; - memcpy(&value, itr.GetValue(), sizeof(T)); - values_to_keys.insert({value, itr.GetKey()}); - } - } - - return values_to_keys; -} - -template <typename T> -libtextclassifier3::Status KeyMapper<T>::PersistToDisk() { - if (!trie_.Sync()) { - return absl_ports::InternalError( - absl_ports::StrCat("Failed to sync KeyMapper file: ", file_prefix_)); - } - - return libtextclassifier3::Status::OK; -} - -template <typename T> -libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetDiskUsage() const { - int64_t size = trie_.GetDiskUsage(); - if (size == IcingFilesystem::kBadFileSize || size < 0) { - return 
absl_ports::InternalError("Failed to get disk usage of key mapper"); - } - return size; -} - -template <typename T> -libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetElementsSize() const { - int64_t size = trie_.GetElementsSize(); - if (size == IcingFilesystem::kBadFileSize || size < 0) { - return absl_ports::InternalError( - "Failed to get disk usage of elements in the key mapper"); - } - return size; -} - -template <typename T> -Crc32 KeyMapper<T>::ComputeChecksum() { - return Crc32(trie_.UpdateCrc()); -} - } // namespace lib } // namespace icing diff --git a/icing/store/namespace-checker-impl.h b/icing/store/namespace-checker-impl.h index bcd0643..0b6fca9 100644 --- a/icing/store/namespace-checker-impl.h +++ b/icing/store/namespace-checker-impl.h @@ -32,14 +32,18 @@ class NamespaceCheckerImpl : public NamespaceChecker { target_namespace_ids_(std::move(target_namespace_ids)) {} bool BelongsToTargetNamespaces(DocumentId document_id) const override { + auto document_filter_data_optional_ = + document_store_.GetAliveDocumentFilterData(document_id); + if (!document_filter_data_optional_) { + // The document doesn't exist. 
+ return false; + } if (target_namespace_ids_.empty()) { return true; } - auto document_filter_data_or_ = - document_store_.GetDocumentFilterData(document_id); - return document_filter_data_or_.ok() && - target_namespace_ids_.count( - document_filter_data_or_.ValueOrDie().namespace_id())> 0; + DocumentFilterData document_filter_data = + document_filter_data_optional_.value(); + return target_namespace_ids_.count(document_filter_data.namespace_id()) > 0; } const DocumentStore& document_store_; std::unordered_set<NamespaceId> target_namespace_ids_; diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h index f83fe0a..81f65b2 100644 --- a/icing/testing/common-matchers.h +++ b/icing/testing/common-matchers.h @@ -460,6 +460,10 @@ MATCHER_P(EqualsSearchResultIgnoreStatsAndScores, expected, "") { ICING_ASSERT_OK(statusor.status()); \ lhs = std::move(statusor).ValueOrDie() +#define ICING_ASSERT_HAS_VALUE_AND_ASSIGN(lhs, rexpr) \ + ASSERT_TRUE(rexpr); \ + lhs = rexpr.value() + } // namespace lib } // namespace icing diff --git a/icing/tokenization/combined-tokenizer_test.cc b/icing/tokenization/combined-tokenizer_test.cc index 0212e4f..42c7743 100644 --- a/icing/tokenization/combined-tokenizer_test.cc +++ b/icing/tokenization/combined-tokenizer_test.cc @@ -15,19 +15,19 @@ #include <string_view> #include <vector> -#include "testing/base/public/gmock.h" -#include "testing/base/public/gunit.h" -#include "third_party/icing/portable/platform.h" -#include "third_party/icing/proto/schema_proto_portable.pb.h" -#include "third_party/icing/testing/common-matchers.h" -#include "third_party/icing/testing/icu-data-file-helper.h" -#include "third_party/icing/testing/jni-test-helpers.h" -#include "third_party/icing/testing/test-data.h" -#include "third_party/icing/tokenization/language-segmenter-factory.h" -#include "third_party/icing/tokenization/language-segmenter.h" -#include "third_party/icing/tokenization/tokenizer-factory.h" -#include 
"third_party/icing/tokenization/tokenizer.h" -#include "third_party/icu/include/unicode/uloc.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/portable/platform.h" +#include "icing/proto/schema.pb.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/jni-test-helpers.h" +#include "icing/testing/test-data.h" +#include "icing/tokenization/language-segmenter-factory.h" +#include "icing/tokenization/language-segmenter.h" +#include "icing/tokenization/tokenizer-factory.h" +#include "icing/tokenization/tokenizer.h" +#include "unicode/uloc.h" namespace icing { namespace lib { @@ -43,9 +43,9 @@ class CombinedTokenizerTest : public ::testing::Test { void SetUp() override { if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { ICING_ASSERT_OK( - // File generated via icu_data_file rule in //third_party/icing/BUILD. + // File generated via icu_data_file rule in //icing/BUILD. icu_data_file_helper::SetUpICUDataFile( - GetTestFilePath("third_party/icing/icu.dat"))); + GetTestFilePath("icing/icu.dat"))); } jni_cache_ = GetTestJniCache(); diff --git a/icing/tokenization/language-segmenter_benchmark.cc b/icing/tokenization/language-segmenter_benchmark.cc index 6f7d4df..748a322 100644 --- a/icing/tokenization/language-segmenter_benchmark.cc +++ b/icing/tokenization/language-segmenter_benchmark.cc @@ -27,7 +27,7 @@ // //icing/tokenization:language-segmenter_benchmark // // $ blaze-bin/icing/tokenization/language-segmenter_benchmark -// --benchmarks=all +// --benchmark_filter=all // // Run on an Android device: // Make target //icing/tokenization:language-segmenter depend on @@ -41,7 +41,7 @@ // blaze-bin/icing/tokenization/language-segmenter_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/language-segmenter_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/language-segmenter_benchmark --benchmark_filter=all // --adb // Flag to tell the benchmark that it'll be 
run on an Android device via adb, diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc index e5de6e6..bd80718 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc +++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc @@ -74,6 +74,7 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { MarkAsDone(); return false; } + return true; } diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc index 277ece6..8b13cd1 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc +++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc @@ -423,7 +423,6 @@ TEST_P(ReverseJniLanguageSegmenterTest, CJKT) { // Khmer EXPECT_THAT(language_segmenter->GetAllTerms("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"), IsOkAndHolds(ElementsAre("ញុំ", "ដើរទៅ", "ធ្វើការ", "រាល់ថ្ងៃ", "។"))); - // Thai EXPECT_THAT( language_segmenter->GetAllTerms("ฉันเดินไปทำงานทุกวัน"), diff --git a/icing/transform/icu/icu-normalizer_benchmark.cc b/icing/transform/icu/icu-normalizer_benchmark.cc index fdd4c70..fe8289a 100644 --- a/icing/transform/icu/icu-normalizer_benchmark.cc +++ b/icing/transform/icu/icu-normalizer_benchmark.cc @@ -25,7 +25,7 @@ // //icing/transform/icu:icu-normalizer_benchmark // // $ blaze-bin/icing/transform/icu/icu-normalizer_benchmark -// --benchmarks=all +// --benchmark_filter=all // // Run on an Android device: // Make target //icing/transform:normalizer depend on @@ -39,7 +39,7 @@ // blaze-bin/icing/transform/icu/icu-normalizer_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/icu-normalizer_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/icu-normalizer_benchmark --benchmark_filter=all // --adb // Flag to tell the benchmark that it'll be run on an Android device via adb, diff 
--git a/icing/transform/map/map-normalizer_benchmark.cc b/icing/transform/map/map-normalizer_benchmark.cc index 8268541..4560329 100644 --- a/icing/transform/map/map-normalizer_benchmark.cc +++ b/icing/transform/map/map-normalizer_benchmark.cc @@ -24,7 +24,7 @@ // //icing/transform/map:map-normalizer_benchmark // // $ blaze-bin/icing/transform/map/map-normalizer_benchmark -// --benchmarks=all +// --benchmark_filter=all // // Run on an Android device: // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" @@ -35,7 +35,7 @@ // blaze-bin/icing/transform/map/map-normalizer_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/map-normalizer_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/map-normalizer_benchmark --benchmark_filter=all namespace icing { namespace lib { diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc index 45c23e0..b03d3f5 100644 --- a/icing/util/document-validator_test.cc +++ b/icing/util/document-validator_test.cc @@ -125,10 +125,10 @@ class DocumentValidatorTest : public ::testing::Test { } std::string schema_dir_; - std::unique_ptr<DocumentValidator> document_validator_; - std::unique_ptr<SchemaStore> schema_store_; Filesystem filesystem_; FakeClock fake_clock_; + std::unique_ptr<SchemaStore> schema_store_; + std::unique_ptr<DocumentValidator> document_validator_; }; TEST_F(DocumentValidatorTest, ValidateSimpleSchemasOk) { diff --git a/icing/util/fingerprint-util.cc b/icing/util/fingerprint-util.cc new file mode 100644 index 0000000..0ea843f --- /dev/null +++ b/icing/util/fingerprint-util.cc @@ -0,0 +1,48 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/util/fingerprint-util.h" + +namespace icing { +namespace lib { + +namespace fingerprint_util { + +// A formatter to properly handle a string that is actually just a hash value. +std::string GetFingerprintString(uint64_t fingerprint) { + std::string encoded_fprint; + // DynamicTrie cannot handle keys with '0' as bytes. So, we encode it in + // base128 and add 1 to make sure that no byte is '0'. This increases the + // size of the encoded_fprint from 8-bytes to 10-bytes. + while (fingerprint) { + encoded_fprint.push_back((fingerprint & 0x7F) + 1); + fingerprint >>= 7; + } + return encoded_fprint; +} + +uint64_t GetFingerprint(std::string_view fingerprint_string) { + uint64_t fprint = 0; + for (int i = fingerprint_string.length() - 1; i >= 0; --i) { + fprint <<= 7; + char c = fingerprint_string[i] - 1; + fprint |= (c & 0x7F); + } + return fprint; +} + +} // namespace fingerprint_util + +} // namespace lib +} // namespace icing diff --git a/icing/util/fingerprint-util.h b/icing/util/fingerprint-util.h new file mode 100644 index 0000000..9e98617 --- /dev/null +++ b/icing/util/fingerprint-util.h @@ -0,0 +1,47 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_UTIL_FINGERPRINT_UTIL_H_ +#define ICING_UTIL_FINGERPRINT_UTIL_H_ + +#include <cstdint> +#include <string> +#include <string_view> + +namespace icing { +namespace lib { + +namespace fingerprint_util { + +// Converts from a fingerprint to a fingerprint string. +std::string GetFingerprintString(uint64_t fingerprint); + +// Converts from a fingerprint string to a fingerprint. +uint64_t GetFingerprint(std::string_view fingerprint_string); + +// A formatter to properly handle a string that is actually just a hash value. +class FingerprintStringFormatter { + public: + std::string operator()(std::string_view fingerprint_string) { + uint64_t fingerprint = GetFingerprint(fingerprint_string); + return std::to_string(fingerprint); + } +}; + +} // namespace fingerprint_util + +} // namespace lib +} // namespace icing + +#endif // ICING_UTIL_FINGERPRINT_UTIL_H_ diff --git a/icing/util/fingerprint-util_test.cc b/icing/util/fingerprint-util_test.cc new file mode 100644 index 0000000..948c75a --- /dev/null +++ b/icing/util/fingerprint-util_test.cc @@ -0,0 +1,75 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/util/fingerprint-util.h" + +#include <cstdint> +#include <limits> + +#include "icing/text_classifier/lib3/utils/hash/farmhash.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace icing { +namespace lib { +namespace fingerprint_util { + +namespace { + +using ::testing::Eq; + +TEST(FingerprintUtilTest, ConversionIsReversible) { + std::string str = "foo-bar-baz"; + uint64_t fprint = tc3farmhash::Fingerprint64(str); + std::string fprint_string = GetFingerprintString(fprint); + EXPECT_THAT(GetFingerprint(fprint_string), Eq(fprint)); +} + +TEST(FingerprintUtilTest, ZeroConversionIsReversible) { + uint64_t fprint = 0; + std::string fprint_string = GetFingerprintString(fprint); + EXPECT_THAT(GetFingerprint(fprint_string), Eq(fprint)); +} + +TEST(FingerprintUtilTest, MultipleConversionsAreReversible) { + EXPECT_THAT(GetFingerprint(GetFingerprintString(25)), Eq(25)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(766)), Eq(766)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(2305)), Eq(2305)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(6922)), Eq(6922)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(62326)), Eq(62326)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(186985)), Eq(186985)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(560962)), Eq(560962)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(1682893)), Eq(1682893)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(15146065)), Eq(15146065)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(136314613)), Eq(136314613)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(1226831545)), Eq(1226831545)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(11041483933)), + Eq(11041483933)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(2683080596566)), + Eq(2683080596566)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(72443176107373)), + 
Eq(72443176107373)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(1955965754899162)), + Eq(1955965754899162)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(52811075382277465)), + Eq(52811075382277465)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(4277697105964474945)), + Eq(4277697105964474945)); +} + +} // namespace + +} // namespace fingerprint_util +} // namespace lib +} // namespace icing diff --git a/icing/util/logging.cc b/icing/util/logging.cc new file mode 100644 index 0000000..8498be4 --- /dev/null +++ b/icing/util/logging.cc @@ -0,0 +1,124 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/util/logging.h" + +#include <atomic> +#include <exception> +#include <string_view> + +#include "icing/util/logging_raw.h" + +namespace icing { +namespace lib { +namespace { +// Returns pointer to beginning of last /-separated token from file_name. +// file_name should be a pointer to a zero-terminated array of chars. +// E.g., "foo/bar.cc" -> "bar.cc", "foo/" -> "", "foo" -> "foo". +const char *JumpToBasename(const char *file_name) { + if (file_name == nullptr) { + return nullptr; + } + + // Points to the beginning of the last encountered token. 
+ size_t last_token_start = std::string_view(file_name).find_last_of('/'); + if (last_token_start == std::string_view::npos) { + return file_name; + } + return file_name + last_token_start + 1; +} + +// Calculate the logging level value based on severity and verbosity. +constexpr uint32_t CalculateLoggingLevel(LogSeverity::Code severity, + uint16_t verbosity) { + uint32_t logging_level = static_cast<uint16_t>(severity); + logging_level = (logging_level << 16) | verbosity; + return logging_level; +} + +#if defined(ICING_DEBUG_LOGGING) +#define DEFAULT_LOGGING_LEVEL CalculateLoggingLevel(LogSeverity::VERBOSE, 1) +#else +#define DEFAULT_LOGGING_LEVEL CalculateLoggingLevel(LogSeverity::INFO, 0) +#endif + +// The current global logging level for Icing, which controls which logs are +// printed based on severity and verbosity. +// +// This needs to be global so that it can be easily accessed from ICING_LOG and +// ICING_VLOG macros spread throughout the entire code base. +// +// The first 16 bits represent the minimal log severity. +// The last 16 bits represent the current verbosity. +std::atomic<uint32_t> global_logging_level = DEFAULT_LOGGING_LEVEL; + +} // namespace + +// Whether we should log according to the current logging level. +bool ShouldLog(LogSeverity::Code severity, int16_t verbosity) { + if (verbosity < 0) { + return false; + } + // Using the relaxed order for better performance because we only need to + // guarantee the atomicity for this specific statement, without the need to + // worry about reordering. + uint32_t curr_logging_level = + global_logging_level.load(std::memory_order_relaxed); + // If severity is less than the the threshold set. + if (static_cast<uint16_t>(severity) < (curr_logging_level >> 16)) { + return false; + } + if (severity == LogSeverity::VERBOSE) { + // return whether the verbosity is within the current verbose level set. 
+ return verbosity <= (curr_logging_level & 0xffff); + } + return true; +} + +bool SetLoggingLevel(LogSeverity::Code severity, int16_t verbosity) { + if (verbosity < 0) { + return false; + } + if (severity > LogSeverity::VERBOSE && verbosity > 0) { + return false; + } + // Using the relaxed order for better performance because we only need to + // guarantee the atomicity for this specific statement, without the need to + // worry about reordering. + global_logging_level.store(CalculateLoggingLevel(severity, verbosity), + std::memory_order_relaxed); + return true; +} + +LogMessage::LogMessage(LogSeverity::Code severity, uint16_t verbosity, + const char *file_name, int line_number) + : severity_(severity), + verbosity_(verbosity), + should_log_(ShouldLog(severity_, verbosity_)), + stream_(should_log_) { + if (should_log_) { + stream_ << JumpToBasename(file_name) << ":" << line_number << ": "; + } +} + +LogMessage::~LogMessage() { + if (should_log_) { + LowLevelLogging(severity_, kIcingLoggingTag, stream_.message); + } + if (severity_ == LogSeverity::FATAL) { + std::terminate(); // Will print a stacktrace (stdout or logcat). + } +} +} // namespace lib +} // namespace icing diff --git a/icing/util/logging.h b/icing/util/logging.h index 9d598fe..7742302 100644 --- a/icing/util/logging.h +++ b/icing/util/logging.h @@ -15,14 +15,130 @@ #ifndef ICING_UTIL_LOGGING_H_ #define ICING_UTIL_LOGGING_H_ -#include "icing/text_classifier/lib3/utils/base/logging.h" +#include <atomic> +#include <cstdint> +#include <string> +#include "icing/proto/debug.pb.h" + +// This header provides base/logging.h style macros, ICING_LOG and ICING_VLOG, +// for logging in various platforms. The macros use __android_log_write on +// Android, and log to stdout/stderr on others. It also provides a function +// SetLoggingLevel to control the log severity level for ICING_LOG and verbosity +// for ICING_VLOG. 
namespace icing { namespace lib { -// TODO(b/146903474) Add verbose level control -#define ICING_VLOG(verbose_level) TC3_VLOG(verbose_level) -#define ICING_LOG(severity) TC3_LOG(severity) +// Whether we should log according to the current logging level. +// The function will always return false when verbosity is negative. +bool ShouldLog(LogSeverity::Code severity, int16_t verbosity = 0); + +// Set the minimal logging severity to be enabled, and the verbose level to see +// from the logs. +// Return false if severity is set higher than VERBOSE but verbosity is not 0. +// The function will always return false when verbosity is negative. +bool SetLoggingLevel(LogSeverity::Code severity, int16_t verbosity = 0); + +// A tiny code footprint string stream for assembling log messages. +struct LoggingStringStream { + explicit LoggingStringStream(bool should_log) : should_log_(should_log) {} + LoggingStringStream& stream() { return *this; } + + std::string message; + const bool should_log_; +}; + +template <typename T> +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + const T& entry) { + if (stream.should_log_) { + stream.message.append(std::to_string(entry)); + } + return stream; +} + +template <typename T> +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + T* const entry) { + if (stream.should_log_) { + stream.message.append( + std::to_string(reinterpret_cast<const uint64_t>(entry))); + } + return stream; +} + +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + const char* message) { + if (stream.should_log_) { + stream.message.append(message); + } + return stream; +} + +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + const std::string& message) { + if (stream.should_log_) { + stream.message.append(message); + } + return stream; +} + +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + std::string_view message) { + if (stream.should_log_) { + stream.message.append(message); 
+ } + return stream; +} + +template <typename T1, typename T2> +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + const std::pair<T1, T2>& entry) { + if (stream.should_log_) { + stream << "(" << entry.first << ", " << entry.second << ")"; + } + return stream; +} + +// The class that does all the work behind our ICING_LOG(severity) macros. Each +// ICING_LOG(severity) << obj1 << obj2 << ...; logging statement creates a +// LogMessage temporary object containing a stringstream. Each operator<< adds +// info to that stringstream and the LogMessage destructor performs the actual +// logging. The reason this works is that in C++, "all temporary objects are +// destroyed as the last step in evaluating the full-expression that (lexically) +// contains the point where they were created." For more info, see +// http://en.cppreference.com/w/cpp/language/lifetime. Hence, the destructor is +// invoked after the last << from that logging statement. +class LogMessage { + public: + LogMessage(LogSeverity::Code severity, uint16_t verbosity, + const char* file_name, int line_number) __attribute__((noinline)); + + ~LogMessage() __attribute__((noinline)); + + // Returns the stream associated with the logger object. + LoggingStringStream& stream() { return stream_; } + + private: + const LogSeverity::Code severity_; + const uint16_t verbosity_; + const bool should_log_; + + // Stream that "prints" all info into a string (not to a file). We construct + // here the entire logging message and next print it in one operation. 
+ LoggingStringStream stream_; +}; + +inline constexpr char kIcingLoggingTag[] = "AppSearchIcing"; + +#define ICING_VLOG(verbose_level) \ + ::icing::lib::LogMessage(::icing::lib::LogSeverity::VERBOSE, verbose_level, \ + __FILE__, __LINE__) \ + .stream() +#define ICING_LOG(severity) \ + ::icing::lib::LogMessage(::icing::lib::LogSeverity::severity, \ + /*verbosity=*/0, __FILE__, __LINE__) \ + .stream() } // namespace lib } // namespace icing diff --git a/icing/util/logging_raw.cc b/icing/util/logging_raw.cc new file mode 100644 index 0000000..5e67fb3 --- /dev/null +++ b/icing/util/logging_raw.cc @@ -0,0 +1,102 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/util/logging_raw.h" + +#include <cstdio> +#include <string> + +// NOTE: this file contains two implementations: one for Android, one for all +// other cases. We always build exactly one implementation. +#if defined(__ANDROID__) + +// Compiled as part of Android. +#include <android/log.h> + +namespace icing { +namespace lib { + +namespace { +// Converts LogSeverity to level for __android_log_write. 
+int GetAndroidLogLevel(LogSeverity::Code severity) { + switch (severity) { + case LogSeverity::VERBOSE: + return ANDROID_LOG_VERBOSE; + case LogSeverity::DBG: + return ANDROID_LOG_DEBUG; + case LogSeverity::INFO: + return ANDROID_LOG_INFO; + case LogSeverity::WARNING: + return ANDROID_LOG_WARN; + case LogSeverity::ERROR: + return ANDROID_LOG_ERROR; + case LogSeverity::FATAL: + return ANDROID_LOG_FATAL; + } +} +} // namespace + +void LowLevelLogging(LogSeverity::Code severity, const std::string& tag, + const std::string& message) { + const int android_log_level = GetAndroidLogLevel(severity); +#if __ANDROID_API__ >= 30 + if (!__android_log_is_loggable(android_log_level, tag.c_str(), + /*default_prio=*/ANDROID_LOG_INFO)) { + return; + } +#endif // __ANDROID_API__ >= 30 + __android_log_write(android_log_level, tag.c_str(), message.c_str()); +} + +} // namespace lib +} // namespace icing + +#else // if defined(__ANDROID__) + +// Not on Android: implement LowLevelLogging to print to stderr (see below). +namespace icing { +namespace lib { + +namespace { +// Converts LogSeverity to human-readable text. +const char *LogSeverityToString(LogSeverity::Code severity) { + switch (severity) { + case LogSeverity::VERBOSE: + return "VERBOSE"; + case LogSeverity::DBG: + return "DEBUG"; + case LogSeverity::INFO: + return "INFO"; + case LogSeverity::WARNING: + return "WARNING"; + case LogSeverity::ERROR: + return "ERROR"; + case LogSeverity::FATAL: + return "FATAL"; + } +} +} // namespace + +void LowLevelLogging(LogSeverity::Code severity, const std::string &tag, + const std::string &message) { + // TODO(b/146903474) Do not log to stderr for logs other than FATAL and ERROR. 
+ fprintf(stderr, "[%s] %s : %s\n", LogSeverityToString(severity), tag.c_str(), + message.c_str()); + fflush(stderr); +} + +} // namespace lib +} // namespace icing + +#endif // if defined(__ANDROID__) diff --git a/icing/util/logging_raw.h b/icing/util/logging_raw.h new file mode 100644 index 0000000..99dddb6 --- /dev/null +++ b/icing/util/logging_raw.h @@ -0,0 +1,34 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_UTIL_LOGGING_RAW_H_ +#define ICING_UTIL_LOGGING_RAW_H_ + +#include <string> + +#include "icing/proto/debug.pb.h" + +namespace icing { +namespace lib { + +// Low-level logging primitive. Logs a message, with the indicated log +// severity. From android/log.h: "the tag normally corresponds to the component +// that emits the log message, and should be reasonably small". +void LowLevelLogging(LogSeverity::Code severity, const std::string &tag, + const std::string &message); + +} // namespace lib +} // namespace icing + +#endif // ICING_UTIL_LOGGING_RAW_H_ diff --git a/icing/util/logging_test.cc b/icing/util/logging_test.cc new file mode 100644 index 0000000..eac018e --- /dev/null +++ b/icing/util/logging_test.cc @@ -0,0 +1,158 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/util/logging.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/proto/debug.pb.h" +#include "icing/util/logging_raw.h" + +namespace icing { +namespace lib { + +namespace { +using ::testing::EndsWith; +using ::testing::IsEmpty; + +TEST(LoggingTest, SetLoggingLevelWithInvalidArguments) { + EXPECT_FALSE(SetLoggingLevel(LogSeverity::DBG, 1)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::INFO, 1)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::WARNING, 1)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::ERROR, 1)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::FATAL, 1)); + + EXPECT_FALSE(SetLoggingLevel(LogSeverity::DBG, 2)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::INFO, 2)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::WARNING, 2)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::ERROR, 2)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::FATAL, 2)); + + EXPECT_FALSE(SetLoggingLevel(LogSeverity::VERBOSE, -1)); +} + +TEST(LoggingTest, SetLoggingLevelTest) { + // Set to INFO + ASSERT_TRUE(SetLoggingLevel(LogSeverity::INFO)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_TRUE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + + // Set to WARNING + ASSERT_TRUE(SetLoggingLevel(LogSeverity::WARNING)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_FALSE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + + // Set to DEBUG + ASSERT_TRUE(SetLoggingLevel(LogSeverity::DBG)); + EXPECT_TRUE(ShouldLog(LogSeverity::DBG)); + 
EXPECT_TRUE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); +} + +TEST(LoggingTest, VerboseLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 1)); + EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_TRUE(ShouldLog(LogSeverity::DBG)); + EXPECT_TRUE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + EXPECT_TRUE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +TEST(LoggingTest, VerboseLoggingIsControlledByVerbosity) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 2)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 3)); + EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 2)); + EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1)); + + ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 1)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 2)); + EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1)); + + ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 0)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 0)); + + // Negative verbosity is invalid. 
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, -1)); +} + +TEST(LoggingTest, DebugLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::DBG)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_TRUE(ShouldLog(LogSeverity::DBG)); + EXPECT_TRUE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + EXPECT_TRUE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +TEST(LoggingTest, InfoLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::INFO)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_TRUE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + EXPECT_TRUE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +TEST(LoggingTest, WarningLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::WARNING)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_FALSE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + EXPECT_TRUE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +TEST(LoggingTest, ErrorLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::ERROR)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_FALSE(ShouldLog(LogSeverity::INFO)); + EXPECT_FALSE(ShouldLog(LogSeverity::WARNING)); + EXPECT_TRUE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +TEST(LoggingTest, FatalLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::FATAL)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_FALSE(ShouldLog(LogSeverity::INFO)); + EXPECT_FALSE(ShouldLog(LogSeverity::WARNING)); + EXPECT_FALSE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +TEST(LoggingTest, LoggingStreamTest) { + 
ASSERT_TRUE(SetLoggingLevel(LogSeverity::INFO)); + // This one should be logged. + LoggingStringStream stream1 = (ICING_LOG(INFO) << "Hello" + << "World!"); + EXPECT_THAT(stream1.message, EndsWith("HelloWorld!")); + + // This one should not be logged, thus empty. + LoggingStringStream stream2 = (ICING_LOG(DBG) << "Hello" + << "World!"); + EXPECT_THAT(stream2.message, IsEmpty()); +} + +} // namespace +} // namespace lib +} // namespace icing diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java index 95e0c84..16a4a4a 100644 --- a/java/src/com/google/android/icing/IcingSearchEngine.java +++ b/java/src/com/google/android/icing/IcingSearchEngine.java @@ -16,6 +16,9 @@ package com.google.android.icing; import android.util.Log; import androidx.annotation.NonNull; +import androidx.annotation.Nullable; +import com.google.android.icing.proto.DebugInfoResultProto; +import com.google.android.icing.proto.DebugInfoVerbosity; import com.google.android.icing.proto.DeleteByNamespaceResultProto; import com.google.android.icing.proto.DeleteByQueryResultProto; import com.google.android.icing.proto.DeleteBySchemaTypeResultProto; @@ -29,6 +32,7 @@ import com.google.android.icing.proto.GetSchemaResultProto; import com.google.android.icing.proto.GetSchemaTypeResultProto; import com.google.android.icing.proto.IcingSearchEngineOptions; import com.google.android.icing.proto.InitializeResultProto; +import com.google.android.icing.proto.LogSeverity; import com.google.android.icing.proto.OptimizeResultProto; import com.google.android.icing.proto.PersistToDiskResultProto; import com.google.android.icing.proto.PersistType; @@ -74,7 +78,9 @@ public class IcingSearchEngine implements Closeable { System.loadLibrary("icing"); } - /** @throws IllegalStateException if IcingSearchEngine fails to be created */ + /** + * @throws IllegalStateException if IcingSearchEngine fails to be created + */ public IcingSearchEngine(@NonNull 
IcingSearchEngineOptions options) { nativePointer = nativeCreate(options.toByteArray()); if (nativePointer == 0) { @@ -439,9 +445,16 @@ public class IcingSearchEngine implements Closeable { @NonNull public DeleteByQueryResultProto deleteByQuery(@NonNull SearchSpecProto searchSpec) { + return deleteByQuery(searchSpec, /*returnDeletedDocumentInfo=*/ false); + } + + @NonNull + public DeleteByQueryResultProto deleteByQuery( + @NonNull SearchSpecProto searchSpec, boolean returnDeletedDocumentInfo) { throwIfClosed(); - byte[] deleteResultBytes = nativeDeleteByQuery(this, searchSpec.toByteArray()); + byte[] deleteResultBytes = + nativeDeleteByQuery(this, searchSpec.toByteArray(), returnDeletedDocumentInfo); if (deleteResultBytes == null) { Log.e(TAG, "Received null DeleteResultProto from native."); return DeleteByQueryResultProto.newBuilder() @@ -539,8 +552,7 @@ public class IcingSearchEngine implements Closeable { } try { - return StorageInfoResultProto.parseFrom( - storageInfoResultProtoBytes, EXTENSION_REGISTRY_LITE); + return StorageInfoResultProto.parseFrom(storageInfoResultProtoBytes, EXTENSION_REGISTRY_LITE); } catch (InvalidProtocolBufferException e) { Log.e(TAG, "Error parsing GetOptimizeInfoResultProto.", e); return StorageInfoResultProto.newBuilder() @@ -550,6 +562,28 @@ public class IcingSearchEngine implements Closeable { } @NonNull + public DebugInfoResultProto getDebugInfo(DebugInfoVerbosity.Code verbosity) { + throwIfClosed(); + + byte[] debugInfoResultProtoBytes = nativeGetDebugInfo(this, verbosity.getNumber()); + if (debugInfoResultProtoBytes == null) { + Log.e(TAG, "Received null DebugInfoResultProto from native."); + return DebugInfoResultProto.newBuilder() + .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL)) + .build(); + } + + try { + return DebugInfoResultProto.parseFrom(debugInfoResultProtoBytes, EXTENSION_REGISTRY_LITE); + } catch (InvalidProtocolBufferException e) { + Log.e(TAG, "Error parsing DebugInfoResultProto.", e); + 
return DebugInfoResultProto.newBuilder() + .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL)) + .build(); + } + } + + @NonNull public ResetResultProto reset() { throwIfClosed(); @@ -571,6 +605,31 @@ public class IcingSearchEngine implements Closeable { } } + public static boolean shouldLog(LogSeverity.Code severity) { + return shouldLog(severity, (short) 0); + } + + public static boolean shouldLog(LogSeverity.Code severity, short verbosity) { + return nativeShouldLog((short) severity.getNumber(), verbosity); + } + + public static boolean setLoggingLevel(LogSeverity.Code severity) { + return setLoggingLevel(severity, (short) 0); + } + + public static boolean setLoggingLevel(LogSeverity.Code severity, short verbosity) { + return nativeSetLoggingLevel((short) severity.getNumber(), verbosity); + } + + @Nullable + public static String getLoggingTag() { + String tag = nativeGetLoggingTag(); + if (tag == null) { + Log.e(TAG, "Received null logging tag from native."); + } + return tag; + } + private static native long nativeCreate(byte[] icingSearchEngineOptionsBytes); private static native void nativeDestroy(IcingSearchEngine instance); @@ -615,7 +674,7 @@ public class IcingSearchEngine implements Closeable { IcingSearchEngine instance, String schemaType); private static native byte[] nativeDeleteByQuery( - IcingSearchEngine instance, byte[] searchSpecBytes); + IcingSearchEngine instance, byte[] searchSpecBytes, boolean returnDeletedDocumentInfo); private static native byte[] nativePersistToDisk(IcingSearchEngine instance, int persistType); @@ -629,4 +688,12 @@ public class IcingSearchEngine implements Closeable { private static native byte[] nativeSearchSuggestions( IcingSearchEngine instance, byte[] suggestionSpecBytes); + + private static native byte[] nativeGetDebugInfo(IcingSearchEngine instance, int verbosity); + + private static native boolean nativeShouldLog(short severity, short verbosity); + + private static native boolean 
nativeSetLoggingLevel(short severity, short verbosity); + + private static native String nativeGetLoggingTag(); } diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java index a46814c..c690990 100644 --- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java +++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java @@ -17,6 +17,8 @@ package com.google.android.icing; import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.Truth.assertWithMessage; +import com.google.android.icing.proto.DebugInfoResultProto; +import com.google.android.icing.proto.DebugInfoVerbosity; import com.google.android.icing.proto.DeleteByNamespaceResultProto; import com.google.android.icing.proto.DeleteByQueryResultProto; import com.google.android.icing.proto.DeleteBySchemaTypeResultProto; @@ -30,6 +32,7 @@ import com.google.android.icing.proto.GetSchemaResultProto; import com.google.android.icing.proto.GetSchemaTypeResultProto; import com.google.android.icing.proto.IcingSearchEngineOptions; import com.google.android.icing.proto.InitializeResultProto; +import com.google.android.icing.proto.LogSeverity; import com.google.android.icing.proto.OptimizeResultProto; import com.google.android.icing.proto.PersistToDiskResultProto; import com.google.android.icing.proto.PersistType; @@ -389,6 +392,60 @@ public final class IcingSearchEngineTest { DeleteByQueryResultProto deleteResultProto = icingSearchEngine.deleteByQuery(searchSpec); assertStatusOk(deleteResultProto.getStatus()); + // By default, the deleteByQuery API does not return the summary about deleted documents, unless + // the returnDeletedDocumentInfo parameter is set to true. 
+ assertThat(deleteResultProto.getDeletedDocumentsList()).isEmpty(); + + GetResultProto getResultProto = + icingSearchEngine.get("namespace", "uri1", GetResultSpecProto.getDefaultInstance()); + assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND); + getResultProto = + icingSearchEngine.get("namespace", "uri2", GetResultSpecProto.getDefaultInstance()); + assertStatusOk(getResultProto.getStatus()); + } + + @Test + public void testDeleteByQueryWithDeletedDocumentInfo() throws Exception { + assertStatusOk(icingSearchEngine.initialize().getStatus()); + + SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig(); + SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build(); + assertThat( + icingSearchEngine + .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false) + .getStatus() + .getCode()) + .isEqualTo(StatusProto.Code.OK); + + DocumentProto emailDocument1 = + createEmailDocument("namespace", "uri1").toBuilder() + .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo")) + .build(); + + assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus()); + DocumentProto emailDocument2 = + createEmailDocument("namespace", "uri2").toBuilder() + .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("bar")) + .build(); + + assertStatusOk(icingSearchEngine.put(emailDocument2).getStatus()); + + SearchSpecProto searchSpec = + SearchSpecProto.newBuilder() + .setQuery("foo") + .setTermMatchType(TermMatchType.Code.PREFIX) + .build(); + + DeleteByQueryResultProto deleteResultProto = + icingSearchEngine.deleteByQuery(searchSpec, /*returnDeletedDocumentInfo=*/ true); + assertStatusOk(deleteResultProto.getStatus()); + DeleteByQueryResultProto.DocumentGroupInfo info = + DeleteByQueryResultProto.DocumentGroupInfo.newBuilder() + .setNamespace("namespace") + .setSchema("Email") + .addUris("uri1") + .build(); + 
assertThat(deleteResultProto.getDeletedDocumentsList()).containsExactly(info); GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri1", GetResultSpecProto.getDefaultInstance()); @@ -434,6 +491,35 @@ public final class IcingSearchEngineTest { } @Test + public void testGetDebugInfo() throws Exception { + assertStatusOk(icingSearchEngine.initialize().getStatus()); + + SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig(); + SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build(); + assertThat( + icingSearchEngine + .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false) + .getStatus() + .getCode()) + .isEqualTo(StatusProto.Code.OK); + + DocumentProto emailDocument = createEmailDocument("namespace", "uri"); + assertStatusOk(icingSearchEngine.put(emailDocument).getStatus()); + + DebugInfoResultProto debugInfoResultProtoBasic = + icingSearchEngine.getDebugInfo(DebugInfoVerbosity.Code.BASIC); + assertStatusOk(debugInfoResultProtoBasic.getStatus()); + assertThat(debugInfoResultProtoBasic.getDebugInfo().getDocumentInfo().getCorpusInfoList()) + .isEmpty(); // because verbosity=BASIC + + DebugInfoResultProto debugInfoResultProtoDetailed = + icingSearchEngine.getDebugInfo(DebugInfoVerbosity.Code.DETAILED); + assertStatusOk(debugInfoResultProtoDetailed.getStatus()); + assertThat(debugInfoResultProtoDetailed.getDebugInfo().getDocumentInfo().getCorpusInfoList()) + .hasSize(1); // because verbosity=DETAILED + } + + @Test public void testGetAllNamespaces() throws Exception { assertStatusOk(icingSearchEngine.initialize().getStatus()); @@ -668,6 +754,31 @@ public final class IcingSearchEngineTest { assertThat(response.getSuggestions(1).getQuery()).isEqualTo("fo"); } + @Test + public void testLogging() throws Exception { + // Set to INFO + assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.INFO)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.INFO)).isTrue(); + 
assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.DBG)).isFalse(); + + // Set to WARNING + assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.WARNING)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.WARNING)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.INFO)).isFalse(); + + // Set to DEBUG + assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.DBG)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.DBG)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE)).isFalse(); + + // Set to VERBOSE + assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.VERBOSE, (short) 1)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE, (short) 1)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE, (short) 2)).isFalse(); + + assertThat(IcingSearchEngine.getLoggingTag()).isNotEmpty(); + } + private static void assertStatusOk(StatusProto status) { assertWithMessage(status.getMessage()).that(status.getCode()).isEqualTo(StatusProto.Code.OK); } diff --git a/proto/icing/proto/debug.proto b/proto/icing/proto/debug.proto index 504ae43..90d1981 100644 --- a/proto/icing/proto/debug.proto +++ b/proto/icing/proto/debug.proto @@ -24,48 +24,57 @@ option java_package = "com.google.android.icing.proto"; option java_multiple_files = true; option objc_class_prefix = "ICNG"; +message LogSeverity { + enum Code { + VERBOSE = 0; + // Unable to use DEBUG at this time because it breaks YTM's iOS tests + // cs/?q=%22-DDEBUG%3D1%22%20f:%2FYoutubeMusic%20f:blueprint&ssfr=1 + DBG = 1; + INFO = 2; + WARNING = 3; + ERROR = 4; + FATAL = 5; + } +} + +message DebugInfoVerbosity { + enum Code { + // Simplest debug information. + BASIC = 0; + // More detailed debug information as indicated in the field documentation + // below. + DETAILED = 1; + } +} + // Next tag: 4 message IndexDebugInfoProto { // Storage information of the index. 
optional IndexStorageInfoProto index_storage_info = 1; - message MainIndexDebugInfoProto { - // Information about the main lexicon. - // TODO(b/222349894) Convert the string output to a protocol buffer instead. - optional string lexicon_info = 1; - - // Last added document id. - optional uint32 last_added_document_id = 2; - - // If verbosity > 0, return information about the posting list storage. - // TODO(b/222349894) Convert the string output to a protocol buffer instead. - optional string flash_index_storage_info = 3; - } - optional MainIndexDebugInfoProto main_index_info = 2; - - message LiteIndexDebugInfoProto { - // Current number of hits. - optional uint32 curr_size = 1; - - // The maximum possible number of hits. - optional uint32 hit_buffer_size = 2; - - // Last added document id. - optional uint32 last_added_document_id = 3; - - // The first position in the hit buffer that is not sorted yet, - // or curr_size if all hits are sorted. - optional uint32 searchable_end = 4; - - // The most recent checksum of the lite index, by calling - // LiteIndex::ComputeChecksum(). - optional uint32 index_crc = 5; - - // Information about the lite lexicon. - // TODO(b/222349894) Convert the string output to a protocol buffer instead. - optional string lexicon_info = 6; - } - optional LiteIndexDebugInfoProto lite_index_info = 3; + // A formatted string containing the following information: + // lexicon_info: Information about the main lexicon + // last_added_document_id: Last added document id + // flash_index_storage_info: If verbosity = DETAILED, return information about + // the posting list storage + // + // No direct contents from user-provided documents will ever appear in this + // string. 
+ optional string main_index_info = 2; + + // A formatted string containing the following information: + // curr_size: Current number of hits + // hit_buffer_size: The maximum possible number of hits + // last_added_document_id: Last added document id + // searchable_end: The first position in the hit buffer that is not sorted + // yet, or curr_size if all hits are sorted + // index_crc: The most recent checksum of the lite index, by calling + // LiteIndex::ComputeChecksum() + // lexicon_info: Information about the lite lexicon + // + // No direct contents from user-provided documents will ever appear in this + // string. + optional string lite_index_info = 3; } // Next tag: 4 @@ -84,8 +93,8 @@ message DocumentDebugInfoProto { optional uint32 total_token = 4; } - // If verbosity > 0, return the total number of documents and tokens in each - // (namespace, schema type) pair. + // If verbosity = DETAILED, return the total number of documents and tokens in + // each (namespace, schema type) pair. // Note that deleted and expired documents are skipped in the output. repeated CorpusInfo corpus_info = 3; } @@ -117,7 +126,8 @@ message DebugInfoProto { message DebugInfoResultProto { // Status code can be one of: // OK - // FAILED_PRECONDITION + // FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet + // INTERNAL on IO errors, crc compute error. // // See status.proto for more details. optional StatusProto status = 1; diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index 73d349b..305f410 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=436284873) +set(synced_AOSP_CL_number=455217954) |