aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTerry Wang <tytytyww@google.com>2023-09-01 22:41:34 -0700
committerTerry Wang <tytytyww@google.com>2023-09-03 00:42:21 -0700
commitf0b8d567592a9bff8c188fece9148dce6a2df6f7 (patch)
tree38a479a253e897c63a7917566eb172e0962affc0
parent9fc1fa217d63238b3bcbe0e2edbeabb33b154f69 (diff)
parent8fe5f19051d2cf565afc1918acca448b133b5c03 (diff)
downloadicing-f0b8d567592a9bff8c188fece9148dce6a2df6f7.tar.gz
Merge remote-tracking branch 'goog/androidx-platform-dev' into udc-mainline-prod
* goog/androidx-platform-dev: Update Icing from upstream. Descriptions: ======================================================================== [fixit] Remove incorrect reserve usage for STL data structure ======================================================================== Move LiteIndex::SortHits() from the query path to the indexing path ======================================================================== Fix term frequency bug ======================================================================== Delete dead JNI functions. ======================================================================== Switch Icing JNI implementation to use RegisterNatives ======================================================================== Avoid unnecessary GetObjectClass and GetFieldID calls. ======================================================================== Add join test for SetSchemaNewIndexedDocumentPropertyTriggersIndexRestorationAndReturnsOk ======================================================================== Support different sets of properties to search for different schema types. ======================================================================== Do not cache the section mask for a schema type if no property filters apply while processing the type property filters. ======================================================================== Bug: 286418010 Bug: 297549761 Bug: 248295995 Bug: 296938196 Bug: 288310393 Bug: 291019114 Test: presubmit Change-Id: Iab66bcc3f92641122fae2ac36132f61d8e4b4f57
-rw-r--r--Android.bp2
-rw-r--r--build.gradle79
-rw-r--r--icing/icing-search-engine.cc8
-rw-r--r--icing/icing-search-engine_initialization_test.cc44
-rw-r--r--icing/icing-search-engine_schema_test.cc134
-rw-r--r--icing/icing-search-engine_search_test.cc541
-rw-r--r--icing/index/index-processor_benchmark.cc4
-rw-r--r--icing/index/index-processor_test.cc12
-rw-r--r--icing/index/index.cc21
-rw-r--r--icing/index/index.h30
-rw-r--r--icing/index/index_test.cc66
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.cc179
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.h52
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc4
-rw-r--r--icing/index/lite/lite-index-options.cc10
-rw-r--r--icing/index/lite/lite-index-options.h6
-rw-r--r--icing/index/lite/lite-index.cc269
-rw-r--r--icing/index/lite/lite-index.h83
-rw-r--r--icing/index/lite/lite-index_test.cc641
-rw-r--r--icing/index/lite/lite-index_thread-safety_test.cc7
-rw-r--r--icing/index/lite/term-id-hit-pair.h2
-rw-r--r--icing/index/main/doc-hit-info-iterator-term-main.cc86
-rw-r--r--icing/index/main/doc-hit-info-iterator-term-main.h37
-rw-r--r--icing/index/main/main-index-merger_test.cc4
-rw-r--r--icing/index/main/main-index_test.cc76
-rw-r--r--icing/index/numeric/dummy-numeric-index.h18
-rw-r--r--icing/index/string-section-indexing-handler.cc11
-rw-r--r--icing/index/string-section-indexing-handler_test.cc587
-rw-r--r--icing/jni.lds1
-rw-r--r--icing/jni/icing-search-engine-jni.cc451
-rw-r--r--icing/query/advanced_query_parser/query-visitor_test.cc4
-rw-r--r--icing/query/query-processor.cc7
-rw-r--r--icing/query/query-processor_benchmark.cc4
-rw-r--r--icing/query/query-processor_test.cc259
-rw-r--r--icing/query/suggestion-processor_test.cc4
-rw-r--r--icing/schema/schema-store.cc11
-rw-r--r--icing/schema/schema-store.h9
-rw-r--r--icing/schema/schema-util.cc5
-rw-r--r--java/src/com/google/android/icing/IcingSearchEngine.java1
-rw-r--r--java/src/com/google/android/icing/IcingSearchEngineImpl.java1
-rw-r--r--java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java95
-rw-r--r--proto/icing/proto/initialize.proto22
-rw-r--r--proto/icing/proto/logging.proto7
-rw-r--r--proto/icing/proto/search.proto12
-rw-r--r--synced_AOSP_CL_number.txt2
45 files changed, 3099 insertions, 809 deletions
diff --git a/Android.bp b/Android.bp
index 3012e8b..747e493 100644
--- a/Android.bp
+++ b/Android.bp
@@ -54,6 +54,8 @@ cc_defaults {
"-funsigned-char",
"-fvisibility=hidden",
+
+ "-Bsymbolic",
],
apex_available: ["com.android.appsearch"],
}
diff --git a/build.gradle b/build.gradle
index dde0637..d0d1a39 100644
--- a/build.gradle
+++ b/build.gradle
@@ -14,57 +14,42 @@
* limitations under the License.
*/
+import androidx.build.SdkHelperKt
+
plugins {
- id('AndroidXPlugin')
- id('com.android.library')
- id('com.google.protobuf')
+ id("AndroidXPlugin")
+ id("java-library")
+ id("com.google.protobuf")
}
-android {
- compileOptions {
- sourceCompatibility = JavaVersion.VERSION_1_8
- targetCompatibility = JavaVersion.VERSION_1_8
- }
- sourceSets {
- main {
- java.srcDir 'java/src/'
- proto.srcDir 'proto/'
- }
- // TODO(b/161205849): Re-enable this test once icing nativeLib is no longer being built
- // inside appsearch:appsearch.
- //androidTest.java.srcDir 'java/tests/instrumentation/'
+sourceSets {
+ main {
+ java.srcDir 'java/src/'
+ proto.srcDir 'proto/'
}
- namespace "com.google.android.icing"
-}
-
-// This project has no device tests, skip building it
-androidComponents {
- beforeVariants(selector().withName("debug"), { variantBuilder ->
- variantBuilder.enableAndroidTest = false
- })
}
dependencies {
- api('androidx.annotation:annotation:1.1.0')
-
- implementation('com.google.protobuf:protobuf-javalite:3.10.0')
+ compileOnly("androidx.annotation:annotation:1.1.0")
+ compileOnly(SdkHelperKt.getSdkDependency(project))
+ implementation(libs.protobufLite)
+}
- androidTestImplementation(libs.testCore)
- androidTestImplementation(libs.testRules)
- androidTestImplementation(libs.truth)
- androidTestImplementation(libs.kotlinBom)
+afterEvaluate {
+ lint {
+ lintOptions {
+ // protobuf generates unannotated methods
+ disable("UnknownNullness")
+ }
+ }
}
protobuf {
protoc {
artifact = libs.protobufCompiler.get()
}
-
generateProtoTasks {
all().each { task ->
- project.tasks.named("extractReleaseAnnotations").configure {
- it.dependsOn(task)
- }
task.builtins {
java {
option 'lite'
@@ -74,30 +59,6 @@ protobuf {
}
}
-// Create export artifact for all variants (debug/release) for JarJaring
-android.libraryVariants.all { variant ->
- def variantName = variant.name
- def suffix = variantName.capitalize()
- def exportJarTask = tasks.register("exportJar${suffix}", Jar) {
- archiveBaseName.set("icing-${variantName}")
-
- // The proto-lite dependency includes .proto files, which are not used by icing. When apps
- // depend on appsearch as well as proto-lite directly, these files conflict since jarjar
- // only renames the java classes. Remove them here since they are unused.
- // Expand the jar and remove any .proto files.
- from(zipTree(configurations.detachedConfiguration(
- dependencies.create(libs.protobufLite.get())).getSingleFile())) {
- exclude("**/*.proto")
- }
-
- from files(variant.javaCompileProvider.get().destinationDir)
- dependsOn variant.javaCompileProvider.get()
- }
-
- def exportConfiguration = configurations.register("export${suffix}")
- artifacts.add(exportConfiguration.name, exportJarTask.flatMap { it.archiveFile })
-}
-
androidx {
mavenVersion = LibraryVersions.APPSEARCH
}
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 467c943..6680dae 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -654,7 +654,9 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
// We're going to need to build the index from scratch. So just delete its
// directory now.
// Discard index directory and instantiate a new one.
- Index::Options index_options(index_dir, options_.index_merge_size());
+ Index::Options index_options(index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(),
+ options_.lite_index_sort_size());
if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
return absl_ports::InternalError(
@@ -798,7 +800,9 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
return absl_ports::InternalError(
absl_ports::StrCat("Could not create directory: ", index_dir));
}
- Index::Options index_options(index_dir, options_.index_merge_size());
+ Index::Options index_options(index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(),
+ options_.lite_index_sort_size());
// Term index
InitializeStatsProto::RecoveryCause index_recovery_cause;
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
index d6c8de8..b4853b4 100644
--- a/icing/icing-search-engine_initialization_test.cc
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -1853,7 +1853,9 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
Index::Create(Index::Options(GetIndexDir(),
- /*index_merge_size=*/100),
+ /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
filesystem(), icing_filesystem()));
ICING_ASSERT_OK(index->PersistToDisk());
}
@@ -2463,7 +2465,9 @@ TEST_F(IcingSearchEngineInitializationTest,
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
- /*index_merge_size=*/message.ByteSizeLong()),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
filesystem(), icing_filesystem()));
DocumentId original_last_added_doc_id = index->last_added_document_id();
index->set_last_added_document_id(original_last_added_doc_id + 1);
@@ -2595,7 +2599,9 @@ TEST_F(IcingSearchEngineInitializationTest,
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
- /*index_merge_size=*/message.ByteSizeLong()),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
filesystem(), icing_filesystem()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
@@ -2707,7 +2713,9 @@ TEST_F(IcingSearchEngineInitializationTest,
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
- /*index_merge_size=*/message.ByteSizeLong()),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
filesystem(), icing_filesystem()));
DocumentId original_last_added_doc_id = index->last_added_document_id();
index->set_last_added_document_id(original_last_added_doc_id + 1);
@@ -2844,7 +2852,9 @@ TEST_F(IcingSearchEngineInitializationTest,
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
- /*index_merge_size=*/message.ByteSizeLong()),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
filesystem(), icing_filesystem()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
@@ -2904,7 +2914,9 @@ TEST_F(IcingSearchEngineInitializationTest,
Index::Create(
// index merge size is not important here because we will manually
// invoke merge below.
- Index::Options(GetIndexDir(), /*index_merge_size=*/100),
+ Index::Options(GetIndexDir(), /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
filesystem(), icing_filesystem()));
// Add hits for document 0 and merge.
ASSERT_THAT(index->last_added_document_id(), kInvalidDocumentId);
@@ -2980,7 +2992,9 @@ TEST_F(IcingSearchEngineInitializationTest,
{
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
- Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100),
+ Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
filesystem(), icing_filesystem()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
@@ -3096,7 +3110,9 @@ TEST_F(IcingSearchEngineInitializationTest,
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
- /*index_merge_size=*/message.ByteSizeLong()),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
filesystem(), icing_filesystem()));
// Add hits for document 4 and merge.
DocumentId original_last_added_doc_id = index->last_added_document_id();
@@ -3239,7 +3255,9 @@ TEST_F(IcingSearchEngineInitializationTest,
{
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
- Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100),
+ Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
filesystem(), icing_filesystem()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
@@ -4412,7 +4430,9 @@ TEST_F(IcingSearchEngineInitializationTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
Index::Create(Index::Options(GetIndexDir(),
- /*index_merge_size=*/100),
+ /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
filesystem(), icing_filesystem()));
ICING_ASSERT_OK(index->PersistToDisk());
}
@@ -5241,7 +5261,9 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest,
ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, document_store->Put(message));
// Index doc_id with incorrect data
- Index::Options options(GetIndexDir(), /*index_merge_size=*/1024 * 1024);
+ Index::Options options(GetIndexDir(), /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
Index::Create(options, filesystem(), icing_filesystem()));
diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc
index 5076d50..0e88c5a 100644
--- a/icing/icing-search-engine_schema_test.cc
+++ b/icing/icing-search-engine_schema_test.cc
@@ -897,8 +897,17 @@ TEST_F(
// - "age": integer type, indexed. Section id = 0
// - "name": string type, indexed. Section id = 1.
// - "worksFor.name": string type, (nested) indexed. Section id = 2.
+ //
+ // Joinable property id assignment for 'Person':
+ // - "worksFor.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 0.
SchemaProto schema_one =
SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("List").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("title")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
.AddType(SchemaTypeConfigBuilder()
.SetType("Person")
.AddProperty(PropertyConfigBuilder()
@@ -922,38 +931,47 @@ TEST_F(
.SetName("name")
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("listRef")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
-
- SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_new_schema_types()->Add("Organization");
- expected_set_schema_result.mutable_new_schema_types()->Add("Person");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- DocumentProto personDocument =
+ ASSERT_THAT(icing.SetSchema(schema_one).status(), ProtoIsOk());
+
+ DocumentProto list_document = DocumentBuilder()
+ .SetKey("namespace", "list/1")
+ .SetSchema("List")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("title", "title")
+ .Build();
+ DocumentProto person_document =
DocumentBuilder()
.SetKey("namespace", "person/2")
.SetSchema("Person")
.SetCreationTimestampMs(1000)
.AddStringProperty("name", "John")
.AddInt64Property("age", 20)
- .AddDocumentProperty("worksFor",
- DocumentBuilder()
- .SetKey("namespace", "org/1")
- .SetSchema("Organization")
- .AddStringProperty("name", "Google")
- .Build())
+ .AddDocumentProperty(
+ "worksFor", DocumentBuilder()
+ .SetKey("namespace", "org/1")
+ .SetSchema("Organization")
+ .AddStringProperty("name", "Google")
+ .AddStringProperty("listRef", "namespace#list/1")
+ .Build())
.Build();
- EXPECT_THAT(icing.Put(personDocument).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(list_document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- personDocument;
+ person_document;
SearchResultProto empty_result;
empty_result.mutable_status()->set_code(StatusProto::OK);
@@ -964,8 +982,7 @@ TEST_F(
search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto actual_results =
- icing.Search(search_spec1, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
@@ -976,11 +993,44 @@ TEST_F(
SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
- actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results =
+ icing.Search(search_spec2, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
+ // Verify join search: join a query for `title:title` (which will get
+ // list_document) with a child query for `name:John` (which will get
+ // person_document) based on the child's `worksFor.listRef` field.
+ SearchSpecProto search_spec_with_join;
+ search_spec_with_join.set_query("title:title");
+ search_spec_with_join.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec_with_join.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("worksFor.listRef");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("name:John");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = result_spec;
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = list_document;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() =
+ person_document;
+
+ actual_results =
+ icing.Search(search_spec_with_join, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+
// Change the schema to add another nested document property to 'Person'
//
// New section id assignment for 'Person':
@@ -988,8 +1038,19 @@ TEST_F(
// - "almaMater.name", string type, indexed. Section id = 1
// - "name": string type, indexed. Section id = 2
// - "worksFor.name": string type, (nested) indexed. Section id = 3
+ //
+ // New joinable property id assignment for 'Person':
+ // - "almaMater.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 0.
+ // - "worksFor.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 1.
SchemaProto schema_two =
SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("List").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("title")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
.AddType(SchemaTypeConfigBuilder()
.SetType("Person")
.AddProperty(PropertyConfigBuilder()
@@ -1019,6 +1080,11 @@ TEST_F(
.SetName("name")
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("listRef")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
@@ -1028,10 +1094,10 @@ TEST_F(
// Index restoration should be triggered here because new schema requires more
// properties to be indexed. Also new section ids will be reassigned and index
// restoration should use new section ids to rebuild.
- set_schema_result = icing.SetSchema(schema_two);
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_two);
// Ignore latency numbers. They're covered elsewhere.
set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
+ SetSchemaResultProto expected_set_schema_result = SetSchemaResultProto();
expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
->Add("Person");
@@ -1041,8 +1107,8 @@ TEST_F(
// Verify term search:
// Searching for "worksFor.name:Google" should still match document
- actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
@@ -1051,16 +1117,22 @@ TEST_F(
// rebuild was not triggered and Icing is still searching the old index, where
// 'worksFor.name' was indexed at section id 2.
search_spec1.set_query("name:Google");
- actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results,
EqualsSearchResultIgnoreStatsAndScores(empty_result));
// Verify numeric (integer) search: should still match document
- actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results =
+ icing.Search(search_spec2, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
+
+ // Verify join search: should still able to join by `worksFor.listRef`
+ actual_results =
+ icing.Search(search_spec_with_join, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
}
TEST_F(IcingSearchEngineSchemaTest,
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
index fd2c939..451c9ce 100644
--- a/icing/icing-search-engine_search_test.cc
+++ b/icing/icing-search-engine_search_test.cc
@@ -3565,6 +3565,547 @@ TEST_P(IcingSearchEngineSearchTest, SearchWithProjectionMultipleFieldPaths) {
EqualsProto(projected_document_one));
}
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // Email schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned. Although 'hello' is
+ // present in document_two, it shouldn't be in the result since 'hello' is not
+ // in the specified property filter.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersOnMultipleSchema) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Add Person and Organization schema with a property 'name' in both.
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Organization")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("address")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(),
+ ProtoIsOk());
+
+ // 1. Add person document
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build();
+ ASSERT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ // 1. Add organization document
+ DocumentProto organization_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Organization")
+ .AddStringProperty("name", "Meg Corp")
+ .AddStringProperty("address", "Universal street")
+ .Build();
+ ASSERT_THAT(icing.Put(organization_document).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters. Person schema has name in it's
+ // property filter but Organization schema doesn't.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("Meg");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* person_property_filters =
+ search_spec->add_type_property_filters();
+ person_property_filters->set_schema_type("Person");
+ person_property_filters->add_paths("name");
+ TypePropertyMask* organization_property_filters =
+ search_spec->add_type_property_filters();
+ organization_property_filters->set_schema_type("Organization");
+ organization_property_filters->add_paths("address");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the person document is returned. Although 'Meg' is
+ // present in organization document, it shouldn't be in the result since
+ // the name field is not specified in the Organization property filter.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(person_document));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithWildcardPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // wildcard(*) schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* wildcard_property_filters =
+ search_spec->add_type_property_filters();
+ wildcard_property_filters->set_schema_type("*");
+ wildcard_property_filters->add_paths("sender.name");
+ wildcard_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned since the second
+ // document doesn't contain the word 'hello' in either of fields specified in
+ // the property filter. This confirms that the property filters for the
+ // wildcard entry have been applied to the Email schema as well.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithMixedPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // wildcard(*) schema type plus property filters of sender.name and body for
+ // the Email schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* wildcard_property_filters =
+ search_spec->add_type_property_filters();
+ wildcard_property_filters->set_schema_type("*");
+ wildcard_property_filters->add_paths("sender.name");
+ wildcard_property_filters->add_paths("subject");
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("body");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the second document is returned since the first
+ // document doesn't contain the word 'hello' in either of fields sender.name
+ // or body. This confirms that the property filters specified for Email schema
+ // have been applied and the ones specified for wildcard entry have been
+ // ignored.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_two));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithNonApplicablePropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for an
+ // unknown schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("unknown");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
+ // 3. Verify that both the documents are returned since each of them have the
+ // word 'hello' in at least 1 property. The second document being returned
+ // confirms that the body field was searched and the specified property
+ // filters were not applied to the Email schema type.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(),
+ EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithEmptyPropertyFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("body", "Hello World!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ // 2. Issue a query with empty property filter for Message schema.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* message_property_filters =
+ search_spec->add_type_property_filters();
+ message_property_filters->set_schema_type("Message");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+
+ // 3. Verify that no documents are returned. Although 'hello' is present in
+ // the indexed document, it shouldn't be returned since the Message property
+ // filter doesn't allow any properties to be searched.
+ ASSERT_THAT(results.results(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithPropertyFilterHavingInvalidProperty) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("body", "Hello World!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filter having invalid/unknown property for
+ // Message schema.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* message_property_filters =
+ search_spec->add_type_property_filters();
+ message_property_filters->set_schema_type("Message");
+ message_property_filters->add_paths("unknown");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+
+ // 3. Verify that no documents are returned. Although 'hello' is present in
+ // the indexed document, it shouldn't be returned since the Message property
+ // filter doesn't allow any valid properties to be searched. Any
+ // invalid/unknown properties specified in the property filters will be
+ // ignored while searching.
+ ASSERT_THAT(results.results(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersWithNesting) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filter of sender.emailAddress for the Email
+ // schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.emailAddress");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned since the second
+ // document doesn't contain the word 'hello' in sender.emailAddress. The first
+ // document being returned confirms that the nested property
+ // sender.emailAddress was actually searched.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_one));
+}
+
TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
auto fake_clock = std::make_unique<FakeClock>();
fake_clock->SetTimerElapsedMilliseconds(5);
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index 9fee925..8766f0b 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -150,7 +150,9 @@ DocumentProto CreateDocumentWithHiragana(int content_length) {
std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
const Filesystem& filesystem,
const std::string& index_dir) {
- Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10);
+ Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
return Index::Create(options, &filesystem, &icing_filesystem).ValueOrDie();
}
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index c2eaf62..ba4ece3 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -167,7 +167,9 @@ class IndexProcessorTest : public Test {
schema_store_dir_ = base_dir_ + "/schema_store";
doc_store_dir_ = base_dir_ + "/doc_store";
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
@@ -970,7 +972,9 @@ TEST_F(IndexProcessorTest, IndexingDocAutomaticMerge) {
TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
document));
Index::Options options(index_dir_,
- /*index_merge_size=*/document.ByteSizeLong() * 100);
+ /*index_merge_size=*/document.ByteSizeLong() * 100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
@@ -1033,7 +1037,9 @@ TEST_F(IndexProcessorTest, IndexingDocMergeFailureResets) {
// 2. Recreate the index with the mock filesystem and a merge size that will
// only allow one document to be added before requiring a merge.
Index::Options options(index_dir_,
- /*index_merge_size=*/document.ByteSizeLong());
+ /*index_merge_size=*/document.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/16);
ICING_ASSERT_OK_AND_ASSIGN(
index_,
Index::Create(options, &filesystem_, mock_icing_filesystem_.get()));
diff --git a/icing/index/index.cc b/icing/index/index.cc
index 19edbb6..31dcc7e 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -14,31 +14,38 @@
#include "icing/index/index.h"
+#include <algorithm>
+#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator-or.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
#include "icing/index/lite/lite-index.h"
#include "icing/index/main/doc-hit-info-iterator-term-main.h"
+#include "icing/index/main/main-index.h"
#include "icing/index/term-id-codec.h"
-#include "icing/index/term-property-id.h"
+#include "icing/index/term-metadata.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/index/icing-dynamic-trie.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/scoring.pb.h"
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/scoring/ranker.h"
#include "icing/store/document-id.h"
+#include "icing/store/suggestion-result-checker.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -59,7 +66,9 @@ libtextclassifier3::StatusOr<LiteIndex::Options> CreateLiteIndexOptions(
options.index_merge_size));
}
return LiteIndex::Options(options.base_dir + "/idx/lite.",
- options.index_merge_size);
+ options.index_merge_size,
+ options.lite_index_sort_at_indexing,
+ options.lite_index_sort_size);
}
std::string MakeMainIndexFilepath(const std::string& base_dir) {
@@ -151,9 +160,17 @@ libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create(
IcingDynamicTrie::max_value_index(GetMainLexiconOptions()),
IcingDynamicTrie::max_value_index(
lite_index_options.lexicon_options)));
+
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<LiteIndex> lite_index,
LiteIndex::Create(lite_index_options, icing_filesystem));
+ // Sort the lite index if we've enabled sorting the HitBuffer at indexing
+ // time, and there's an unsorted tail exceeding the threshold.
+ if (options.lite_index_sort_at_indexing &&
+ lite_index->HasUnsortedHitsExceedingSortThreshold()) {
+ lite_index->SortHits();
+ }
+
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<MainIndex> main_index,
MainIndex::Create(MakeMainIndexFilepath(options.base_dir), filesystem,
diff --git a/icing/index/index.h b/icing/index/index.h
index c170278..32ea97b 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -18,8 +18,9 @@
#include <cstdint>
#include <memory>
#include <string>
-#include <unordered_set>
+#include <unordered_map>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -27,6 +28,7 @@
#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/lite/lite-index.h"
+#include "icing/index/lite/term-id-hit-pair.h"
#include "icing/index/main/main-index-merger.h"
#include "icing/index/main/main-index.h"
#include "icing/index/term-id-codec.h"
@@ -40,7 +42,7 @@
#include "icing/store/document-id.h"
#include "icing/store/namespace-id.h"
#include "icing/store/suggestion-result-checker.h"
-#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -68,11 +70,18 @@ namespace lib {
class Index {
public:
struct Options {
- explicit Options(const std::string& base_dir, uint32_t index_merge_size)
- : base_dir(base_dir), index_merge_size(index_merge_size) {}
+ explicit Options(const std::string& base_dir, uint32_t index_merge_size,
+ bool lite_index_sort_at_indexing,
+ uint32_t lite_index_sort_size)
+ : base_dir(base_dir),
+ index_merge_size(index_merge_size),
+ lite_index_sort_at_indexing(lite_index_sort_at_indexing),
+ lite_index_sort_size(lite_index_sort_size) {}
std::string base_dir;
int32_t index_merge_size;
+ bool lite_index_sort_at_indexing;
+ int32_t lite_index_sort_size;
};
// Creates an instance of Index in the directory pointed by file_dir.
@@ -279,6 +288,19 @@ class Index {
return lite_index_->Reset();
}
+ // Whether the LiteIndex HitBuffer requires sorting. This is only true if
+ // Icing has enabled sorting during indexing time, and the HitBuffer's
+ // unsorted tail has exceeded the lite_index_sort_size.
+ bool LiteIndexNeedSort() const {
+ return options_.lite_index_sort_at_indexing &&
+ lite_index_->HasUnsortedHitsExceedingSortThreshold();
+ }
+
+ // Sorts the LiteIndex HitBuffer.
+ void SortLiteIndex() {
+ lite_index_->SortHits();
+ }
+
// Reduces internal file sizes by reclaiming space of deleted documents.
// new_last_added_document_id will be used to update the last added document
// id in the lite index.
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index d563bcb..b823535 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -58,6 +58,7 @@ using ::testing::Eq;
using ::testing::Ge;
using ::testing::Gt;
using ::testing::IsEmpty;
+using ::testing::IsFalse;
using ::testing::IsTrue;
using ::testing::Ne;
using ::testing::NiceMock;
@@ -75,7 +76,9 @@ class IndexTest : public Test {
protected:
void SetUp() override {
index_dir_ = GetTestTempDir() + "/index_test/";
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
}
@@ -146,7 +149,9 @@ MATCHER_P2(EqualsTermMetadata, content, hit_count, "") {
}
TEST_F(IndexTest, CreationWithNullPointerShouldFail) {
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
EXPECT_THAT(
Index::Create(options, &filesystem_, /*icing_filesystem=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
@@ -192,6 +197,36 @@ TEST_F(IndexTest, EmptyIndexAfterMerge) {
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
+TEST_F(IndexTest, CreationWithLiteIndexSortAtIndexingEnabledShouldSort) {
+ // Make the index with lite_index_sort_at_indexing=false and a very small sort
+ // threshold.
+ Index::Options options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/false,
+ /*lite_index_sort_size=*/16);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("baz"), IsOk());
+ ASSERT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // Persist and recreate the index with lite_index_sort_at_indexing=true
+ ASSERT_THAT(index_->PersistToDisk(), IsOk());
+ options = Index::Options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/16);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Check that the index is sorted after recreating with
+ // lite_index_sort_at_indexing, with the unsorted HitBuffer exceeding the sort
+ // threshold.
+ EXPECT_THAT(index_->LiteIndexNeedSort(), IsFalse());
+}
+
TEST_F(IndexTest, AdvancePastEnd) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
@@ -967,7 +1002,9 @@ TEST_F(IndexTest, NonAsciiTermsAfterMerge) {
TEST_F(IndexTest, FullIndex) {
// Make a smaller index so that it's easier to fill up.
- Index::Options options(index_dir_, /*index_merge_size=*/1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
@@ -1035,7 +1072,9 @@ TEST_F(IndexTest, FullIndex) {
TEST_F(IndexTest, FullIndexMerge) {
// Make a smaller index so that it's easier to fill up.
- Index::Options options(index_dir_, /*index_merge_size=*/1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
@@ -1368,7 +1407,9 @@ TEST_F(IndexTest, IndexCreateIOFailure) {
NiceMock<IcingMockFilesystem> mock_icing_filesystem;
ON_CALL(mock_icing_filesystem, CreateDirectoryRecursively)
.WillByDefault(Return(false));
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
EXPECT_THAT(Index::Create(options, &filesystem_, &mock_icing_filesystem),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
@@ -1399,7 +1440,9 @@ TEST_F(IndexTest, IndexCreateCorruptionFailure) {
IsTrue());
// Recreate the index.
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
EXPECT_THAT(Index::Create(options, &filesystem_, &icing_filesystem_),
StatusIs(libtextclassifier3::StatusCode::DATA_LOSS));
}
@@ -1417,7 +1460,9 @@ TEST_F(IndexTest, IndexPersistence) {
index_.reset();
// Recreate the index.
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
@@ -1446,7 +1491,9 @@ TEST_F(IndexTest, IndexPersistenceAfterMerge) {
index_.reset();
// Recreate the index.
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
@@ -1463,7 +1510,8 @@ TEST_F(IndexTest, IndexPersistenceAfterMerge) {
TEST_F(IndexTest, InvalidHitBufferSize) {
Index::Options options(
- index_dir_, /*index_merge_size=*/std::numeric_limits<uint32_t>::max());
+ index_dir_, /*index_merge_size=*/std::numeric_limits<uint32_t>::max(),
+ /*lite_index_sort_at_indexing=*/true, /*lite_index_sort_size=*/1024 * 8);
EXPECT_THAT(Index::Create(options, &filesystem_, &icing_filesystem_),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index 227a185..b850a9b 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -42,8 +42,99 @@ DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
: delegate_(std::move(delegate)),
document_store_(*document_store),
schema_store_(*schema_store),
- target_sections_(std::move(target_sections)),
- current_time_ms_(current_time_ms) {}
+ current_time_ms_(current_time_ms) {
+ type_property_filters_[std::string(SchemaStore::kSchemaTypeWildcard)] =
+ std::move(target_sections);
+}
+
+DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const SearchSpecProto& search_spec,
+ int64_t current_time_ms)
+ : delegate_(std::move(delegate)),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ current_time_ms_(current_time_ms) {
+ // TODO(b/294274922): Add support for polymorphism in type property filters.
+ for (const TypePropertyMask& type_property_mask :
+ search_spec.type_property_filters()) {
+ type_property_filters_[type_property_mask.schema_type()] =
+ std::set<std::string>(type_property_mask.paths().begin(),
+ type_property_mask.paths().end());
+ }
+}
+
+DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::unordered_map<std::string, std::set<std::string>>
+ type_property_filters,
+ std::unordered_map<std::string, SectionIdMask> type_property_masks,
+ int64_t current_time_ms)
+ : delegate_(std::move(delegate)),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ current_time_ms_(current_time_ms),
+ type_property_filters_(std::move(type_property_filters)),
+ type_property_masks_(std::move(type_property_masks)) {}
+
+SectionIdMask DocHitInfoIteratorSectionRestrict::GenerateSectionMask(
+ const std::string& schema_type,
+ const std::set<std::string>& target_sections) const {
+ SectionIdMask section_mask = kSectionIdMaskNone;
+ auto section_metadata_list_or =
+ schema_store_.GetSectionMetadata(schema_type);
+ if (!section_metadata_list_or.ok()) {
+ // The current schema doesn't have section metadata.
+ return kSectionIdMaskNone;
+ }
+ const std::vector<SectionMetadata>* section_metadata_list =
+ section_metadata_list_or.ValueOrDie();
+ for (const SectionMetadata& section_metadata : *section_metadata_list) {
+ if (target_sections.find(section_metadata.path) !=
+ target_sections.end()) {
+ section_mask |= UINT64_C(1) << section_metadata.id;
+ }
+ }
+ return section_mask;
+}
+
+SectionIdMask DocHitInfoIteratorSectionRestrict::
+ ComputeAndCacheSchemaTypeAllowedSectionsMask(
+ const std::string& schema_type) {
+ if (const auto type_property_mask_itr =
+ type_property_masks_.find(schema_type);
+ type_property_mask_itr != type_property_masks_.end()) {
+ return type_property_mask_itr->second;
+ }
+
+ // Section id mask of schema_type is never calculated before, so
+ // calculate it here and put it into type_property_masks_.
+ // - If type property filters of schema_type or wildcard (*) are
+ // specified, then create a mask according to the filters.
+ // - Otherwise, create a mask to match all properties.
+ SectionIdMask new_section_id_mask = kSectionIdMaskAll;
+ if (const auto itr = type_property_filters_.find(schema_type);
+ itr != type_property_filters_.end()) {
+ // Property filters defined for given schema type
+ new_section_id_mask = GenerateSectionMask(
+ schema_type, itr->second);
+ } else if (const auto wildcard_itr = type_property_filters_.find(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ wildcard_itr != type_property_filters_.end()) {
+ // Property filters defined for wildcard entry
+ new_section_id_mask = GenerateSectionMask(
+ schema_type, wildcard_itr->second);
+ } else {
+ // Do not cache the section mask if no property filters apply to this schema
+ // type to avoid taking up unnecessary space.
+ return kSectionIdMaskAll;
+ }
+
+ type_property_masks_[schema_type] = new_section_id_mask;
+ return new_section_id_mask;
+}
libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
@@ -63,32 +154,32 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
// Guaranteed that the DocumentFilterData exists at this point
SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
-
- // A hit can be in multiple sections at once, need to check which of the
- // section ids match the target sections
- while (section_id_mask != 0) {
- // There was a hit in this section id
- SectionId section_id = __builtin_ctzll(section_id_mask);
-
- auto section_metadata_or =
- schema_store_.GetSectionMetadata(schema_type_id, section_id);
-
- if (section_metadata_or.ok()) {
- const SectionMetadata* section_metadata =
- section_metadata_or.ValueOrDie();
-
- if (target_sections_.find(section_metadata->path) !=
- target_sections_.end()) {
- // The hit was in the target section name, return OK/found
- hit_intersect_section_ids_mask_ |= UINT64_C(1) << section_id;
- }
- }
-
- // Mark this section as checked
- section_id_mask &= ~(UINT64_C(1) << section_id);
+ auto schema_type_or = schema_store_.GetSchemaType(schema_type_id);
+ if (!schema_type_or.ok()) {
+ // Ran into error retrieving schema type, skip
+ continue;
}
+ const std::string* schema_type = std::move(schema_type_or).ValueOrDie();
+ SectionIdMask allowed_sections_mask =
+ ComputeAndCacheSchemaTypeAllowedSectionsMask(*schema_type);
- if (hit_intersect_section_ids_mask_ != kSectionIdMaskNone) {
+ // A hit can be in multiple sections at once, need to check which of the
+ // section ids match the sections allowed by type_property_masks_. This can
+ // be done by doing a bitwise and of the section_id_mask in the doc hit and
+ // the allowed_sections_mask.
+ hit_intersect_section_ids_mask_ = section_id_mask & allowed_sections_mask;
+
+ // Return this document if:
+ // - the sectionIdMask is not empty after applying property filters, or
+ // - no property filters apply for its schema type (allowed_sections_mask
+ // == kSectionIdMaskAll). This is needed to ensure that in case of empty
+ // query (which uses doc-hit-info-iterator-all-document-id), where
+ // section_id_mask (and hence hit_intersect_section_ids_mask_) is
+ // kSectionIdMaskNone, doc hits with no property restrictions don't get
+ // filtered out. Doc hits for schema types for whom property filters are
+ // specified will still get filtered out.
+ if (allowed_sections_mask == kSectionIdMaskAll
+ || hit_intersect_section_ids_mask_ != kSectionIdMaskNone) {
doc_hit_info_ = delegate_->doc_hit_info();
doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_);
return libtextclassifier3::Status::OK;
@@ -104,16 +195,36 @@ libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && {
ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
std::move(*delegate_).TrimRightMostNode());
+ // TrimRightMostNode is only used by suggestion processor to process query
+ // expression, so an entry for wildcard should always be present in
+ // type_property_filters_ when code flow reaches here. If the InternalError
+ // below is returned, that means TrimRightMostNode hasn't been called in the
+ // right context.
+ const auto it = type_property_filters_.find("*");
+ if (it == type_property_filters_.end()) {
+ return absl_ports::InternalError(
+ "A wildcard entry should always be present in type property filters "
+ "whenever TrimRightMostNode() is called for "
+ "DocHitInfoIteratorSectionRestrict");
+ }
+ std::set<std::string>& target_sections = it->second;
+ if (target_sections.empty()) {
+ return absl_ports::InternalError(
+ "Target sections should not be empty whenever TrimRightMostNode() is "
+ "called for DocHitInfoIteratorSectionRestrict");
+ }
if (trimmed_delegate.iterator_ == nullptr) {
// TODO(b/228240987): Update TrimmedNode and downstream code to handle
// multiple section restricts.
- trimmed_delegate.target_section_ = std::move(*target_sections_.begin());
+ trimmed_delegate.target_section_ = std::move(*target_sections.begin());
return trimmed_delegate;
}
trimmed_delegate.iterator_ =
- std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::unique_ptr<DocHitInfoIteratorSectionRestrict>(
+ new DocHitInfoIteratorSectionRestrict(
std::move(trimmed_delegate.iterator_), &document_store_,
- &schema_store_, std::move(target_sections_), current_time_ms_);
+ &schema_store_, std::move(type_property_filters_),
+ std::move(type_property_masks_), current_time_ms_));
return std::move(trimmed_delegate);
}
@@ -126,8 +237,14 @@ int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const {
}
std::string DocHitInfoIteratorSectionRestrict::ToString() const {
- return absl_ports::StrCat("(", absl_ports::StrJoin(target_sections_, ","),
- "): ", delegate_->ToString());
+ std::string output = "";
+ for (auto it = type_property_filters_.cbegin();
+ it != type_property_filters_.cend(); it++) {
+ std::string paths = absl_ports::StrJoin(it->second, ",");
+ output += (it->first) + ":" + (paths) + "; ";
+ }
+ std::string result = "{" + output.substr(0, output.size() - 2) + "}: ";
+ return absl_ports::StrCat(result, delegate_->ToString());
}
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
index 58dd120..5d44ed7 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
@@ -19,10 +19,13 @@
#include <memory>
#include <string>
#include <string_view>
+#include <unordered_map>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-store.h"
namespace icing {
@@ -44,6 +47,12 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
const DocumentStore* document_store, const SchemaStore* schema_store,
std::set<std::string> target_sections, int64_t current_time_ms);
+ explicit DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const SearchSpecProto& search_spec,
+ int64_t current_time_ms);
+
libtextclassifier3::Status Advance() override;
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
@@ -72,12 +81,51 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
}
private:
+ explicit DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::unordered_map<std::string, std::set<std::string>>
+ type_property_filters,
+ std::unordered_map<std::string, SectionIdMask> type_property_masks,
+ int64_t current_time_ms);
+ // Calculates the section mask of allowed sections(determined by the property
+ // filters map) for the given schema type and caches the same for any future
+ // calls.
+ //
+ // Returns:
+ // - If type_property_filters_ has an entry for the given schema type or
+ // wildcard(*), return a bitwise or of section IDs in the schema type that
+ // are also present in the relevant filter list.
+ // - Otherwise, return kSectionIdMaskAll.
+ SectionIdMask ComputeAndCacheSchemaTypeAllowedSectionsMask(
+ const std::string& schema_type);
+ // Generates a section mask for the given schema type and the target sections.
+ //
+ // Returns:
+ // - A bitwise or of section IDs in the schema_type that are also
+ // present in the target_sections list.
+ // - If none of the sections in the schema_type are present in the
+ // target_sections list, return kSectionIdMaskNone.
+ // This is done by doing a bitwise or of the target section ids for the given
+ // schema type.
+ SectionIdMask GenerateSectionMask(const std::string& schema_type,
+ const std::set<std::string>&
+ target_sections) const;
+
std::unique_ptr<DocHitInfoIterator> delegate_;
const DocumentStore& document_store_;
const SchemaStore& schema_store_;
-
- std::set<std::string> target_sections_;
int64_t current_time_ms_;
+
+ // Map of property filters per schema type. Supports wildcard(*) for schema
+ // type that will apply to all schema types that are not specifically
+ // specified in the mapping otherwise.
+ std::unordered_map<std::string, std::set<std::string>>
+ type_property_filters_;
+ // Mapping of schema type to the section mask of allowed sections for that
+ // schema type. This section mask is lazily calculated based on the specified
+ // property filters and cached for any future use.
+ std::unordered_map<std::string, SectionIdMask> type_property_masks_;
};
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 47f55aa..1500571 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -186,7 +186,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
DocHitInfoIteratorSectionRestrict filtered_iterator(
std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), /*target_sections=*/{},
+ schema_store_.get(), /*target_sections=*/std::set<std::string>(),
fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
@@ -391,7 +391,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
// Create a hit that exists in a different section, so it shouldn't match any
// section filters
std::vector<DocHitInfo> doc_hit_infos = {
- DocHitInfo(document_id, kSectionIdMaskNone << not_matching_section_id)};
+ DocHitInfo(document_id, UINT64_C(1) << not_matching_section_id)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
diff --git a/icing/index/lite/lite-index-options.cc b/icing/index/lite/lite-index-options.cc
index 29075f8..8780d45 100644
--- a/icing/index/lite/lite-index-options.cc
+++ b/icing/index/lite/lite-index-options.cc
@@ -14,6 +14,8 @@
#include "icing/index/lite/lite-index-options.h"
+#include <cstdint>
+
#include "icing/index/lite/term-id-hit-pair.h"
namespace icing {
@@ -64,9 +66,13 @@ IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) {
} // namespace
LiteIndexOptions::LiteIndexOptions(const std::string& filename_base,
- uint32_t hit_buffer_want_merge_bytes)
+ uint32_t hit_buffer_want_merge_bytes,
+ bool hit_buffer_sort_at_indexing,
+ uint32_t hit_buffer_sort_threshold_bytes)
: filename_base(filename_base),
- hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes) {
+ hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes),
+ hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing),
+ hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes) {
hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes);
lexicon_options = CalculateTrieOptions(hit_buffer_size);
display_mappings_options = CalculateTrieOptions(hit_buffer_size);
diff --git a/icing/index/lite/lite-index-options.h b/icing/index/lite/lite-index-options.h
index ae58802..9f8452c 100644
--- a/icing/index/lite/lite-index-options.h
+++ b/icing/index/lite/lite-index-options.h
@@ -27,7 +27,9 @@ struct LiteIndexOptions {
// hit_buffer_want_merge_bytes and the logic in CalculateHitBufferSize and
// CalculateTrieOptions.
LiteIndexOptions(const std::string& filename_base,
- uint32_t hit_buffer_want_merge_bytes);
+ uint32_t hit_buffer_want_merge_bytes,
+ bool hit_buffer_sort_at_indexing,
+ uint32_t hit_buffer_sort_threshold_bytes);
IcingDynamicTrie::Options lexicon_options;
IcingDynamicTrie::Options display_mappings_options;
@@ -35,6 +37,8 @@ struct LiteIndexOptions {
std::string filename_base;
uint32_t hit_buffer_want_merge_bytes = 0;
uint32_t hit_buffer_size = 0;
+ bool hit_buffer_sort_at_indexing = false;
+ uint32_t hit_buffer_sort_threshold_bytes = 0;
};
} // namespace lib
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index bf54dec..ec7141a 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -36,6 +36,8 @@
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/hit/hit.h"
#include "icing/index/lite/lite-index-header.h"
+#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/index/term-id-codec.h"
#include "icing/index/term-property-id.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/core/icing-timer.h"
@@ -44,10 +46,13 @@
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mmapper.h"
#include "icing/proto/debug.pb.h"
+#include "icing/proto/scoring.pb.h"
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/suggestion-result-checker.h"
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -160,7 +165,7 @@ libtextclassifier3::Status LiteIndex::Initialize() {
}
// Set up header.
- header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
+ header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
header_mmap_.address()));
@@ -175,7 +180,7 @@ libtextclassifier3::Status LiteIndex::Initialize() {
UpdateChecksum();
} else {
- header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
+ header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
header_mmap_.address()));
@@ -352,6 +357,73 @@ libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId(
return tvi;
}
+void LiteIndex::ScoreAndAppendFetchedHit(
+ const Hit& hit, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker,
+ DocumentId& last_document_id, bool& is_last_document_desired,
+ int& total_score_out, std::vector<DocHitInfo>* hits_out,
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out) const {
+ // Check sections.
+ if (((UINT64_C(1) << hit.section_id()) & section_id_mask) == 0) {
+ return;
+ }
+ // Check prefix section only.
+ if (only_from_prefix_sections && !hit.is_in_prefix_section()) {
+ return;
+ }
+ // Check whether this Hit is desired.
+ // TODO(b/230553264) Move common logic into helper function once we support
+ // score term by prefix_hit in lite_index.
+ DocumentId document_id = hit.document_id();
+ bool is_new_document = document_id != last_document_id;
+ if (is_new_document) {
+ last_document_id = document_id;
+ is_last_document_desired =
+ suggestion_result_checker == nullptr ||
+ suggestion_result_checker->BelongsToTargetResults(document_id,
+ hit.section_id());
+ }
+ if (!is_last_document_desired) {
+ // The document is removed or expired or not desired.
+ return;
+ }
+
+ // Score the hit by the strategy
+ switch (score_by) {
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE:
+ total_score_out = 1;
+ break;
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT:
+ if (is_new_document) {
+ ++total_score_out;
+ }
+ break;
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY:
+ if (hit.has_term_frequency()) {
+ total_score_out += hit.term_frequency();
+ } else {
+ ++total_score_out;
+ }
+ break;
+ }
+
+ // Append the Hit or update hit section to the output vector.
+ if (is_new_document && hits_out != nullptr) {
+ hits_out->push_back(DocHitInfo(document_id));
+ if (term_frequency_out != nullptr) {
+ term_frequency_out->push_back(Hit::TermFrequencyArray());
+ }
+ }
+ if (hits_out != nullptr) {
+ hits_out->back().UpdateSection(hit.section_id());
+ if (term_frequency_out != nullptr) {
+ term_frequency_out->back()[hit.section_id()] = hit.term_frequency();
+ }
+ }
+}
+
int LiteIndex::FetchHits(
uint32_t term_id, SectionIdMask section_id_mask,
bool only_from_prefix_sections,
@@ -359,19 +431,38 @@ int LiteIndex::FetchHits(
const SuggestionResultChecker* suggestion_result_checker,
std::vector<DocHitInfo>* hits_out,
std::vector<Hit::TermFrequencyArray>* term_frequency_out) {
- int score = 0;
- DocumentId last_document_id = kInvalidDocumentId;
- // Record whether the last document belongs to the given namespaces.
- bool is_last_document_desired = false;
-
- if (NeedSort()) {
- // Transition from shared_lock in NeedSort to unique_lock here is safe
- // because it doesn't hurt to sort again if sorting was done already by
- // another thread after NeedSort is evaluated. NeedSort is called before
- // sorting to improve concurrency as threads can avoid acquiring the unique
- // lock if no sorting is needed.
+ bool need_sort_at_querying = false;
+ {
+ absl_ports::shared_lock l(&mutex_);
+
+ // We sort here when:
+ // 1. We don't enable sorting at indexing time (i.e. we sort at querying
+ // time), and there is an unsorted tail portion. OR
+ // 2. The unsorted tail size exceeds the hit_buffer_sort_threshold,
+ // regardless of whether or not hit_buffer_sort_at_indexing is enabled.
+ // This is more of a sanity check. We should not really be encountering
+ // this case.
+ need_sort_at_querying = NeedSortAtQuerying();
+ }
+ if (need_sort_at_querying) {
absl_ports::unique_lock l(&mutex_);
- SortHits();
+ IcingTimer timer;
+
+ // Transition from shared_lock to unique_lock is safe here because it
+ // doesn't hurt to sort again if sorting was done already by another thread
+ // after need_sort_at_querying is evaluated.
+ // We check need_sort_at_querying to improve query concurrency as threads
+ // can avoid acquiring the unique lock if no sorting is needed.
+ SortHitsImpl();
+
+ if (options_.hit_buffer_sort_at_indexing) {
+ // This is the second case for sort. Log as this should be a very rare
+ // occasion.
+ ICING_LOG(WARNING) << "Sorting HitBuffer at querying time when "
+ "hit_buffer_sort_at_indexing is enabled. Sort and "
+ "merge HitBuffer in "
+ << timer.Elapsed() * 1000 << " ms.";
+ }
}
// This downgrade from an unique_lock to a shared_lock is safe because we're
@@ -379,75 +470,72 @@ int LiteIndex::FetchHits(
// only in Seek().
// Any operations that might execute in between the transition of downgrading
// the lock here are guaranteed not to alter the searchable section (or the
- // LiteIndex due to a global lock in IcingSearchEngine).
+ // LiteIndex) due to a global lock in IcingSearchEngine.
absl_ports::shared_lock l(&mutex_);
- for (uint32_t idx = Seek(term_id); idx < header_->searchable_end(); idx++) {
- TermIdHitPair term_id_hit_pair =
- hit_buffer_.array_cast<TermIdHitPair>()[idx];
- if (term_id_hit_pair.term_id() != term_id) break;
-
- const Hit& hit = term_id_hit_pair.hit();
- // Check sections.
- if (((UINT64_C(1) << hit.section_id()) & section_id_mask) == 0) {
- continue;
- }
- // Check prefix section only.
- if (only_from_prefix_sections && !hit.is_in_prefix_section()) {
- continue;
- }
- // TODO(b/230553264) Move common logic into helper function once we support
- // score term by prefix_hit in lite_index.
- // Check whether this Hit is desired.
- DocumentId document_id = hit.document_id();
- bool is_new_document = document_id != last_document_id;
- if (is_new_document) {
- last_document_id = document_id;
- is_last_document_desired =
- suggestion_result_checker == nullptr ||
- suggestion_result_checker->BelongsToTargetResults(document_id,
- hit.section_id());
- }
- if (!is_last_document_desired) {
- // The document is removed or expired or not desired.
- continue;
- }
- // Score the hit by the strategy
- switch (score_by) {
- case SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE:
- score = 1;
- break;
- case SuggestionScoringSpecProto::SuggestionRankingStrategy::
- DOCUMENT_COUNT:
- if (is_new_document) {
- ++score;
- }
- break;
- case SuggestionScoringSpecProto::SuggestionRankingStrategy::
- TERM_FREQUENCY:
- if (hit.has_term_frequency()) {
- score += hit.term_frequency();
- } else {
- ++score;
- }
- break;
- }
+ // Search in the HitBuffer array for Hits with the corresponding term_id.
+ // Hits are added in increasing order of doc ids, so hits that get appended
+ // later have larger docIds. This means that:
+ // 1. Hits in the unsorted tail will have larger docIds than hits in the
+ // sorted portion.
+ // 2. Hits at the end of the unsorted tail will have larger docIds than hits
+ // in the front of the tail.
+ // We want to retrieve hits in descending order of docIds. Therefore we should
+ // search by doing:
+ // 1. Linear search first in reverse iteration order over the unsorted tail
+ // portion.
+ // 2. Followed by binary search on the sorted portion.
+ const TermIdHitPair* array = hit_buffer_.array_cast<TermIdHitPair>();
- // Append the Hit or update hit section to the output vector.
- if (is_new_document && hits_out != nullptr) {
- hits_out->push_back(DocHitInfo(document_id));
- if (term_frequency_out != nullptr) {
- term_frequency_out->push_back(Hit::TermFrequencyArray());
+ DocumentId last_document_id = kInvalidDocumentId;
+ // Record whether the last document belongs to the given namespaces.
+ bool is_last_document_desired = false;
+ int total_score = 0;
+
+ // Linear search over unsorted tail in reverse iteration order.
+ // This should only be performed when hit_buffer_sort_at_indexing is enabled.
+ // When disabled, the entire HitBuffer should be sorted already and only
+ // binary search is needed.
+ if (options_.hit_buffer_sort_at_indexing) {
+ uint32_t unsorted_length = header_->cur_size() - header_->searchable_end();
+ for (uint32_t i = 1; i <= unsorted_length; ++i) {
+ TermIdHitPair term_id_hit_pair = array[header_->cur_size() - i];
+ if (term_id_hit_pair.term_id() == term_id) {
+ // We've found a matched hit.
+ const Hit& matched_hit = term_id_hit_pair.hit();
+ // Score the hit and add to total_score. Also add the hits and its term
+ // frequency info to hits_out and term_frequency_out if the two vectors
+ // are non-null.
+ ScoreAndAppendFetchedHit(matched_hit, section_id_mask,
+ only_from_prefix_sections, score_by,
+ suggestion_result_checker, last_document_id,
+ is_last_document_desired, total_score,
+ hits_out, term_frequency_out);
}
}
- if (hits_out != nullptr) {
- hits_out->back().UpdateSection(hit.section_id());
- if (term_frequency_out != nullptr) {
- term_frequency_out->back()[hit.section_id()] = hit.term_frequency();
- }
+ }
+
+ // Do binary search over the sorted section and repeat the above steps.
+ TermIdHitPair target_term_id_hit_pair(
+ term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultTermFrequency));
+ for (const TermIdHitPair* ptr = std::lower_bound(
+ array, array + header_->searchable_end(), target_term_id_hit_pair);
+ ptr < array + header_->searchable_end(); ++ptr) {
+ if (ptr->term_id() != term_id) {
+ // We've processed all matches. Stop iterating further.
+ break;
}
+
+ const Hit& matched_hit = ptr->hit();
+ // Score the hit and add to total_score. Also add the hits and its term
+ // frequency info to hits_out and term_frequency_out if the two vectors are
+ // non-null.
+ ScoreAndAppendFetchedHit(
+ matched_hit, section_id_mask, only_from_prefix_sections, score_by,
+ suggestion_result_checker, last_document_id, is_last_document_desired,
+ total_score, hits_out, term_frequency_out);
}
- return score;
+ return total_score;
}
libtextclassifier3::StatusOr<int> LiteIndex::ScoreHits(
@@ -455,9 +543,9 @@ libtextclassifier3::StatusOr<int> LiteIndex::ScoreHits(
SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
const SuggestionResultChecker* suggestion_result_checker) {
return FetchHits(term_id, kSectionIdMaskAll,
- /*only_from_prefix_sections=*/false, score_by,
- suggestion_result_checker,
- /*hits_out=*/nullptr);
+ /*only_from_prefix_sections=*/false, score_by,
+ suggestion_result_checker,
+ /*hits_out=*/nullptr);
}
bool LiteIndex::is_full() const {
@@ -515,7 +603,7 @@ IndexStorageInfoProto LiteIndex::GetStorageInfo(
return storage_info;
}
-void LiteIndex::SortHits() {
+void LiteIndex::SortHitsImpl() {
// Make searchable by sorting by hit buffer.
uint32_t sort_len = header_->cur_size() - header_->searchable_end();
if (sort_len <= 0) {
@@ -546,25 +634,6 @@ void LiteIndex::SortHits() {
UpdateChecksum();
}
-uint32_t LiteIndex::Seek(uint32_t term_id) const {
- // Binary search for our term_id. Make sure we get the first
- // element. Using kBeginSortValue ensures this for the hit value.
- TermIdHitPair term_id_hit_pair(
- term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultTermFrequency));
-
- const TermIdHitPair::Value* array =
- hit_buffer_.array_cast<TermIdHitPair::Value>();
- if (header_->searchable_end() != header_->cur_size()) {
- ICING_LOG(WARNING) << "Lite index: hit buffer searchable end != current "
- << "size during Seek(): "
- << header_->searchable_end() << " vs "
- << header_->cur_size();
- }
- const TermIdHitPair::Value* ptr = std::lower_bound(
- array, array + header_->searchable_end(), term_id_hit_pair.value());
- return ptr - array;
-}
-
libtextclassifier3::Status LiteIndex::Optimize(
const std::vector<DocumentId>& document_id_old_to_new,
const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id) {
@@ -575,7 +644,7 @@ libtextclassifier3::Status LiteIndex::Optimize(
}
// Sort the hits so that hits with the same term id will be grouped together,
// which helps later to determine which terms will be unused after compaction.
- SortHits();
+ SortHitsImpl();
uint32_t new_size = 0;
uint32_t curr_term_id = 0;
uint32_t curr_tvi = 0;
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index 916a14b..288602a 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -20,6 +20,7 @@
#define ICING_INDEX_LITE_INDEX_H_
#include <cstdint>
+#include <iterator>
#include <limits>
#include <memory>
#include <string>
@@ -48,7 +49,6 @@
#include "icing/store/document-id.h"
#include "icing/store/namespace-id.h"
#include "icing/store/suggestion-result-checker.h"
-#include "icing/util/bit-util.h"
#include "icing/util/crc32.h"
namespace icing {
@@ -63,6 +63,9 @@ class LiteIndex {
// An entry in the hit buffer.
using Options = LiteIndexOptions;
+ // Offset for the LiteIndex_Header in the hit buffer mmap.
+ static constexpr uint32_t kHeaderFileOffset = 0;
+
// Updates checksum of subcomponents.
~LiteIndex();
@@ -152,8 +155,8 @@ class LiteIndex {
// Add all hits with term_id from the sections specified in section_id_mask,
// skipping hits in non-prefix sections if only_from_prefix_sections is true,
// to hits_out. If hits_out is nullptr, no hits will be added. The
- // corresponding hit term frequencies will also be added if term_frequency_out
- // is nullptr.
+ // corresponding hit term frequencies will also not be added if
+ // term_frequency_out is nullptr.
//
// Only those hits which belongs to the given namespaces will be counted and
// fetched. A nullptr namespace checker will disable this check.
@@ -181,15 +184,29 @@ class LiteIndex {
uint32_t size() const ICING_LOCKS_EXCLUDED(mutex_) {
absl_ports::shared_lock l(&mutex_);
- return sizeLocked();
+ return size_impl();
}
bool WantsMerge() const ICING_LOCKS_EXCLUDED(mutex_) {
absl_ports::shared_lock l(&mutex_);
- return is_full() || sizeLocked() >= (options_.hit_buffer_want_merge_bytes /
- sizeof(TermIdHitPair::Value));
+ return is_full() || size_impl() >= (options_.hit_buffer_want_merge_bytes /
+ sizeof(TermIdHitPair::Value));
+ }
+
+ // Whether or not the HitBuffer's unsorted tail size exceeds the sort
+ // threshold.
+ bool HasUnsortedHitsExceedingSortThreshold() const
+ ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return HasUnsortedHitsExceedingSortThresholdImpl();
}
+ // Sort hits stored in the index.
+ void SortHits() ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::unique_lock l(&mutex_);
+ SortHitsImpl();
+ };
+
class const_iterator {
friend class LiteIndex;
@@ -326,17 +343,13 @@ class LiteIndex {
// Check if the hit buffer has reached its capacity.
bool is_full() const ICING_SHARED_LOCKS_REQUIRED(mutex_);
- uint32_t sizeLocked() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
- return header_->cur_size();
- }
-
// Non-locking implementation for empty().
bool empty_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
return size_impl() == 0;
}
// Non-locking implementation for size().
- bool size_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ uint32_t size_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
return header_->cur_size();
}
@@ -352,18 +365,48 @@ class LiteIndex {
NamespaceId namespace_id)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Whether or not the HitBuffer requires sorting.
- bool NeedSort() ICING_LOCKS_EXCLUDED(mutex_) {
- absl_ports::shared_lock l(&mutex_);
- return header_->cur_size() - header_->searchable_end() > 0;
+ // We need to sort during querying time when:
+ // 1. Sorting at indexing time is not enabled and there is an unsorted tail
+ // section in the HitBuffer.
+ // 2. The unsorted tail size exceeds the hit_buffer_sort_threshold, regardless
+ // of whether or not hit_buffer_sort_at_indexing is enabled. This is to
+ // prevent performing sequential search on a large unsorted tail section,
+ // which would result in bad query performance.
+ // This is more of a sanity check. We should not really be encountering
+ // this case.
+ bool NeedSortAtQuerying() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return HasUnsortedHitsExceedingSortThresholdImpl() ||
+ (!options_.hit_buffer_sort_at_indexing &&
+ header_->cur_size() - header_->searchable_end() > 0);
}
- // Sort hits stored in the index.
- void SortHits() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ // Non-locking implementation for HasUnsortedHitsExceedingSortThreshold().
+ bool HasUnsortedHitsExceedingSortThresholdImpl() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_->cur_size() - header_->searchable_end() >=
+ (options_.hit_buffer_sort_threshold_bytes /
+ sizeof(TermIdHitPair::Value));
+ }
- // Returns the position of the first element with term_id, or the searchable
- // end of the hit buffer if term_id is not present.
- uint32_t Seek(uint32_t term_id) const ICING_SHARED_LOCKS_REQUIRED(mutex_);
+ // Non-locking implementation for SortHits().
+ void SortHitsImpl() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Calculates and adds the score for a fetched hit to total_score_out, while
+ // updating last_document_id (which keeps track of the last added docId so
+ // far), and is_last_document_desired (which keeps track of whether that last
+ // added docId belongs to the query's desired namespace.)
+ //
+ // Also appends the hit to hits_out and term_frequency_out if the vectors are
+ // not null.
+ void ScoreAndAppendFetchedHit(
+ const Hit& hit, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker,
+ DocumentId& last_document_id, bool& is_last_document_desired,
+ int& total_score_out, std::vector<DocHitInfo>* hits_out,
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out) const
+ ICING_SHARED_LOCKS_REQUIRED(mutex_);
// File descriptor that points to where the header and hit buffer are written
// to.
diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc
index 5f141ed..9811fa2 100644
--- a/icing/index/lite/lite-index_test.cc
+++ b/icing/index/lite/lite-index_test.cc
@@ -14,14 +14,27 @@
#include "icing/index/lite/lite-index.h"
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
+#include "icing/index/lite/lite-index-header.h"
#include "icing/index/term-id-codec.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
-#include "icing/store/suggestion-result-checker.h"
+#include "icing/store/namespace-id.h"
#include "icing/testing/always-false-suggestion-result-checker-impl.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -34,6 +47,8 @@ namespace {
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
using ::testing::SizeIs;
class LiteIndexTest : public testing::Test {
@@ -41,62 +56,518 @@ class LiteIndexTest : public testing::Test {
void SetUp() override {
index_dir_ = GetTestTempDir() + "/test_dir";
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str()));
-
- std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
- LiteIndex::Options options(lite_index_file_name,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
- LiteIndex::Create(options, &icing_filesystem_));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- term_id_codec_,
- TermIdCodec::Create(
- IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
- IcingDynamicTrie::max_value_index(options.lexicon_options)));
}
void TearDown() override {
term_id_codec_.reset();
- lite_index_.reset();
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
}
std::string index_dir_;
Filesystem filesystem_;
IcingFilesystem icing_filesystem_;
- std::unique_ptr<LiteIndex> lite_index_;
std::unique_ptr<TermIdCodec> term_id_codec_;
};
constexpr NamespaceId kNamespace0 = 0;
-TEST_F(LiteIndexTest, LiteIndexAppendHits) {
+TEST_F(LiteIndexTest,
+ LiteIndexFetchHits_sortAtQuerying_unsortedHitsBelowSortThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
- uint32_t tvi,
- lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
- term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit foo_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
- Hit doc_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ Hit foo_hit1(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ Hit bar_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit bar_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
+
+ // Check that unsorted hits does not exceed the sort threshold.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+
+ // Check that hits are unsorted. Persist the data and pread from
+ // LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+
+ // Query the LiteIndex
std::vector<DocHitInfo> hits1;
- lite_index_->FetchHits(
+ lite_index->FetchHits(
foo_term_id, kSectionIdMaskAll,
/*only_from_prefix_sections=*/false,
SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
/*namespace_checker=*/nullptr, &hits1);
EXPECT_THAT(hits1, SizeIs(1));
- EXPECT_THAT(hits1.back().document_id(), Eq(0));
+ EXPECT_THAT(hits1.back().document_id(), Eq(1));
// Check that the hits are coming from section 0 and section 1.
EXPECT_THAT(hits1.back().hit_section_ids_mask(), Eq(0b11));
std::vector<DocHitInfo> hits2;
AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
- lite_index_->FetchHits(
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ // Check that hits are sorted after querying LiteIndex. Persist the data and
+ // pread from LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
+}
+
+TEST_F(LiteIndexTest,
+ LiteIndexFetchHits_sortAtIndexing_unsortedHitsBelowSortThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ // However note that in these tests we're unable to sort hits after
+ // indexing, as sorting is performed by the string-section-indexing-handler
+ // after indexing all hits in an entire document, rather than after each
+ // AddHit() operation.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit foo_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit foo_hit1(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ Hit bar_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit bar_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
+
+ // Check that unsorted hits does not exceed the sort threshold.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+
+ // Check that hits are unsorted. Persist the data and pread from
+ // LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+
+ // Query the LiteIndex
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(1));
+ EXPECT_THAT(hits1.back().document_id(), Eq(1));
+ // Check that the hits are coming from section 0 and section 1.
+ EXPECT_THAT(hits1.back().hit_section_ids_mask(), Eq(0b11));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ // Check that hits are still unsorted after querying LiteIndex because the
+ // HitBuffer unsorted size is still below the sort threshold, and we've
+ // enabled sort_at_indexing.
+ // Persist the data and perform a pread on LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+}
+
+TEST_F(
+ LiteIndexTest,
+ LiteIndexFetchHits_sortAtQuerying_unsortedHitsExceedingSortAtIndexThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ // However note that in these tests we're unable to sort hits after
+ // indexing, as sorting is performed by the string-section-indexing-handler
+ // after indexing all hits in an entire document, rather than after each
+ // AddHit() operation.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Create 4 hits for docs 0-2, and 2 hits for doc 3 -- 14 in total
+ // Doc 0
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit2(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 1
+ Hit doc1_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit3(/*section_id=*/2, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 2
+ Hit doc2_hit0(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit1(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit2(/*section_id=*/1, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit3(/*section_id=*/2, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 3
+ Hit doc3_hit0(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit1(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+
+ // Create terms
+ // Foo
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ // Bar
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ // Baz
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t baz_tvi,
+ lite_index->InsertTerm("baz", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t baz_term_id,
+ term_id_codec_->EncodeTvi(baz_tvi, TviType::LITE));
+ // Qux
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t qux_tvi,
+ lite_index->InsertTerm("qux", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t qux_term_id,
+ term_id_codec_->EncodeTvi(qux_tvi, TviType::LITE));
+
+ // Add 14 hits and make sure that termIds are added in unsorted order.
+ // Documents should be inserted in order as new incoming hits should have
+ // larger document ids.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc0_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc0_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc0_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc2_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc2_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc3_hit1));
+ // Verify that the HitBuffer has not been sorted.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+
+ // We now have the following in the hit buffer:
+ // <term>: {(docId, sectionId)...}
+ // foo: {(0, 0); (1, 0); (1, 1); (2, 0); (2, 2); (3, 0)}
+ // bar: {(0, 0); (1, 0); (1, 2)}
+ // baz: {(0, 1); (2, 0); (3, 0)}
+ // qux: {(0, 2); (2, 1)}
+
+ // Search over the HitBuffer.
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(4));
+ // Check that hits are retrieved in descending order of docIds.
+ EXPECT_THAT(hits1[0].document_id(), Eq(3));
+ EXPECT_THAT(hits1[0].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits1[1].document_id(), Eq(2));
+ EXPECT_THAT(hits1[1].hit_section_ids_mask(), Eq(0b101));
+ EXPECT_THAT(hits1[2].document_id(), Eq(1));
+ EXPECT_THAT(hits1[2].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[3].document_id(), Eq(0));
+ EXPECT_THAT(hits1[3].hit_section_ids_mask(), Eq(0b1));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ std::vector<DocHitInfo> hits3;
+ lite_index->FetchHits(
+ bar_term_id, 0b1,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits3);
+ EXPECT_THAT(hits3, SizeIs(2));
+ // Check fetching hits with SectionIdMask 0b1: only section-0 hits match.
+ EXPECT_THAT(hits3[0].document_id(), Eq(1));
+ EXPECT_THAT(hits3[0].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits3[1].document_id(), Eq(0));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+
+ // Check that the HitBuffer is sorted after the query call.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+}
+
+TEST_F(
+ LiteIndexTest,
+ LiteIndexFetchHits_sortAtIndexing_unsortedHitsExceedingSortAtIndexThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Create 4 hits for docs 0-2, and 2 hits for doc 3 -- 14 in total
+ // Doc 0
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit2(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 1
+ Hit doc1_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit3(/*section_id=*/2, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 2
+ Hit doc2_hit0(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit1(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit2(/*section_id=*/1, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit3(/*section_id=*/2, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 3
+ Hit doc3_hit0(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit1(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit2(/*section_id=*/1, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit3(/*section_id=*/2, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 4
+ Hit doc4_hit0(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit1(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit2(/*section_id=*/1, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit3(/*section_id=*/2, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+
+ // Create terms
+ // Foo
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ // Bar
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ // Baz
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t baz_tvi,
+ lite_index->InsertTerm("baz", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t baz_term_id,
+ term_id_codec_->EncodeTvi(baz_tvi, TviType::LITE));
+ // Qux
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t qux_tvi,
+ lite_index->InsertTerm("qux", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t qux_term_id,
+ term_id_codec_->EncodeTvi(qux_tvi, TviType::LITE));
+
+ // Add hits and make sure that termIds are added in unsorted order.
+ // Documents should be inserted in order as new incoming hits should have
+ // larger document ids.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc0_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc0_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc0_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit3));
+ // Adding 8 hits exceeds the sort threshold. However when sort_at_indexing is
+ // enabled, sorting is done in the string-section-indexing-handler rather than
+ // AddHit() itself, so we need to invoke SortHits() manually.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+ lite_index->SortHits();
+ // Check that the HitBuffer is sorted.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
+
+ // Add 12 more hits so that sort threshold is exceeded again.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc2_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc2_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc3_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc3_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc4_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc4_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc4_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc4_hit3));
+
+ // Adding these hits exceeds the sort threshold. However when sort_at_indexing
+ // is enabled, sorting is done in the string-section-indexing-handler rather
+ // than AddHit() itself.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+
+ // We now have the following in the hit buffer:
+ // <term>: {(docId, sectionId)...}
+ // foo: {(0, 0); (1, 0); (1, 1); (2, 0); (2, 2); (3, 0); (3, 1); (4, 1)}
+ // bar: {(0, 0); (1, 0); (1, 2); (3, 2); (4, 2)}
+ // baz: {(0, 1); (2, 0); (3, 0); (4, 0)}
+ // qux: {(0, 2); (2, 1); (4, 0)}
+
+ // Search over the HitBuffer.
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(5));
+ // Check that hits are retrieved in descending order of docIds.
+ EXPECT_THAT(hits1[0].document_id(), Eq(4));
+ EXPECT_THAT(hits1[0].hit_section_ids_mask(), Eq(0b10));
+ EXPECT_THAT(hits1[1].document_id(), Eq(3));
+ EXPECT_THAT(hits1[1].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[2].document_id(), Eq(2));
+ EXPECT_THAT(hits1[2].hit_section_ids_mask(), Eq(0b101));
+ EXPECT_THAT(hits1[3].document_id(), Eq(1));
+ EXPECT_THAT(hits1[3].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[4].document_id(), Eq(0));
+ EXPECT_THAT(hits1[4].hit_section_ids_mask(), Eq(0b1));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
foo_term_id, kSectionIdMaskAll,
/*only_from_prefix_sections=*/false,
SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
@@ -104,13 +575,119 @@ TEST_F(LiteIndexTest, LiteIndexAppendHits) {
// Check that no hits are returned because they get skipped by the namespace
// checker.
EXPECT_THAT(hits2, IsEmpty());
+
+ std::vector<DocHitInfo> hits3;
+ lite_index->FetchHits(
+ bar_term_id, 0b1,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits3);
+ EXPECT_THAT(hits3, SizeIs(2));
+ // Check fetching hits with SectionIdMask 0b1: only section-0 hits match.
+ EXPECT_THAT(hits3[0].document_id(), Eq(1));
+ EXPECT_THAT(hits3[0].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits3[1].document_id(), Eq(0));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+
+ // Check that the HitBuffer is sorted after the query call. FetchHits should
+ // sort before performing binary search if the HitBuffer unsorted size exceeds
+ // the sort threshold. Regardless of the sort_at_indexing config.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
}
TEST_F(LiteIndexTest, LiteIndexIterator) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ const std::string term = "foo";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5,
+ /*is_in_prefix_section=*/false);
+ SectionIdMask doc0_section_id_mask = 0b11;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map0 = {{0, 3}, {1, 5}};
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit1));
+
+ Hit doc1_hit1(/*section_id=*/1, /*document_id=*/1, /*term_frequency=*/7,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/2, /*document_id=*/1, /*term_frequency=*/11,
+ /*is_in_prefix_section=*/false);
+ SectionIdMask doc1_section_id_mask = 0b110;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{1, 7}, {2, 11}};
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+
+ std::unique_ptr<DocHitInfoIteratorTermLiteExact> iter =
+ std::make_unique<DocHitInfoIteratorTermLiteExact>(
+ term_id_codec_.get(), lite_index.get(), term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ /*need_hit_term_frequency=*/true);
+
+ ASSERT_THAT(iter->Advance(), IsOk());
+ EXPECT_THAT(iter->doc_hit_info().document_id(), Eq(1));
+ EXPECT_THAT(iter->doc_hit_info().hit_section_ids_mask(),
+ Eq(doc1_section_id_mask));
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iter->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ term, expected_section_ids_tf_map1)));
+
+ ASSERT_THAT(iter->Advance(), IsOk());
+ EXPECT_THAT(iter->doc_hit_info().document_id(), Eq(0));
+ EXPECT_THAT(iter->doc_hit_info().hit_section_ids_mask(),
+ Eq(doc0_section_id_mask));
+ matched_terms_stats.clear();
+ iter->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ term, expected_section_ids_tf_map0)));
+}
+
+TEST_F(LiteIndexTest, LiteIndexIterator_sortAtIndexingDisabled) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
const std::string term = "foo";
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t tvi,
- lite_index_->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
+ lite_index->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
@@ -120,8 +697,8 @@ TEST_F(LiteIndexTest, LiteIndexIterator) {
SectionIdMask doc0_section_id_mask = 0b11;
std::unordered_map<SectionId, Hit::TermFrequency>
expected_section_ids_tf_map0 = {{0, 3}, {1, 5}};
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc0_hit0));
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc0_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit1));
Hit doc1_hit1(/*section_id=*/1, /*document_id=*/1, /*term_frequency=*/7,
/*is_in_prefix_section=*/false);
@@ -130,12 +707,12 @@ TEST_F(LiteIndexTest, LiteIndexIterator) {
SectionIdMask doc1_section_id_mask = 0b110;
std::unordered_map<SectionId, Hit::TermFrequency>
expected_section_ids_tf_map1 = {{1, 7}, {2, 11}};
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit1));
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
std::unique_ptr<DocHitInfoIteratorTermLiteExact> iter =
std::make_unique<DocHitInfoIteratorTermLiteExact>(
- term_id_codec_.get(), lite_index_.get(), term, /*term_start_index=*/0,
+ term_id_codec_.get(), lite_index.get(), term, /*term_start_index=*/0,
/*unnormalized_term_length=*/0, kSectionIdMaskAll,
/*need_hit_term_frequency=*/true);
diff --git a/icing/index/lite/lite-index_thread-safety_test.cc b/icing/index/lite/lite-index_thread-safety_test.cc
index 7711f92..53aa6cd 100644
--- a/icing/index/lite/lite-index_thread-safety_test.cc
+++ b/icing/index/lite/lite-index_thread-safety_test.cc
@@ -19,12 +19,9 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
#include "icing/index/lite/lite-index.h"
#include "icing/index/term-id-codec.h"
#include "icing/schema/section.h"
-#include "icing/store/suggestion-result-checker.h"
-#include "icing/testing/always-false-suggestion-result-checker-impl.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -52,7 +49,9 @@ class LiteIndexThreadSafetyTest : public testing::Test {
std::string lite_index_file_name =
index_dir_ + "/test_file.lite-idx-thread-safety.index";
LiteIndex::Options options(lite_index_file_name,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
LiteIndex::Create(options, &icing_filesystem_));
diff --git a/icing/index/lite/term-id-hit-pair.h b/icing/index/lite/term-id-hit-pair.h
index 61ec502..82bd010 100644
--- a/icing/index/lite/term-id-hit-pair.h
+++ b/icing/index/lite/term-id-hit-pair.h
@@ -73,6 +73,8 @@ class TermIdHitPair {
return value_ == rhs.value_;
}
+ bool operator<(const TermIdHitPair& rhs) const { return value_ < rhs.value_; }
+
private:
Value value_;
};
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc
index 8f0d3f5..5cf6a4c 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.cc
+++ b/icing/index/main/doc-hit-info-iterator-term-main.cc
@@ -14,16 +14,20 @@
#include "icing/index/main/doc-hit-info-iterator-term-main.h"
-#include <cstdint>
#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
-#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/main/main-index.h"
#include "icing/index/main/posting-list-hit-accessor.h"
-#include "icing/legacy/core/icing-string-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/util/logging.h"
@@ -44,6 +48,30 @@ std::string SectionIdMaskToString(SectionIdMask section_id_mask) {
return mask;
}
+void MergeNewHitIntoCachedDocHitInfos(
+ const Hit& hit, bool need_hit_term_frequency,
+ std::vector<DocHitInfoIteratorTermMain::DocHitInfoAndTermFrequencyArray>&
+ cached_doc_hit_infos_out) {
+ if (cached_doc_hit_infos_out.empty() ||
+ hit.document_id() !=
+ cached_doc_hit_infos_out.back().doc_hit_info.document_id()) {
+ std::optional<Hit::TermFrequencyArray> tf_arr;
+ if (need_hit_term_frequency) {
+ tf_arr = std::make_optional<Hit::TermFrequencyArray>();
+ }
+
+ cached_doc_hit_infos_out.push_back(
+ DocHitInfoIteratorTermMain::DocHitInfoAndTermFrequencyArray(
+ DocHitInfo(hit.document_id()), std::move(tf_arr)));
+ }
+
+ cached_doc_hit_infos_out.back().doc_hit_info.UpdateSection(hit.section_id());
+ if (need_hit_term_frequency) {
+ (*cached_doc_hit_infos_out.back().term_frequency_array)[hit.section_id()] =
+ hit.term_frequency();
+ }
+}
+
} // namespace
libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
@@ -76,7 +104,8 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
- doc_hit_info_ = cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_);
+ doc_hit_info_ =
+ cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_).doc_hit_info;
hit_intersect_section_ids_mask_ = doc_hit_info_.hit_section_ids_mask();
return libtextclassifier3::Status::OK;
}
@@ -90,16 +119,16 @@ DocHitInfoIteratorTermMain::TrimRightMostNode() && {
}
libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
- DocHitInfo last_doc_hit_info;
+ DocHitInfoAndTermFrequencyArray last_doc_hit_info;
if (!cached_doc_hit_infos_.empty()) {
- last_doc_hit_info = cached_doc_hit_infos_.back();
+ last_doc_hit_info = std::move(cached_doc_hit_infos_.back());
}
cached_doc_hit_infos_idx_ = 0;
cached_doc_hit_infos_.clear();
- if (last_doc_hit_info.document_id() != kInvalidDocumentId) {
+ if (last_doc_hit_info.doc_hit_info.document_id() != kInvalidDocumentId) {
// Carry over the last hit. It might need to be merged with the first hit of
// of the next posting list in the chain.
- cached_doc_hit_infos_.push_back(last_doc_hit_info);
+ cached_doc_hit_infos_.push_back(std::move(last_doc_hit_info));
}
if (posting_list_accessor_ == nullptr) {
ICING_ASSIGN_OR_RETURN(posting_list_accessor_,
@@ -112,8 +141,7 @@ libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
all_pages_consumed_ = true;
}
++num_blocks_inspected_;
- cached_doc_hit_infos_.reserve(hits.size() + 1);
- cached_hit_term_frequency_.reserve(hits.size() + 1);
+ cached_doc_hit_infos_.reserve(cached_doc_hit_infos_.size() + hits.size());
for (const Hit& hit : hits) {
// Check sections.
if (((UINT64_C(1) << hit.section_id()) & section_restrict_mask_) == 0) {
@@ -123,13 +151,9 @@ libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
if (hit.is_prefix_hit()) {
continue;
}
- if (cached_doc_hit_infos_.empty() ||
- hit.document_id() != cached_doc_hit_infos_.back().document_id()) {
- cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id()));
- cached_hit_term_frequency_.push_back(Hit::TermFrequencyArray());
- }
- cached_doc_hit_infos_.back().UpdateSection(hit.section_id());
- cached_hit_term_frequency_.back()[hit.section_id()] = hit.term_frequency();
+
+ MergeNewHitIntoCachedDocHitInfos(hit, need_hit_term_frequency_,
+ cached_doc_hit_infos_);
}
return libtextclassifier3::Status::OK;
}
@@ -141,16 +165,16 @@ std::string DocHitInfoIteratorTermMainExact::ToString() const {
libtextclassifier3::Status
DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
- DocHitInfo last_doc_hit_info;
+ DocHitInfoAndTermFrequencyArray last_doc_hit_info;
if (!cached_doc_hit_infos_.empty()) {
- last_doc_hit_info = cached_doc_hit_infos_.back();
+ last_doc_hit_info = std::move(cached_doc_hit_infos_.back());
}
cached_doc_hit_infos_idx_ = 0;
cached_doc_hit_infos_.clear();
- if (last_doc_hit_info.document_id() != kInvalidDocumentId) {
+ if (last_doc_hit_info.doc_hit_info.document_id() != kInvalidDocumentId) {
// Carry over the last hit. It might need to be merged with the first hit of
// of the next posting list in the chain.
- cached_doc_hit_infos_.push_back(last_doc_hit_info);
+ cached_doc_hit_infos_.push_back(std::move(last_doc_hit_info));
}
++num_blocks_inspected_;
@@ -165,10 +189,7 @@ DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
if (hits.empty()) {
all_pages_consumed_ = true;
}
- cached_doc_hit_infos_.reserve(hits.size());
- if (need_hit_term_frequency_) {
- cached_hit_term_frequency_.reserve(hits.size());
- }
+ cached_doc_hit_infos_.reserve(cached_doc_hit_infos_.size() + hits.size());
for (const Hit& hit : hits) {
// Check sections.
if (((UINT64_C(1) << hit.section_id()) & section_restrict_mask_) == 0) {
@@ -178,18 +199,9 @@ DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
if (!exact_ && !hit.is_in_prefix_section()) {
continue;
}
- if (cached_doc_hit_infos_.empty() ||
- hit.document_id() != cached_doc_hit_infos_.back().document_id()) {
- cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id()));
- if (need_hit_term_frequency_) {
- cached_hit_term_frequency_.push_back(Hit::TermFrequencyArray());
- }
- }
- cached_doc_hit_infos_.back().UpdateSection(hit.section_id());
- if (need_hit_term_frequency_) {
- cached_hit_term_frequency_.back()[hit.section_id()] =
- hit.term_frequency();
- }
+
+ MergeNewHitIntoCachedDocHitInfos(hit, need_hit_term_frequency_,
+ cached_doc_hit_infos_);
}
return libtextclassifier3::Status::OK;
}
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h
index 08a385c..1987e12 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.h
+++ b/icing/index/main/doc-hit-info-iterator-term-main.h
@@ -17,10 +17,14 @@
#include <cstdint>
#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/main/main-index.h"
#include "icing/index/main/posting-list-hit-accessor.h"
@@ -31,6 +35,19 @@ namespace lib {
class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
public:
+ struct DocHitInfoAndTermFrequencyArray {
+ DocHitInfo doc_hit_info;
+ std::optional<Hit::TermFrequencyArray> term_frequency_array;
+
+ explicit DocHitInfoAndTermFrequencyArray() = default;
+
+ explicit DocHitInfoAndTermFrequencyArray(
+ DocHitInfo doc_hit_info_in,
+ std::optional<Hit::TermFrequencyArray> term_frequency_array_in)
+ : doc_hit_info(std::move(doc_hit_info_in)),
+ term_frequency_array(std::move(term_frequency_array_in)) {}
+ };
+
explicit DocHitInfoIteratorTermMain(MainIndex* main_index,
const std::string& term,
int term_start_index,
@@ -74,8 +91,9 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
while (section_mask_copy) {
SectionId section_id = __builtin_ctzll(section_mask_copy);
if (need_hit_term_frequency_) {
- section_term_frequencies.at(section_id) = cached_hit_term_frequency_.at(
- cached_doc_hit_infos_idx_)[section_id];
+ section_term_frequencies.at(section_id) =
+ (*cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_)
+ .term_frequency_array)[section_id];
}
section_mask_copy &= ~(UINT64_C(1) << section_id);
}
@@ -106,12 +124,13 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
std::unique_ptr<PostingListHitAccessor> posting_list_accessor_;
MainIndex* main_index_;
- // Stores hits retrieved from the index. This may only be a subset of the hits
- // that are present in the index. Current value pointed to by the Iterator is
- // tracked by cached_doc_hit_infos_idx_.
- std::vector<DocHitInfo> cached_doc_hit_infos_;
- std::vector<Hit::TermFrequencyArray> cached_hit_term_frequency_;
+ // Stores hits and optional term frequency arrays retrieved from the index.
+ // This may only be a subset of the hits that are present in the index.
+ // Current value pointed to by the Iterator is tracked by
+ // cached_doc_hit_infos_idx_.
+ std::vector<DocHitInfoAndTermFrequencyArray> cached_doc_hit_infos_;
int cached_doc_hit_infos_idx_;
+
int num_advance_calls_;
int num_blocks_inspected_;
bool all_pages_consumed_;
@@ -168,10 +187,6 @@ class DocHitInfoIteratorTermMainPrefix : public DocHitInfoIteratorTermMain {
libtextclassifier3::Status RetrieveMoreHits() override;
private:
- // After retrieving DocHitInfos from the index, a DocHitInfo for docid 1 and
- // "foo" and a DocHitInfo for docid 1 and "fool". These DocHitInfos should be
- // merged.
- void SortAndDedupeDocumentIds();
// Whether or not posting_list_accessor_ holds a posting list chain for
// 'term' or for a term for which 'term' is a prefix. This is necessary to
// determine whether to return hits that are not from a prefix section (hits
diff --git a/icing/index/main/main-index-merger_test.cc b/icing/index/main/main-index-merger_test.cc
index 8a2f691..37e14fc 100644
--- a/icing/index/main/main-index-merger_test.cc
+++ b/icing/index/main/main-index-merger_test.cc
@@ -45,7 +45,9 @@ class MainIndexMergerTest : public testing::Test {
std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
LiteIndex::Options options(lite_index_file_name,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
LiteIndex::Create(options, &icing_filesystem_));
diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc
index ac724b0..fa96e6c 100644
--- a/icing/index/main/main-index_test.cc
+++ b/icing/index/main/main-index_test.cc
@@ -38,6 +38,7 @@ namespace lib {
namespace {
using ::testing::ElementsAre;
+using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::NiceMock;
using ::testing::Return;
@@ -90,7 +91,9 @@ class MainIndexTest : public testing::Test {
std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
LiteIndex::Options options(lite_index_file_name,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
LiteIndex::Create(options, &icing_filesystem_));
@@ -361,7 +364,9 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) {
// - Doc4 {"four", "foul" is_in_prefix_section=true}
std::string lite_index_file_name2 = index_dir_ + "/test_file.lite-idx.index2";
LiteIndex::Options options(lite_index_file_name2,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
LiteIndex::Create(options, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -531,30 +536,35 @@ TEST_F(MainIndexTest, PrefixNotRetrievedInExactSearch) {
std::vector<SectionId>{doc1_hit.section_id()})));
}
-TEST_F(MainIndexTest, SearchChainedPostingLists) {
+TEST_F(MainIndexTest,
+ SearchChainedPostingListsShouldMergeSectionsAndTermFrequency) {
// Index 2048 document with 3 hits in each document. When merged into the main
// index, this will 1) lead to a chained posting list and 2) split at least
// one document's hits across multiple posting lists.
+ const std::string term = "foot";
+
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t tvi,
- lite_index_->InsertTerm("foot", TermMatchType::EXACT_ONLY, kNamespace0));
+ lite_index_->InsertTerm(term, TermMatchType::EXACT_ONLY, kNamespace0));
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
for (DocumentId document_id = 0; document_id < 2048; ++document_id) {
- Hit doc_hit0(/*section_id=*/0, /*document_id=*/document_id,
- Hit::kDefaultTermFrequency,
- /*is_in_prefix_section=*/false);
+ Hit::TermFrequency term_frequency = static_cast<Hit::TermFrequency>(
+ document_id % Hit::kMaxTermFrequency + 1);
+ Hit doc_hit0(
+ /*section_id=*/0, /*document_id=*/document_id, term_frequency,
+ /*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit0));
- Hit doc_hit1(/*section_id=*/1, /*document_id=*/document_id,
- Hit::kDefaultTermFrequency,
- /*is_in_prefix_section=*/false);
+ Hit doc_hit1(
+ /*section_id=*/1, /*document_id=*/document_id, term_frequency,
+ /*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit1));
- Hit doc_hit2(/*section_id=*/2, /*document_id=*/document_id,
- Hit::kDefaultTermFrequency,
- /*is_in_prefix_section=*/false);
+ Hit doc_hit2(
+ /*section_id=*/2, /*document_id=*/document_id, term_frequency,
+ /*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit2));
}
@@ -568,15 +578,35 @@ TEST_F(MainIndexTest, SearchChainedPostingLists) {
// 3. Merge the lite index.
ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits for all documents containing "foot" - which should be all of them.
- std::vector<DocHitInfo> hits =
- GetExactHits(main_index.get(), /*term_start_index=*/0,
- /*unnormalized_term_length=*/0, "foot");
- EXPECT_THAT(hits, SizeIs(2048));
- EXPECT_THAT(hits.front(),
- EqualsDocHitInfo(2047, std::vector<SectionId>{0, 1, 2}));
- EXPECT_THAT(hits.back(),
- EqualsDocHitInfo(0, std::vector<SectionId>{0, 1, 2}));
+ auto iterator = std::make_unique<DocHitInfoIteratorTermMainExact>(
+ main_index.get(), term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ /*need_hit_term_frequency=*/true);
+
+ DocumentId expected_document_id = 2047;
+ while (iterator->Advance().ok()) {
+ EXPECT_THAT(iterator->doc_hit_info(),
+ EqualsDocHitInfo(expected_document_id,
+ std::vector<SectionId>{0, 1, 2}));
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+
+ Hit::TermFrequency expected_term_frequency =
+ static_cast<Hit::TermFrequency>(
+ expected_document_id % Hit::kMaxTermFrequency + 1);
+ ASSERT_THAT(matched_terms_stats, SizeIs(1));
+ EXPECT_THAT(matched_terms_stats[0].term, Eq(term));
+ EXPECT_THAT(matched_terms_stats[0].term_frequencies[0],
+ Eq(expected_term_frequency));
+ EXPECT_THAT(matched_terms_stats[0].term_frequencies[1],
+ Eq(expected_term_frequency));
+ EXPECT_THAT(matched_terms_stats[0].term_frequencies[2],
+ Eq(expected_term_frequency));
+ --expected_document_id;
+ }
+ EXPECT_THAT(expected_document_id, Eq(-1));
}
TEST_F(MainIndexTest, MergeIndexBackfilling) {
@@ -606,7 +636,9 @@ TEST_F(MainIndexTest, MergeIndexBackfilling) {
// - Doc1 {"foot" is_in_prefix_section=false}
std::string lite_index_file_name2 = index_dir_ + "/test_file.lite-idx.index2";
LiteIndex::Options options(lite_index_file_name2,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
LiteIndex::Create(options, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/index/numeric/dummy-numeric-index.h b/icing/index/numeric/dummy-numeric-index.h
index 34b1acb..ce5fa45 100644
--- a/icing/index/numeric/dummy-numeric-index.h
+++ b/icing/index/numeric/dummy-numeric-index.h
@@ -183,7 +183,11 @@ class DummyNumericIndex : public NumericIndex<T> {
std::string&& working_path)
: NumericIndex<T>(filesystem, std::move(working_path),
PersistentStorage::WorkingPathType::kDummy),
- last_added_document_id_(kInvalidDocumentId) {}
+ dummy_crcs_buffer_(
+ std::make_unique<uint8_t[]>(sizeof(PersistentStorage::Crcs))),
+ last_added_document_id_(kInvalidDocumentId) {
+ memset(dummy_crcs_buffer_.get(), 0, sizeof(PersistentStorage::Crcs));
+ }
libtextclassifier3::Status PersistStoragesToDisk(bool force) override {
return libtextclassifier3::Status::OK;
@@ -202,11 +206,17 @@ class DummyNumericIndex : public NumericIndex<T> {
return Crc32(0);
}
- PersistentStorage::Crcs& crcs() override { return dummy_crcs_; }
- const PersistentStorage::Crcs& crcs() const override { return dummy_crcs_; }
+ PersistentStorage::Crcs& crcs() override {
+ return *reinterpret_cast<PersistentStorage::Crcs*>(
+ dummy_crcs_buffer_.get());
+ }
+ const PersistentStorage::Crcs& crcs() const override {
+ return *reinterpret_cast<const PersistentStorage::Crcs*>(
+ dummy_crcs_buffer_.get());
+ }
std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>> storage_;
- PersistentStorage::Crcs dummy_crcs_;
+ std::unique_ptr<uint8_t[]> dummy_crcs_buffer_;
DocumentId last_added_document_id_;
};
diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc
index 69b8889..f5e06ad 100644
--- a/icing/index/string-section-indexing-handler.cc
+++ b/icing/index/string-section-indexing-handler.cc
@@ -122,6 +122,17 @@ libtextclassifier3::Status StringSectionIndexingHandler::Handle(
}
}
+ // Check and sort the LiteIndex HitBuffer if we're successful.
+ if (status.ok() && index_.LiteIndexNeedSort()) {
+ std::unique_ptr<Timer> sort_timer = clock_.GetNewTimer();
+ index_.SortLiteIndex();
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_lite_index_sort_latency_ms(
+ sort_timer->GetElapsedMilliseconds());
+ }
+ }
+
if (put_document_stats != nullptr) {
put_document_stats->set_term_index_latency_ms(
index_timer->GetElapsedMilliseconds());
diff --git a/icing/index/string-section-indexing-handler_test.cc b/icing/index/string-section-indexing-handler_test.cc
new file mode 100644
index 0000000..2c7f5e3
--- /dev/null
+++ b/icing/index/string-section-indexing-handler_test.cc
@@ -0,0 +1,587 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/string-section-indexing-handler.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Test;
+
+// Schema type with indexable properties and section Id.
+// Section Id is determined by the lexicographical order of indexable property
+// path.
+// Section id = 0: body
+// Section id = 1: title
+constexpr std::string_view kFakeType = "FakeType";
+constexpr std::string_view kPropertyBody = "body";
+constexpr std::string_view kPropertyTitle = "title";
+
+constexpr SectionId kSectionIdBody = 0;
+constexpr SectionId kSectionIdTitle = 1;
+
+// Schema type with nested indexable properties and section Id.
+// Section id = 0: "name"
+// Section id = 1: "nested.body"
+// Section id = 3: "nested.title"
+// Section id = 4: "subject"
+constexpr std::string_view kNestedType = "NestedType";
+constexpr std::string_view kPropertyName = "name";
+constexpr std::string_view kPropertyNestedDoc = "nested";
+constexpr std::string_view kPropertySubject = "subject";
+
+constexpr SectionId kSectionIdNestedBody = 1;
+
+class StringSectionIndexingHandlerTest : public Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ index_dir_ = base_dir_ + "/index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ document_store_dir_ = base_dir_ + "/document_store";
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ normalizer_,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kFakeType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTitle)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyBody)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyNestedDoc)
+ .SetDataTypeDocument(
+ kFakeType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(doc_store_create_result.document_store);
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ normalizer_.reset();
+ lang_segmenter_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string index_dir_;
+ std::string schema_store_dir_;
+ std::string document_store_dir_;
+
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+};
+
+std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfo> infos;
+ while (iterator->Advance().ok()) {
+ infos.push_back(iterator->doc_hit_info());
+ }
+ return infos;
+}
+
+std::vector<DocHitInfoTermFrequencyPair> GetHitsWithTermFrequency(
+ std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfoTermFrequencyPair> infos;
+ while (iterator->Advance().ok()) {
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ for (const TermMatchInfo& term_match_info : matched_terms_stats) {
+ infos.push_back(DocHitInfoTermFrequencyPair(
+ iterator->doc_hit_info(), term_match_info.term_frequencies));
+ }
+ }
+ return infos;
+}
+
+TEST_F(StringSectionIndexingHandlerTest,
+ HandleIntoLiteIndex_sortInIndexingNotTriggered) {
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "foo")
+ .AddStringProperty(std::string(kPropertyBody), "foo bar baz")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<StringSectionIndexingHandler> handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id));
+
+ // Query 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
+ {kSectionIdTitle, 1}, {kSectionIdBody, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ document_id, expected_map)));
+
+ // Query 'foo' with sectionId mask that masks all results
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, 1U << 2,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(StringSectionIndexingHandlerTest,
+ HandleIntoLiteIndex_sortInIndexingTriggered) {
+ // Create the LiteIndex with a smaller sort threshold. At 64 bytes we sort the
+ // HitBuffer after inserting 8 hits.
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ DocumentProto document0 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "foo foo foo")
+ .AddStringProperty(std::string(kPropertyBody), "foo bar baz")
+ .Build();
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "bar baz baz")
+ .AddStringProperty(std::string(kPropertyBody), "foo foo baz")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "nested_type/0")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(std::string(kPropertyNestedDoc), document1)
+ .AddStringProperty(std::string(kPropertyName), "qux")
+ .AddStringProperty(std::string(kPropertySubject), "bar bar")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document0,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document0)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id0,
+ document_store_->Put(tokenized_document0.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<StringSectionIndexingHandler> handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index.get()));
+
+ // Handle doc0 and doc1. The LiteIndex should sort and merge after adding
+ // these documents.
+ EXPECT_THAT(handler->Handle(tokenized_document0, document_id0,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document1, document_id1,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id1));
+ EXPECT_THAT(index->LiteIndexNeedSort(), IsFalse());
+
+  // Handle doc2. The LiteIndex should have an unsorted portion after adding it.
+ EXPECT_THAT(handler->Handle(tokenized_document2, document_id2,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id2));
+
+ // Hits in the hit buffer:
+ // <term>: {(docId, sectionId, term_freq)...}
+ // foo: {(0, kSectionIdTitle, 3); (0, kSectionIdBody, 1);
+ // (1, kSectionIdBody, 2);
+ // (2, kSectionIdNestedBody, 2)}
+ // bar: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 1);
+ // (2, kSectionIdNestedTitle, 1); (2, kSectionIdSubject, 2)}
+ // baz: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 2); (1, kSectionIdBody, 1),
+ // (2, kSectionIdNestedTitle, 2); (2, kSectionIdNestedBody, 1)}
+ // qux: {(2, kSectionIdName, 1)}
+
+ // Query 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Advance the iterator and verify that we're returning hits in the correct
+ // order (i.e. in descending order of DocId)
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(2));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdNestedBody));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2 = {{kSectionIdNestedBody, 2}};
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map2)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(1));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{kSectionIdBody, 2}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map1)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(0));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdTitle | 1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map0 = {{kSectionIdTitle, 3},
+ {kSectionIdBody, 1}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map0)));
+}
+
+TEST_F(StringSectionIndexingHandlerTest,
+ HandleIntoLiteIndex_enableSortInIndexing) {
+  // Create the LiteIndex with a smaller sort threshold (64 bytes, i.e. 8
+  // hits), but with sort-at-indexing disabled so the unsorted tail can grow.
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/false,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ DocumentProto document0 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "foo foo foo")
+ .AddStringProperty(std::string(kPropertyBody), "foo bar baz")
+ .Build();
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "bar baz baz")
+ .AddStringProperty(std::string(kPropertyBody), "foo foo baz")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "nested_type/0")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(std::string(kPropertyNestedDoc), document1)
+ .AddStringProperty(std::string(kPropertyName), "qux")
+ .AddStringProperty(std::string(kPropertySubject), "bar bar")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document0,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document0)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id0,
+ document_store_->Put(tokenized_document0.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<StringSectionIndexingHandler> handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index.get()));
+
+ // Handle all docs
+ EXPECT_THAT(handler->Handle(tokenized_document0, document_id0,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document1, document_id1,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document2, document_id2,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id2));
+
+ // We've disabled sorting during indexing so the HitBuffer's unsorted section
+ // should exceed the sort threshold. PersistToDisk and reinitialize the
+ // LiteIndex with sort_at_indexing=true.
+ ASSERT_THAT(index->PersistToDisk(), IsOk());
+ options = Index::Options(index_dir_,
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Verify that the HitBuffer has been sorted after initializing with
+ // sort_at_indexing enabled.
+ EXPECT_THAT(index->LiteIndexNeedSort(), IsFalse());
+
+ // Hits in the hit buffer:
+ // <term>: {(docId, sectionId, term_freq)...}
+ // foo: {(0, kSectionIdTitle, 3); (0, kSectionIdBody, 1);
+ // (1, kSectionIdBody, 2);
+ // (2, kSectionIdNestedBody, 2)}
+ // bar: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 1);
+ // (2, kSectionIdNestedTitle, 1); (2, kSectionIdSubject, 2)}
+ // baz: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 2); (1, kSectionIdBody, 1),
+ // (2, kSectionIdNestedTitle, 2); (2, kSectionIdNestedBody, 1)}
+ // qux: {(2, kSectionIdName, 1)}
+
+ // Query 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Advance the iterator and verify that we're returning hits in the correct
+ // order (i.e. in descending order of DocId)
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(2));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdNestedBody));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2 = {{kSectionIdNestedBody, 2}};
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map2)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(1));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{kSectionIdBody, 2}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map1)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(0));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdTitle | 1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map0 = {{kSectionIdTitle, 3},
+ {kSectionIdBody, 1}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map0)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/jni.lds b/icing/jni.lds
index 401682a..64fae36 100644
--- a/icing/jni.lds
+++ b/icing/jni.lds
@@ -1,7 +1,6 @@
VERS_1.0 {
# Export JNI symbols.
global:
- Java_*;
JNI_OnLoad;
# Hide everything else
diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc
index f2a33e0..f551054 100644
--- a/icing/jni/icing-search-engine-jni.cc
+++ b/icing/jni/icing-search-engine-jni.cc
@@ -36,10 +36,6 @@
namespace {
-// JNI string constants
-// Matches field name of IcingSearchEngine#nativePointer.
-const char kNativePointerField[] = "nativePointer";
-
bool ParseProtoFromJniByteArray(JNIEnv* env, jbyteArray bytes,
google::protobuf::MessageLite* protobuf) {
icing::lib::ScopedPrimitiveArrayCritical<uint8_t> scoped_array(env, bytes);
@@ -61,11 +57,14 @@ jbyteArray SerializeProtoToJniByteArray(JNIEnv* env,
return ret;
}
+struct {
+ jfieldID native_pointer;
+} JavaIcingSearchEngineImpl;
+
icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(JNIEnv* env,
jobject object) {
- jclass cls = env->GetObjectClass(object);
- jfieldID field_id = env->GetFieldID(cls, kNativePointerField, "J");
- jlong native_pointer = env->GetLongField(object, field_id);
+ jlong native_pointer =
+ env->GetLongField(object, JavaIcingSearchEngineImpl.native_pointer);
return reinterpret_cast<icing::lib::IcingSearchEngine*>(native_pointer);
}
@@ -73,19 +72,8 @@ icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(JNIEnv* env,
extern "C" {
-jint JNI_OnLoad(JavaVM* vm, void* reserved) {
- JNIEnv* env;
- if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != JNI_OK) {
- ICING_LOG(icing::lib::ERROR) << "ERROR: GetEnv failed";
- return JNI_ERR;
- }
-
- return JNI_VERSION_1_6;
-}
-
-JNIEXPORT jlong JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeCreate(
- JNIEnv* env, jclass clazz, jbyteArray icing_search_engine_options_bytes) {
+jlong nativeCreate(JNIEnv* env, jclass clazz,
+ jbyteArray icing_search_engine_options_bytes) {
icing::lib::IcingSearchEngineOptions options;
if (!ParseProtoFromJniByteArray(env, icing_search_engine_options_bytes,
&options)) {
@@ -103,17 +91,13 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeCreate(
return reinterpret_cast<jlong>(icing);
}
-JNIEXPORT void JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeDestroy(
- JNIEnv* env, jclass clazz, jobject object) {
+void nativeDestroy(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
delete icing;
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeInitialize(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeInitialize(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -123,10 +107,9 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeInitialize(
return SerializeProtoToJniByteArray(env, initialize_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetSchema(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray schema_bytes,
- jboolean ignore_errors_and_delete_documents) {
+jbyteArray nativeSetSchema(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray schema_bytes,
+ jboolean ignore_errors_and_delete_documents) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -143,9 +126,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetSchema(
return SerializeProtoToJniByteArray(env, set_schema_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchema(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeGetSchema(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -154,9 +135,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchema(
return SerializeProtoToJniByteArray(env, get_schema_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchemaType(
- JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
+jbyteArray nativeGetSchemaType(JNIEnv* env, jclass clazz, jobject object,
+ jstring schema_type) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -167,9 +147,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchemaType(
return SerializeProtoToJniByteArray(env, get_schema_type_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativePut(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray document_bytes) {
+jbyteArray nativePut(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray document_bytes) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -186,10 +165,9 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativePut(
return SerializeProtoToJniByteArray(env, put_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGet(
- JNIEnv* env, jclass clazz, jobject object, jstring name_space, jstring uri,
- jbyteArray result_spec_bytes) {
+jbyteArray nativeGet(JNIEnv* env, jclass clazz, jobject object,
+ jstring name_space, jstring uri,
+ jbyteArray result_spec_bytes) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -208,9 +186,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGet(
return SerializeProtoToJniByteArray(env, get_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeReportUsage(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray usage_report_bytes) {
+jbyteArray nativeReportUsage(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray usage_report_bytes) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -227,9 +204,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeReportUsage(
return SerializeProtoToJniByteArray(env, report_usage_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetAllNamespaces(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeGetAllNamespaces(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -239,10 +214,9 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetAllNamespaces(
return SerializeProtoToJniByteArray(env, get_all_namespaces_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetNextPage(
- JNIEnv* env, jclass clazz, jobject object, jlong next_page_token,
- jlong java_to_native_start_timestamp_ms) {
+jbyteArray nativeGetNextPage(JNIEnv* env, jclass clazz, jobject object,
+ jlong next_page_token,
+ jlong java_to_native_start_timestamp_ms) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -263,9 +237,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetNextPage(
return SerializeProtoToJniByteArray(env, next_page_result_proto);
}
-JNIEXPORT void JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeInvalidateNextPageToken(
- JNIEnv* env, jclass clazz, jobject object, jlong next_page_token) {
+void nativeInvalidateNextPageToken(JNIEnv* env, jclass clazz, jobject object,
+ jlong next_page_token) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -274,11 +247,11 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeInvalidateNextPageToke
return;
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearch(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes,
- jbyteArray scoring_spec_bytes, jbyteArray result_spec_bytes,
- jlong java_to_native_start_timestamp_ms) {
+jbyteArray nativeSearch(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray search_spec_bytes,
+ jbyteArray scoring_spec_bytes,
+ jbyteArray result_spec_bytes,
+ jlong java_to_native_start_timestamp_ms) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -321,10 +294,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearch(
return SerializeProtoToJniByteArray(env, search_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeDelete(
- JNIEnv* env, jclass clazz, jobject object, jstring name_space,
- jstring uri) {
+jbyteArray nativeDelete(JNIEnv* env, jclass clazz, jobject object,
+ jstring name_space, jstring uri) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -336,9 +307,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeDelete(
return SerializeProtoToJniByteArray(env, delete_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByNamespace(
- JNIEnv* env, jclass clazz, jobject object, jstring name_space) {
+jbyteArray nativeDeleteByNamespace(JNIEnv* env, jclass clazz, jobject object,
+ jstring name_space) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -349,9 +319,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByNamespace(
return SerializeProtoToJniByteArray(env, delete_by_namespace_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteBySchemaType(
- JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
+jbyteArray nativeDeleteBySchemaType(JNIEnv* env, jclass clazz, jobject object,
+ jstring schema_type) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -362,10 +331,9 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteBySchemaType(
return SerializeProtoToJniByteArray(env, delete_by_schema_type_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByQuery(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes,
- jboolean return_deleted_document_info) {
+jbyteArray nativeDeleteByQuery(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray search_spec_bytes,
+ jboolean return_deleted_document_info) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -381,9 +349,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByQuery(
return SerializeProtoToJniByteArray(env, delete_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativePersistToDisk(
- JNIEnv* env, jclass clazz, jobject object, jint persist_type_code) {
+jbyteArray nativePersistToDisk(JNIEnv* env, jclass clazz, jobject object,
+ jint persist_type_code) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -400,9 +367,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativePersistToDisk(
return SerializeProtoToJniByteArray(env, persist_to_disk_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeOptimize(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeOptimize(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -411,9 +376,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeOptimize(
return SerializeProtoToJniByteArray(env, optimize_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetOptimizeInfo(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeGetOptimizeInfo(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -423,9 +386,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetOptimizeInfo(
return SerializeProtoToJniByteArray(env, get_optimize_info_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetStorageInfo(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeGetStorageInfo(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -435,9 +396,7 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetStorageInfo(
return SerializeProtoToJniByteArray(env, storage_info_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeReset(
- JNIEnv* env, jclass clazz, jobject object) {
+jbyteArray nativeReset(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -446,10 +405,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeReset(
return SerializeProtoToJniByteArray(env, reset_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearchSuggestions(
- JNIEnv* env, jclass clazz, jobject object,
- jbyteArray suggestion_spec_bytes) {
+jbyteArray nativeSearchSuggestions(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray suggestion_spec_bytes) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -466,9 +423,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearchSuggestions(
return SerializeProtoToJniByteArray(env, suggestionResponse);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetDebugInfo(
- JNIEnv* env, jclass clazz, jobject object, jint verbosity) {
+jbyteArray nativeGetDebugInfo(JNIEnv* env, jclass clazz, jobject object,
+ jint verbosity) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
@@ -485,9 +441,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetDebugInfo(
return SerializeProtoToJniByteArray(env, debug_info_result_proto);
}
-JNIEXPORT jboolean JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeShouldLog(
- JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) {
+jboolean nativeShouldLog(JNIEnv* env, jclass clazz, jshort severity,
+ jshort verbosity) {
if (!icing::lib::LogSeverity::Code_IsValid(severity)) {
ICING_LOG(icing::lib::ERROR)
<< "Invalid value for logging severity: " << severity;
@@ -497,9 +452,8 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeShouldLog(
static_cast<icing::lib::LogSeverity::Code>(severity), verbosity);
}
-JNIEXPORT jboolean JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetLoggingLevel(
- JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) {
+jboolean nativeSetLoggingLevel(JNIEnv* env, jclass clazz, jshort severity,
+ jshort verbosity) {
if (!icing::lib::LogSeverity::Code_IsValid(severity)) {
ICING_LOG(icing::lib::ERROR)
<< "Invalid value for logging severity: " << severity;
@@ -509,216 +463,97 @@ Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetLoggingLevel(
static_cast<icing::lib::LogSeverity::Code>(severity), verbosity);
}
-JNIEXPORT jstring JNICALL
-Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetLoggingTag(
- JNIEnv* env, jclass clazz) {
+jstring nativeGetLoggingTag(JNIEnv* env, jclass clazz) {
return env->NewStringUTF(icing::lib::kIcingLoggingTag);
}
-// TODO(b/240333360) Remove the methods below for IcingSearchEngine once we have
-// a sync from Jetpack to g3 to contain the refactored IcingSearchEngine(with
-// IcingSearchEngineImpl).
-JNIEXPORT jlong JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeCreate(
- JNIEnv* env, jclass clazz, jbyteArray icing_search_engine_options_bytes) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeCreate(
- env, clazz, icing_search_engine_options_bytes);
-}
-
-JNIEXPORT void JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDestroy(JNIEnv* env,
- jclass clazz,
- jobject object) {
- Java_com_google_android_icing_IcingSearchEngineImpl_nativeDestroy(env, clazz,
- object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeInitialize(
- JNIEnv* env, jclass clazz, jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeInitialize(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeSetSchema(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray schema_bytes,
- jboolean ignore_errors_and_delete_documents) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetSchema(
- env, clazz, object, schema_bytes, ignore_errors_and_delete_documents);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetSchema(
- JNIEnv* env, jclass clazz, jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchema(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetSchemaType(
- JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetSchemaType(
- env, clazz, object, schema_type);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativePut(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray document_bytes) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativePut(
- env, clazz, object, document_bytes);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGet(
- JNIEnv* env, jclass clazz, jobject object, jstring name_space, jstring uri,
- jbyteArray result_spec_bytes) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGet(
- env, clazz, object, name_space, uri, result_spec_bytes);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeReportUsage(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray usage_report_bytes) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeReportUsage(
- env, clazz, object, usage_report_bytes);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetAllNamespaces(
- JNIEnv* env, jclass clazz, jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetAllNamespaces(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetNextPage(
- JNIEnv* env, jclass clazz, jobject object, jlong next_page_token,
- jlong java_to_native_start_timestamp_ms) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetNextPage(
- env, clazz, object, next_page_token, java_to_native_start_timestamp_ms);
-}
-
-JNIEXPORT void JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeInvalidateNextPageToken(
- JNIEnv* env, jclass clazz, jobject object, jlong next_page_token) {
- Java_com_google_android_icing_IcingSearchEngineImpl_nativeInvalidateNextPageToken(
- env, clazz, object, next_page_token);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeSearch(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes,
- jbyteArray scoring_spec_bytes, jbyteArray result_spec_bytes,
- jlong java_to_native_start_timestamp_ms) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearch(
- env, clazz, object, search_spec_bytes, scoring_spec_bytes,
- result_spec_bytes, java_to_native_start_timestamp_ms);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDelete(JNIEnv* env,
- jclass clazz,
- jobject object,
- jstring name_space,
- jstring uri) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeDelete(
- env, clazz, object, name_space, uri);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByNamespace(
- JNIEnv* env, jclass clazz, jobject object, jstring name_space) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByNamespace(
- env, clazz, object, name_space);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType(
- JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteBySchemaType(
- env, clazz, object, schema_type);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery(
- JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes,
- jboolean return_deleted_document_info) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeDeleteByQuery(
- env, clazz, object, search_spec_bytes, return_deleted_document_info);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativePersistToDisk(
- JNIEnv* env, jclass clazz, jobject object, jint persist_type_code) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativePersistToDisk(
- env, clazz, object, persist_type_code);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeOptimize(JNIEnv* env,
- jclass clazz,
- jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeOptimize(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetOptimizeInfo(
- JNIEnv* env, jclass clazz, jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetOptimizeInfo(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetStorageInfo(
- JNIEnv* env, jclass clazz, jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetStorageInfo(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeReset(JNIEnv* env,
- jclass clazz,
- jobject object) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeReset(
- env, clazz, object);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeSearchSuggestions(
- JNIEnv* env, jclass clazz, jobject object,
- jbyteArray suggestion_spec_bytes) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeSearchSuggestions(
- env, clazz, object, suggestion_spec_bytes);
-}
-
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetDebugInfo(
- JNIEnv* env, jclass clazz, jobject object, jint verbosity) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetDebugInfo(
- env, clazz, object, verbosity);
-}
-
-JNIEXPORT jboolean JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeShouldLog(
- JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeShouldLog(
- env, clazz, severity, verbosity);
-}
+#pragma clang diagnostic ignored "-Wwrite-strings"
+jint JNI_OnLoad(JavaVM* vm, void* reserved) {
+ JNIEnv* env;
+ if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != JNI_OK) {
+ ICING_LOG(icing::lib::ERROR) << "ERROR: GetEnv failed";
+ return JNI_ERR;
+ }
-JNIEXPORT jboolean JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeSetLoggingLevel(
- JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeSetLoggingLevel(
- env, clazz, severity, verbosity);
-}
+ // Find your class. JNI_OnLoad is called from the correct class loader context
+ // for this to work.
+ jclass java_class =
+ env->FindClass("com/google/android/icing/IcingSearchEngineImpl");
+ if (java_class == nullptr) {
+ return JNI_ERR;
+ }
+ JavaIcingSearchEngineImpl.native_pointer =
+ env->GetFieldID(java_class, "nativePointer", "J");
+
+ // Register your class' native methods.
+ static const JNINativeMethod methods[] = {
+ {"nativeCreate", "([B)J", reinterpret_cast<void*>(nativeCreate)},
+ {"nativeDestroy", "(Lcom/google/android/icing/IcingSearchEngineImpl;)V",
+ reinterpret_cast<void*>(nativeDestroy)},
+ {"nativeInitialize",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeInitialize)},
+ {"nativeSetSchema",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[BZ)[B",
+ reinterpret_cast<void*>(nativeSetSchema)},
+ {"nativeGetSchema",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetSchema)},
+ {"nativePut", "(Lcom/google/android/icing/IcingSearchEngineImpl;[B)[B",
+ reinterpret_cast<void*>(nativePut)},
+ {"nativeGet",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/"
+ "String;Ljava/lang/String;[B)[B",
+ reinterpret_cast<void*>(nativeGet)},
+ {"nativeReportUsage",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[B)[B",
+ reinterpret_cast<void*>(nativeReportUsage)},
+ {"nativeGetAllNamespaces",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetAllNamespaces)},
+ {"nativeGetNextPage",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;JJ)[B",
+ reinterpret_cast<void*>(nativeGetNextPage)},
+ {"nativeInvalidateNextPageToken",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;J)V",
+ reinterpret_cast<void*>(nativeInvalidateNextPageToken)},
+ {"nativeSearch",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[B[B[BJ)[B",
+ reinterpret_cast<void*>(nativeSearch)},
+ {"nativeDelete",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/"
+ "String;Ljava/lang/String;)[B",
+ reinterpret_cast<void*>(nativeDelete)},
+ {"nativeDeleteByQuery",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[BZ)[B",
+ reinterpret_cast<void*>(nativeDeleteByQuery)},
+ {"nativePersistToDisk",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;I)[B",
+ reinterpret_cast<void*>(nativePersistToDisk)},
+ {"nativeOptimize", "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeOptimize)},
+ {"nativeGetOptimizeInfo",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetOptimizeInfo)},
+ {"nativeGetStorageInfo",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetStorageInfo)},
+ {"nativeReset", "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeReset)},
+ {"nativeSearchSuggestions",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[B)[B",
+ reinterpret_cast<void*>(nativeSearchSuggestions)},
+ {"nativeGetDebugInfo",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;I)[B",
+ reinterpret_cast<void*>(nativeGetDebugInfo)},
+ };
+ int register_natives_success = env->RegisterNatives(
+ java_class, methods, sizeof(methods) / sizeof(JNINativeMethod));
+ if (register_natives_success != JNI_OK) {
+ return register_natives_success;
+ }
-JNIEXPORT jstring JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetLoggingTag(
- JNIEnv* env, jclass clazz) {
- return Java_com_google_android_icing_IcingSearchEngineImpl_nativeGetLoggingTag(
- env, clazz);
+ return JNI_VERSION_1_6;
}
} // extern "C"
diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc
index d118691..59e924d 100644
--- a/icing/query/advanced_query_parser/query-visitor_test.cc
+++ b/icing/query/advanced_query_parser/query-visitor_test.cc
@@ -125,7 +125,9 @@ class QueryVisitorTest : public ::testing::TestWithParam<QueryType> {
document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_.c_str(),
- /*index_merge_size=*/1024 * 1024);
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 5e0b696..3e43ad9 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -176,6 +176,13 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
std::move(results.root_iterator), &document_store_, &schema_store_,
options, current_time_ms);
+ // TODO(b/294114230): Move this SectionRestrict filter from root level to
+ // lower levels if that would improve performance.
+ if (!search_spec.type_property_filters().empty()) {
+ results.root_iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(results.root_iterator), &document_store_, &schema_store_,
+ search_spec, current_time_ms);
+ }
return results;
}
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index 626f352..025e8e6 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -81,7 +81,9 @@ void AddTokenToIndex(Index* index, DocumentId document_id, SectionId section_id,
std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
const Filesystem& filesystem,
const std::string& index_dir) {
- Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10);
+ Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
return Index::Create(options, &filesystem, &icing_filesystem).ValueOrDie();
}
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 5bbf9ba..e64de32 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -113,7 +113,9 @@ class QueryProcessorTest
document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_,
- /*index_merge_size=*/1024 * 1024);
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
// TODO(b/249829533): switch to use persistent numeric index.
@@ -2646,6 +2648,261 @@ TEST_P(QueryProcessorTest, PropertyFilterTermAndUnrestrictedTerm) {
EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
+TEST_P(QueryProcessorTest, TypePropertyFilter) {
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
+ int email_bar_section_id = 0;
+ int email_baz_section_id = 1;
+ int email_foo_section_id = 2;
+ int message_bar_section_id = 0;
+ int message_baz_section_id = 1;
+ int message_foo_section_id = 2;
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+  // Populate the index
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // Email document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ // Message document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("animal");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+
+ // email has property filters for foo and baz properties
+ TypePropertyMask *email_mask = search_spec.add_type_property_filters();
+ email_mask->set_schema_type("email");
+ email_mask->add_paths("foo");
+ email_mask->add_paths("baz");
+
+ // message has property filters for bar and baz properties
+ TypePropertyMask *message_mask = search_spec.add_type_property_filters();
+ message_mask->set_schema_type("message");
+ message_mask->add_paths("bar");
+ message_mask->add_paths("baz");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ // Ordered by descending DocumentId, so message comes first since it was
+ // inserted last
+ DocHitInfo expected_doc_hit_info1(message_document_id);
+ expected_doc_hit_info1.UpdateSection(message_bar_section_id);
+ expected_doc_hit_info1.UpdateSection(message_baz_section_id);
+ DocHitInfo expected_doc_hit_info2(email_document_id);
+ expected_doc_hit_info2.UpdateSection(email_foo_section_id);
+ expected_doc_hit_info2.UpdateSection(email_baz_section_id);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expected_doc_hit_info1, expected_doc_hit_info2));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+}
+
+TEST_P(QueryProcessorTest, TypePropertyFilterWithSectionRestrict) {
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
+ int email_bar_section_id = 0;
+ int email_baz_section_id = 1;
+ int email_foo_section_id = 2;
+ int message_bar_section_id = 0;
+ int message_baz_section_id = 1;
+ int message_foo_section_id = 2;
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+  // Populate the index
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // Email document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ // Message document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+ // Create a section filter '<section name>:<query term>'
+ search_spec.set_query("foo:animal");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+
+ // email has property filters for foo and baz properties
+ TypePropertyMask *email_mask = search_spec.add_type_property_filters();
+ email_mask->set_schema_type("email");
+ email_mask->add_paths("foo");
+ email_mask->add_paths("baz");
+
+ // message has property filters for bar and baz properties
+ TypePropertyMask *message_mask = search_spec.add_type_property_filters();
+ message_mask->set_schema_type("message");
+ message_mask->add_paths("bar");
+ message_mask->add_paths("baz");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ // Only hits in sections allowed by both the property filters and section
+ // restricts should be returned. Message document should not be returned since
+ // section foo specified in the section restrict is not allowed by the
+ // property filters.
+ DocHitInfo expected_doc_hit_info(email_document_id);
+ expected_doc_hit_info.UpdateSection(email_foo_section_id);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expected_doc_hit_info));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
+}
+
TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc
index 49f8d43..9f9094d 100644
--- a/icing/query/suggestion-processor_test.cc
+++ b/icing/query/suggestion-processor_test.cc
@@ -96,7 +96,9 @@ class SuggestionProcessorTest : public Test {
document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_,
- /*index_merge_size=*/1024 * 1024);
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
// TODO(b/249829533): switch to use persistent numeric index.
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index bcacdce..a389d13 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -780,6 +780,17 @@ libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId(
return schema_type_mapper_->Get(schema_type);
}
+libtextclassifier3::StatusOr<const std::string*> SchemaStore::GetSchemaType(
+ SchemaTypeId schema_type_id) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
+ if (const auto it = reverse_schema_type_mapper_.find(schema_type_id);
+ it == reverse_schema_type_mapper_.end()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ } else {
+ return &it->second;
+ }
+}
+
libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
SchemaStore::GetSchemaTypeIdsWithChildren(std::string_view schema_type) const {
ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index 6075f5b..88968b1 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -276,6 +276,15 @@ class SchemaStore {
libtextclassifier3::StatusOr<const SchemaTypeConfigProto*>
GetSchemaTypeConfig(std::string_view schema_type) const;
+ // Returns the schema type of the passed in SchemaTypeId
+ //
+ // Returns:
+ // schema type on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
+ // INVALID_ARGUMENT if schema type id is invalid
+ libtextclassifier3::StatusOr<const std::string*> GetSchemaType(
+ SchemaTypeId schema_type_id) const;
+
// Returns the SchemaTypeId of the passed in schema type
//
// Returns:
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index 4390b6a..af6feda 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -280,8 +280,6 @@ libtextclassifier3::Status CalculateTransitiveNestedTypeRelations(
// 4. "adjacent" has been fully expanded. Add all of its transitive
// outgoing relations to this type's transitive outgoing relations.
auto adjacent_expanded_itr = expanded_nested_types_map->find(adjacent_type);
- expanded_relations.reserve(expanded_relations.size() +
- adjacent_expanded_itr->second.size());
for (const auto& [transitive_reachable, _] :
adjacent_expanded_itr->second) {
// Insert a transitive reachable node `transitive_reachable` for `type` if
@@ -345,8 +343,6 @@ libtextclassifier3::Status CalculateAcyclicTransitiveRelations(
// 3. "adjacent" has been fully expanded. Add all of its transitive outgoing
// relations to this type's transitive outgoing relations.
auto adjacent_expanded_itr = expanded_relation_map->find(adjacent);
- expanded_relations.reserve(expanded_relations.size() +
- adjacent_expanded_itr->second.size());
for (const auto& [transitive_reachable, _] :
adjacent_expanded_itr->second) {
// Insert a transitive reachable node `transitive_reachable` for `type`.
@@ -526,7 +522,6 @@ BuildTransitiveDependentGraph(const SchemaProto& schema,
// Insert the parent_type into the dependent map if it is not present
// already.
merged_dependent_map.insert({parent_type, {}});
- merged_dependent_map[parent_type].reserve(inheritance_relation.size());
for (const auto& [child_type, _] : inheritance_relation) {
// Insert the child_type into parent_type's dependent map if it's not
// present already, in which case the value will be an empty vector.
diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java
index 47b94a5..79fcdb8 100644
--- a/java/src/com/google/android/icing/IcingSearchEngine.java
+++ b/java/src/com/google/android/icing/IcingSearchEngine.java
@@ -77,6 +77,7 @@ public class IcingSearchEngine implements IcingSearchEngineInterface {
icingSearchEngineImpl.close();
}
+ @SuppressWarnings("deprecation")
@Override
protected void finalize() throws Throwable {
icingSearchEngineImpl.close();
diff --git a/java/src/com/google/android/icing/IcingSearchEngineImpl.java b/java/src/com/google/android/icing/IcingSearchEngineImpl.java
index 8e79a88..57744c4 100644
--- a/java/src/com/google/android/icing/IcingSearchEngineImpl.java
+++ b/java/src/com/google/android/icing/IcingSearchEngineImpl.java
@@ -71,6 +71,7 @@ public class IcingSearchEngineImpl implements Closeable {
closed = true;
}
+ @SuppressWarnings("deprecation")
@Override
protected void finalize() throws Throwable {
close();
diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
index 1ed2d9a..d243f87 100644
--- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
+++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
@@ -139,26 +139,6 @@ public final class IcingSearchEngineTest {
}
@Test
- public void testSetAndGetSchema() throws Exception {
- assertStatusOk(icingSearchEngine.initialize().getStatus());
-
- SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
- SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
- SetSchemaResultProto setSchemaResultProto =
- icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false);
- assertStatusOk(setSchemaResultProto.getStatus());
-
- GetSchemaResultProto getSchemaResultProto = icingSearchEngine.getSchema();
- assertStatusOk(getSchemaResultProto.getStatus());
- assertThat(getSchemaResultProto.getSchema()).isEqualTo(schema);
-
- GetSchemaTypeResultProto getSchemaTypeResultProto =
- icingSearchEngine.getSchemaType(emailTypeConfig.getSchemaType());
- assertStatusOk(getSchemaTypeResultProto.getStatus());
- assertThat(getSchemaTypeResultProto.getSchemaTypeConfig()).isEqualTo(emailTypeConfig);
- }
-
- @Test
public void testPutAndGetDocuments() throws Exception {
assertStatusOk(icingSearchEngine.initialize().getStatus());
@@ -316,56 +296,6 @@ public final class IcingSearchEngineTest {
}
@Test
- public void testDeleteByNamespace() throws Exception {
- assertStatusOk(icingSearchEngine.initialize().getStatus());
-
- SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
- SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
- assertThat(
- icingSearchEngine
- .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
- .getStatus()
- .getCode())
- .isEqualTo(StatusProto.Code.OK);
-
- DocumentProto emailDocument = createEmailDocument("namespace", "uri");
- assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
-
- DeleteByNamespaceResultProto deleteByNamespaceResultProto =
- icingSearchEngine.deleteByNamespace("namespace");
- assertStatusOk(deleteByNamespaceResultProto.getStatus());
-
- GetResultProto getResultProto =
- icingSearchEngine.get("namespace", "uri", GetResultSpecProto.getDefaultInstance());
- assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
- }
-
- @Test
- public void testDeleteBySchemaType() throws Exception {
- assertStatusOk(icingSearchEngine.initialize().getStatus());
-
- SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
- SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
- assertThat(
- icingSearchEngine
- .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
- .getStatus()
- .getCode())
- .isEqualTo(StatusProto.Code.OK);
-
- DocumentProto emailDocument = createEmailDocument("namespace", "uri");
- assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
-
- DeleteBySchemaTypeResultProto deleteBySchemaTypeResultProto =
- icingSearchEngine.deleteBySchemaType(EMAIL_TYPE);
- assertStatusOk(deleteBySchemaTypeResultProto.getStatus());
-
- GetResultProto getResultProto =
- icingSearchEngine.get("namespace", "uri", GetResultSpecProto.getDefaultInstance());
- assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
- }
-
- @Test
public void testDeleteByQuery() throws Exception {
assertStatusOk(icingSearchEngine.initialize().getStatus());
@@ -770,31 +700,6 @@ public final class IcingSearchEngineTest {
assertThat(response.getSuggestions(1).getQuery()).isEqualTo("fo");
}
- @Test
- public void testLogging() throws Exception {
- // Set to INFO
- assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.INFO)).isTrue();
- assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.INFO)).isTrue();
- assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.DBG)).isFalse();
-
- // Set to WARNING
- assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.WARNING)).isTrue();
- assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.WARNING)).isTrue();
- assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.INFO)).isFalse();
-
- // Set to DEBUG
- assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.DBG)).isTrue();
- assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.DBG)).isTrue();
- assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE)).isFalse();
-
- // Set to VERBOSE
- assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.VERBOSE, (short) 1)).isTrue();
- assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE, (short) 1)).isTrue();
- assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE, (short) 2)).isFalse();
-
- assertThat(IcingSearchEngine.getLoggingTag()).isNotEmpty();
- }
-
private static void assertStatusOk(StatusProto status) {
assertWithMessage(status.getMessage()).that(status.getCode()).isEqualTo(StatusProto.Code.OK);
}
diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto
index 507e745..958767b 100644
--- a/proto/icing/proto/initialize.proto
+++ b/proto/icing/proto/initialize.proto
@@ -23,7 +23,7 @@ option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
-// Next tag: 12
+// Next tag: 14
message IcingSearchEngineOptions {
// Directory to persist files for Icing. Required.
// If Icing was previously initialized with this directory, it will reload
@@ -107,6 +107,26 @@ message IcingSearchEngineOptions {
// Integer index bucket split threshold.
optional int32 integer_index_bucket_split_threshold = 11 [default = 65536];
+ // Whether Icing should sort and merge its lite index HitBuffer unsorted tail
+ // at indexing time.
+ //
+ // If set to true, the HitBuffer will be sorted at indexing time after
+  // exceeding the sort threshold. If false, the HitBuffer will be sorted at
+ // querying time, before the first query after inserting new elements into the
+ // HitBuffer.
+ //
+ // The default value is false.
+ optional bool lite_index_sort_at_indexing = 12;
+
+ // Size (in bytes) at which Icing's lite index should sort and merge the
+ // HitBuffer's unsorted tail into the sorted head for sorting at indexing
+ // time. Size specified here is the maximum byte size to allow for the
+ // unsorted tail section.
+ //
+ // Setting a lower sort size reduces querying latency at the expense of
+ // indexing latency.
+ optional int32 lite_index_sort_size = 13 [default = 8192]; // 8 KiB
+
reserved 2;
}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index ca795cd..418fc88 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -76,7 +76,7 @@ message InitializeStatsProto {
// Time used to restore the index.
optional int32 index_restoration_latency_ms = 6;
- // Time used to restore the index.
+ // Time used to restore the schema store.
optional int32 schema_store_recovery_latency_ms = 7;
// Status regarding how much data is lost during the initialization.
@@ -117,7 +117,7 @@ message InitializeStatsProto {
}
// Stats of the top-level function IcingSearchEngine::Put().
-// Next tag: 10
+// Next tag: 11
message PutDocumentStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -151,6 +151,9 @@ message PutDocumentStatsProto {
// Time used to index all qualified id join strings in the document.
optional int32 qualified_id_join_index_latency_ms = 9;
+
+ // Time used to sort and merge the LiteIndex's HitBuffer.
+ optional int32 lite_index_sort_latency_ms = 10;
}
// Stats of the top-level function IcingSearchEngine::Search() and
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index 411ad3e..7f4fb3e 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -27,7 +27,7 @@ option java_multiple_files = true;
option objc_class_prefix = "ICNG";
// Client-supplied specifications on what documents to retrieve.
-// Next tag: 10
+// Next tag: 11
message SearchSpecProto {
// REQUIRED: The "raw" query string that users may type. For example, "cat"
// will search for documents with the term cat in it.
@@ -102,6 +102,16 @@ message SearchSpecProto {
// Finer-grained locks are implemented around code paths that write changes to
// Icing during Search.
optional bool use_read_only_search = 9 [default = true];
+
+ // TODO(b/294266822): Handle multiple property filter lists for same schema
+ // type.
+ // How to specify a subset of properties to be searched. If no type property
+ // filter has been specified for a schema type (no TypePropertyMask for the
+ // given schema type), then *all* properties of that schema type will be
+ // searched. If an empty property filter is specified for a given schema type
+ // (TypePropertyMask for the given schema type has empty paths field), no
+ // properties of that schema type will be searched.
+ repeated TypePropertyMask type_property_filters = 10;
}
// Client-supplied specifications on what to include/how to format the search
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index 58918d3..bd3f395 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=548180296)
+set(synced_AOSP_CL_number=561560020)