aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJiayu Hu <hujiayu@google.com>2023-04-19 12:31:23 -0700
committerJiayu Hu <hujiayu@google.com>2023-04-19 12:33:32 -0700
commita7d57e98ea7168d66cf01ace85598e33d5e9e5db (patch)
tree6685971fd4a027f52d674ee646a79ab41d4ce24f
parentb59049b030cc330b6c8ae1d03ea4c1a34235ac9b (diff)
downloadicing-a7d57e98ea7168d66cf01ace85598e33d5e9e5db.tar.gz
Update Icing from upstream.
Descriptions: ======================================================================== Adds join info to QueryStatsProto ======================================================================== [Join][cleanup][1/x] Add Icing benchmark for qualified id join ======================================================================== [NumericSearch][benchmark][2/x] Modify run-benchmarks.py to onboard numeric search benchmark ======================================================================== [NumericSearch][benchmark][1/x] Add numeric search icing search engine benchmark ======================================================================== Switch Iterator access type used in SnippetRetriever back to kForwardIterator. ======================================================================== Fix `DocumentStore::OptimizeInto` by passing `options_.document_store_namespace_id_fingerprint()` ======================================================================== Add file size check when initializing mmap for existing metadata file. ======================================================================== Add compression level option to Icing and propagate down to PortableFileBackedProtoLog ======================================================================== Add portable_equals_proto namespace for EqualsProto in tokenized-document_test. ======================================================================== Refactor schema to check consistency of polymorphism by dependency map ======================================================================== Support the `this.propertyWeights()` function in advanced scoring ======================================================================== Randomly set optimize_rebuild_index_threshold in monkey test ======================================================================== Change benchmark batch query latency to average ======================================================================== Performance improvements to SnippetRetriever. ======================================================================== Bug: 270102295 Bug: 260625837 Bug: 269295094 Bug: 193244409 Bug: 269513147 Bug: 276345448 Bug: 259744228 Bug: 272145329 Bug: 276349029 Change-Id: Ief1cb2c21f67aac420d6b79d9b946bbecdc39a5c
-rw-r--r--icing/document-builder.h26
-rw-r--r--icing/file/portable-file-backed-proto-log.h60
-rw-r--r--icing/file/portable-file-backed-proto-log_test.cc296
-rw-r--r--icing/icing-search-engine.cc26
-rw-r--r--icing/icing-search-engine_benchmark.cc414
-rw-r--r--icing/icing-search-engine_initialization_test.cc57
-rw-r--r--icing/icing-search-engine_optimize_test.cc81
-rw-r--r--icing/icing-search-engine_search_test.cc257
-rw-r--r--icing/index/index-processor_test.cc7
-rw-r--r--icing/index/integer-section-indexing-handler_test.cc7
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter_test.cc51
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc7
-rw-r--r--icing/index/numeric/integer-index-storage.cc3
-rw-r--r--icing/index/numeric/integer-index.cc7
-rw-r--r--icing/index/numeric/integer-index_test.cc17
-rw-r--r--icing/join/join-processor_test.cc7
-rw-r--r--icing/monkey_test/icing-monkey-test-runner.cc6
-rw-r--r--icing/query/advanced_query_parser/query-visitor_test.cc7
-rw-r--r--icing/query/query-processor_benchmark.cc27
-rw-r--r--icing/query/query-processor_test.cc23
-rw-r--r--icing/query/suggestion-processor_test.cc7
-rw-r--r--icing/result/result-retriever-v2_group-result-limiter_test.cc7
-rw-r--r--icing/result/result-retriever-v2_projection_test.cc7
-rw-r--r--icing/result/result-retriever-v2_snippet_test.cc7
-rw-r--r--icing/result/result-retriever-v2_test.cc47
-rw-r--r--icing/result/result-state-manager_test.cc7
-rw-r--r--icing/result/result-state-manager_thread-safety_test.cc7
-rw-r--r--icing/result/result-state-v2_test.cc7
-rw-r--r--icing/result/snippet-retriever.cc3
-rw-r--r--icing/schema-builder.h5
-rw-r--r--icing/schema/schema-util.cc9
-rw-r--r--icing/schema/schema-util.h23
-rw-r--r--icing/schema/schema-util_test.cc224
-rw-r--r--icing/scoring/advanced_scoring/advanced-scorer.cc11
-rw-r--r--icing/scoring/advanced_scoring/advanced-scorer.h3
-rw-r--r--icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc7
-rw-r--r--icing/scoring/advanced_scoring/advanced-scorer_test.cc208
-rw-r--r--icing/scoring/advanced_scoring/score-expression.cc53
-rw-r--r--icing/scoring/advanced_scoring/score-expression.h32
-rw-r--r--icing/scoring/advanced_scoring/scoring-visitor.cc5
-rw-r--r--icing/scoring/advanced_scoring/scoring-visitor.h3
-rw-r--r--icing/scoring/bm25f-calculator.cc10
-rw-r--r--icing/scoring/bm25f-calculator.h6
-rw-r--r--icing/scoring/score-and-rank_benchmark.cc27
-rw-r--r--icing/scoring/scorer-factory.cc10
-rw-r--r--icing/scoring/scorer_test.cc7
-rw-r--r--icing/scoring/scoring-processor_test.cc7
-rw-r--r--icing/store/document-log-creator.cc21
-rw-r--r--icing/store/document-log-creator.h6
-rw-r--r--icing/store/document-store.cc24
-rw-r--r--icing/store/document-store.h13
-rw-r--r--icing/store/document-store_benchmark.cc37
-rw-r--r--icing/store/document-store_test.cc463
-rw-r--r--icing/util/tokenized-document_test.cc2
-rw-r--r--proto/icing/proto/initialize.proto9
-rw-r--r--proto/icing/proto/logging.proto8
-rw-r--r--proto/icing/proto/schema.proto8
-rw-r--r--synced_AOSP_CL_number.txt2
58 files changed, 2271 insertions, 457 deletions
diff --git a/icing/document-builder.h b/icing/document-builder.h
index ba68ec5..44500f9 100644
--- a/icing/document-builder.h
+++ b/icing/document-builder.h
@@ -78,11 +78,25 @@ class DocumentBuilder {
return AddStringProperty(std::move(property_name), {string_values...});
}
+ // Takes a property name and iterator of int64_t values.
+ template <typename InputIt>
+ DocumentBuilder& AddInt64Property(std::string property_name, InputIt first,
+ InputIt last) {
+ auto property = document_.add_properties();
+ property->set_name(std::move(property_name));
+ for (InputIt it = first; it != last; ++it) {
+ property->mutable_int64_values()->Add(*it);
+ }
+ return *this;
+ }
+
// Takes a property name and any number of int64_t values.
template <typename... V>
DocumentBuilder& AddInt64Property(std::string property_name,
V... int64_values) {
- return AddInt64Property(std::move(property_name), {int64_values...});
+ std::initializer_list<int64_t> int64_values_list = {int64_values...};
+ return AddInt64Property(std::move(property_name), int64_values_list.begin(),
+ int64_values_list.end());
}
// Takes a property name and any number of double values.
@@ -128,16 +142,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& AddInt64Property(
- std::string property_name, std::initializer_list<int64_t> int64_values) {
- auto property = document_.add_properties();
- property->set_name(std::move(property_name));
- for (int64_t int64_value : int64_values) {
- property->mutable_int64_values()->Add(int64_value);
- }
- return *this;
- }
-
DocumentBuilder& AddDoubleProperty(
std::string property_name, std::initializer_list<double> double_values) {
auto property = document_.add_properties();
diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h
index 48e3501..a36bd9e 100644
--- a/icing/file/portable-file-backed-proto-log.h
+++ b/icing/file/portable-file-backed-proto-log.h
@@ -106,13 +106,32 @@ class PortableFileBackedProtoLog {
// compressed size larger than max_proto_size are also not accepted.
const int32_t max_proto_size;
+ // Level of compression if enabled, NO_COMPRESSION = 0, BEST_SPEED = 1,
+ // BEST_COMPRESSION = 9
+ const int32_t compression_level;
+
// Must specify values for options.
Options() = delete;
- explicit Options(bool compress_in,
- const int32_t max_proto_size_in = kMaxProtoSize)
- : compress(compress_in), max_proto_size(max_proto_size_in) {}
+ explicit Options(
+ bool compress_in, const int32_t max_proto_size_in = kMaxProtoSize,
+ const int32_t compression_level_in = kDeflateCompressionLevel)
+ : compress(compress_in),
+ max_proto_size(max_proto_size_in),
+ compression_level(compression_level_in) {}
};
+ // Our internal max for protos.
+ //
+ // WARNING: Changing this to a larger number may invalidate our assumption
+ // that that proto size can safely be stored in the last 3 bytes of the proto
+ // header.
+ static constexpr int kMaxProtoSize = (1 << 24) - 1; // 16MiB
+ static_assert(kMaxProtoSize <= 0x00FFFFFF,
+ "kMaxProtoSize doesn't fit in 3 bytes");
+
+ // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9
+ static constexpr int kDeflateCompressionLevel = 3;
+
// Number of bytes we reserve for the heading at the beginning of the proto
// log. We reserve this so the header can grow without running into the
// contents of the proto log, triggering an unnecessary migration of the data.
@@ -480,7 +499,8 @@ class PortableFileBackedProtoLog {
// Object can only be instantiated via the ::Create factory.
PortableFileBackedProtoLog(const Filesystem* filesystem,
const std::string& file_path,
- std::unique_ptr<Header> header);
+ std::unique_ptr<Header> header,
+ int32_t compression_level);
// Initializes a new proto log.
//
@@ -556,18 +576,6 @@ class PortableFileBackedProtoLog {
// protos we support.
static constexpr uint8_t kProtoMagic = 0x5C;
- // Our internal max for protos.
- //
- // WARNING: Changing this to a larger number may invalidate our assumption
- // that that proto size can safely be stored in the last 3 bytes of the proto
- // header.
- static constexpr int kMaxProtoSize = (1 << 24) - 1; // 16MiB
- static_assert(kMaxProtoSize <= 0x00FFFFFF,
- "kMaxProtoSize doesn't fit in 3 bytes");
-
- // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9
- static constexpr int kDeflateCompressionLevel = 3;
-
// Chunks of the file to mmap at a time, so we don't mmap the entire file.
// Only used on 32-bit devices
static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB
@@ -576,15 +584,17 @@ class PortableFileBackedProtoLog {
const Filesystem* const filesystem_;
const std::string file_path_;
std::unique_ptr<Header> header_;
+ const int32_t compression_level_;
};
template <typename ProtoT>
PortableFileBackedProtoLog<ProtoT>::PortableFileBackedProtoLog(
const Filesystem* filesystem, const std::string& file_path,
- std::unique_ptr<Header> header)
+ std::unique_ptr<Header> header, int32_t compression_level)
: filesystem_(filesystem),
file_path_(file_path),
- header_(std::move(header)) {
+ header_(std::move(header)),
+ compression_level_(compression_level) {
fd_.reset(filesystem_->OpenForAppend(file_path.c_str()));
}
@@ -617,6 +627,12 @@ PortableFileBackedProtoLog<ProtoT>::Create(const Filesystem* filesystem,
options.max_proto_size));
}
+ if (options.compression_level < 0 || options.compression_level > 9) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "options.compression_level must be between 0 and 9 inclusive, was %d",
+ options.compression_level));
+ }
+
if (!filesystem->FileExists(file_path.c_str())) {
return InitializeNewFile(filesystem, file_path, options);
}
@@ -660,7 +676,8 @@ PortableFileBackedProtoLog<ProtoT>::InitializeNewFile(
CreateResult create_result = {
std::unique_ptr<PortableFileBackedProtoLog<ProtoT>>(
new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path,
- std::move(header))),
+ std::move(header),
+ options.compression_level)),
/*data_loss=*/DataLoss::NONE, /*recalculated_checksum=*/false};
return create_result;
@@ -788,7 +805,8 @@ PortableFileBackedProtoLog<ProtoT>::InitializeExistingFile(
CreateResult create_result = {
std::unique_ptr<PortableFileBackedProtoLog<ProtoT>>(
new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path,
- std::move(header))),
+ std::move(header),
+ options.compression_level)),
data_loss, recalculated_checksum};
return create_result;
@@ -893,7 +911,7 @@ PortableFileBackedProtoLog<ProtoT>::WriteProto(const ProtoT& proto) {
if (header_->GetCompressFlag()) {
protobuf_ports::GzipOutputStream::Options options;
options.format = protobuf_ports::GzipOutputStream::ZLIB;
- options.compression_level = kDeflateCompressionLevel;
+ options.compression_level = compression_level_;
protobuf_ports::GzipOutputStream compressing_stream(&proto_stream, options);
diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc
index af09d18..bf5e604 100644
--- a/icing/file/portable-file-backed-proto-log_test.cc
+++ b/icing/file/portable-file-backed-proto-log_test.cc
@@ -73,24 +73,18 @@ class PortableFileBackedProtoLogTest : public ::testing::Test {
const Filesystem filesystem_;
std::string file_path_;
bool compress_ = true;
+ int32_t compression_level_ =
+ PortableFileBackedProtoLog<DocumentProto>::kDeflateCompressionLevel;
int64_t max_proto_size_ = 256 * 1024; // 256 KiB
};
TEST_F(PortableFileBackedProtoLogTest, Initialize) {
- // max_proto_size must be greater than 0
- int invalid_max_proto_size = 0;
- ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, invalid_max_proto_size)),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
ICING_ASSERT_OK_AND_ASSIGN(
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
EXPECT_THAT(create_result.proto_log, NotNull());
EXPECT_FALSE(create_result.has_data_loss());
EXPECT_FALSE(create_result.recalculated_checksum);
@@ -99,7 +93,41 @@ TEST_F(PortableFileBackedProtoLogTest, Initialize) {
ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- !compress_, max_proto_size_)),
+ !compress_, max_proto_size_, compression_level_)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PortableFileBackedProtoLogTest, InitializeValidatesOptions) {
+ // max_proto_size must be greater than 0
+ int invalid_max_proto_size = 0;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, invalid_max_proto_size, compression_level_)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // max_proto_size must be under 16 MiB
+ invalid_max_proto_size = 16 * 1024 * 1024;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, invalid_max_proto_size, compression_level_)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // compression_level must be between 0 and 9 inclusive
+ int invalid_compression_level = -1;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, invalid_compression_level)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // compression_level must be between 0 and 9 inclusive
+ invalid_compression_level = 10;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, invalid_compression_level)),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -108,8 +136,8 @@ TEST_F(PortableFileBackedProtoLogTest, ReservedSpaceForHeader) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
// With no protos written yet, the log should be minimum the size of the
// reserved header space.
@@ -123,8 +151,8 @@ TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -140,8 +168,8 @@ TEST_F(PortableFileBackedProtoLogTest, ReadProtoWrongKProtoMagic) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -175,7 +203,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteUncompressedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/false, max_proto_size_)));
+ /*compress_in=*/false, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -222,7 +250,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteUncompressedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/false, max_proto_size_)));
+ /*compress_in=*/false, max_proto_size_, compression_level_)));
auto recreated_proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -244,7 +272,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/true, max_proto_size_)));
+ /*compress_in=*/true, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -291,7 +319,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/true, max_proto_size_)));
+ /*compress_in=*/true, max_proto_size_, compression_level_)));
auto recreated_proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -304,6 +332,168 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) {
}
}
+TEST_F(PortableFileBackedProtoLogTest, ReadWriteDifferentCompressionLevel) {
+ int document1_offset;
+ int document2_offset;
+ int document3_offset;
+
+ // The first proto to write that's close to the max size. Leave some room for
+ // the rest of the proto properties.
+ std::string long_str(max_proto_size_ - 1024, 'a');
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .AddStringProperty("long_str", long_str)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace2", "uri2").Build();
+ DocumentProto document3 =
+ DocumentBuilder().SetKey("namespace3", "uri3").Build();
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/3)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the first proto
+ ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
+ proto_log->WriteProto(document1));
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+ }
+
+ // Make a new proto_log with the same file_path but different compression
+ // level, and make sure we can still read from and write to the same
+ // underlying file.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/9)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Check the first proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ // Write a second proto
+ ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
+ recreated_proto_log->WriteProto(document2));
+
+ ASSERT_GT(document2_offset, document1_offset);
+
+ // Check the second proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ ICING_ASSERT_OK(recreated_proto_log->PersistToDisk());
+ }
+
+ // One more time but with 0 compression level
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level=*/0)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Check the first proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ // Check the second proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ // Write a third proto
+ ICING_ASSERT_OK_AND_ASSIGN(document3_offset,
+ recreated_proto_log->WriteProto(document3));
+
+ ASSERT_GT(document3_offset, document2_offset);
+
+ // Check the third proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document3_offset),
+ IsOkAndHolds(EqualsProto(document3)));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest,
+ WriteDifferentCompressionLevelDifferentSizes) {
+ int document_log_size_with_compression_3;
+ int document_log_size_with_no_compression;
+
+ // The first proto to write that's close to the max size. Leave some room for
+ // the rest of the proto properties.
+ std::string long_str(max_proto_size_ - 1024, 'a');
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .AddStringProperty("long_str", long_str)
+ .Build();
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/3)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the proto
+ ICING_ASSERT_OK(proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ document_log_size_with_compression_3 =
+ filesystem_.GetFileSize(file_path_.c_str());
+ }
+
+ // Delete the proto_log so we can reuse the file_path
+ filesystem_.DeleteFile(file_path_.c_str());
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/0)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the proto
+ ICING_ASSERT_OK(proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ document_log_size_with_no_compression =
+ filesystem_.GetFileSize(file_path_.c_str());
+
+ // Uncompressed document file size should be larger than original compressed
+ // document file size
+ ASSERT_GT(document_log_size_with_no_compression,
+ document_log_size_with_compression_3);
+ }
+}
+
TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) {
{
ICING_ASSERT_OK_AND_ASSIGN(
@@ -311,7 +501,7 @@ TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto recreated_proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
}
@@ -328,7 +518,7 @@ TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) {
ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)),
+ compress_, max_proto_size_, compression_level_)),
StatusIs(libtextclassifier3::StatusCode::INTERNAL,
HasSubstr("Invalid header checksum")));
}
@@ -341,7 +531,7 @@ TEST_F(PortableFileBackedProtoLogTest, DifferentMagic) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto recreated_proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
@@ -360,7 +550,7 @@ TEST_F(PortableFileBackedProtoLogTest, DifferentMagic) {
ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)),
+ compress_, max_proto_size_, compression_level_)),
StatusIs(libtextclassifier3::StatusCode::INTERNAL,
HasSubstr("Invalid header kMagic")));
}
@@ -383,7 +573,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
@@ -410,7 +600,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
@@ -432,7 +622,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -478,7 +668,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
EXPECT_TRUE(create_result.has_data_loss());
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
@@ -507,7 +697,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitFalseAlarmKeepsData) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -537,7 +727,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitFalseAlarmKeepsData) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
@@ -569,7 +759,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -615,7 +805,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_TRUE(create_result.has_data_loss());
ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
@@ -640,7 +830,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -663,7 +853,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
// We previously persisted to disk so everything should be in a perfect
// state.
@@ -683,7 +873,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -707,7 +897,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
// We previously persisted to disk so everything should be in a perfect
// state.
@@ -726,7 +916,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitIsFalseAfterPutAndDestructor) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -751,7 +941,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitIsFalseAfterPutAndDestructor) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
// We previously persisted to disk so everything should be in a perfect
// state.
@@ -771,7 +961,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -797,7 +987,7 @@ TEST_F(PortableFileBackedProtoLogTest,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
// We previously persisted to disk so everything should be in a perfect
// state.
@@ -819,8 +1009,8 @@ TEST_F(PortableFileBackedProtoLogTest, Iterator) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -872,7 +1062,7 @@ TEST_F(PortableFileBackedProtoLogTest, ComputeChecksum) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -890,7 +1080,7 @@ TEST_F(PortableFileBackedProtoLogTest, ComputeChecksum) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -915,8 +1105,8 @@ TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldSetZero) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -950,8 +1140,8 @@ TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldReturnNotFound) {
PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
- PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -993,7 +1183,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -1021,7 +1211,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -1041,7 +1231,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_FALSE(create_result.has_data_loss());
@@ -1063,7 +1253,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
PortableFileBackedProtoLog<DocumentProto>::Create(
&filesystem_, file_path_,
PortableFileBackedProtoLog<DocumentProto>::Options(
- compress_, max_proto_size_)));
+ compress_, max_proto_size_, compression_level_)));
auto proto_log = std::move(create_result.proto_log);
EXPECT_FALSE(create_result.has_data_loss());
}
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index b81390f..56c7795 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -705,7 +705,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
schema_store_.get(),
force_recovery_and_revalidate_documents,
options_.document_store_namespace_id_fingerprint(),
- initialize_stats));
+ options_.compression_level(), initialize_stats));
document_store_ = std::move(create_result.document_store);
return libtextclassifier3::Status::OK;
@@ -1852,6 +1852,7 @@ SearchResultProto IcingSearchEngine::Search(
std::unique_ptr<ScoredDocumentHitsRanker> ranker;
if (join_children_fetcher != nullptr) {
+ std::unique_ptr<Timer> join_timer = clock_->GetNewTimer();
// Join 2 scored document hits
JoinProcessor join_processor(document_store_.get(), schema_store_.get(),
qualified_id_join_index_.get());
@@ -1865,7 +1866,8 @@ SearchResultProto IcingSearchEngine::Search(
}
std::vector<JoinedScoredDocumentHit> joined_result_document_hits =
std::move(joined_result_document_hits_or).ValueOrDie();
- // TODO(b/256022027): set join latency
+
+ query_stats->set_join_latency_ms(join_timer->GetElapsedMilliseconds());
std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
// Ranks results
@@ -1921,8 +1923,11 @@ SearchResultProto IcingSearchEngine::Search(
// Assembles the final search result proto
result_proto.mutable_results()->Reserve(
page_result_info.second.results.size());
+
+ int32_t child_count = 0;
for (SearchResultProto::ResultProto& result :
page_result_info.second.results) {
+ child_count += result.joined_results_size();
result_proto.mutable_results()->Add(std::move(result));
}
@@ -1935,6 +1940,9 @@ SearchResultProto IcingSearchEngine::Search(
component_timer->GetElapsedMilliseconds());
query_stats->set_num_results_returned_current_page(
result_proto.results_size());
+
+ query_stats->set_num_joined_results_returned_current_page(child_count);
+
query_stats->set_num_results_with_snippets(
page_result_info.second.num_results_with_snippets);
return result_proto;
@@ -2055,8 +2063,11 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
// Assembles the final search result proto
result_proto.mutable_results()->Reserve(
page_result_info.second.results.size());
+
+ int32_t child_count = 0;
for (SearchResultProto::ResultProto& result :
page_result_info.second.results) {
+ child_count += result.joined_results_size();
result_proto.mutable_results()->Add(std::move(result));
}
@@ -2075,6 +2086,8 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
result_proto.results_size());
query_stats->set_num_results_with_snippets(
page_result_info.second.num_results_with_snippets);
+ query_stats->set_num_joined_results_returned_current_page(child_count);
+
return result_proto;
}
@@ -2106,7 +2119,8 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
// Copies valid document data to tmp directory
libtextclassifier3::StatusOr<std::vector<DocumentId>>
document_id_old_to_new_or = document_store_->OptimizeInto(
- temporary_document_dir, language_segmenter_.get(), optimize_stats);
+ temporary_document_dir, language_segmenter_.get(),
+ options_.document_store_namespace_id_fingerprint(), optimize_stats);
// Handles error if any
if (!document_id_old_to_new_or.ok()) {
@@ -2143,7 +2157,8 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
auto create_result_or = DocumentStore::Create(
filesystem_.get(), current_document_dir, clock_.get(),
schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
- options_.document_store_namespace_id_fingerprint());
+ options_.document_store_namespace_id_fingerprint(),
+ options_.compression_level(), /*initialize_stats=*/nullptr);
// TODO(b/144458732): Implement a more robust version of
// TC_ASSIGN_OR_RETURN that can support error logging.
if (!create_result_or.ok()) {
@@ -2170,7 +2185,8 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
auto create_result_or = DocumentStore::Create(
filesystem_.get(), current_document_dir, clock_.get(),
schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
- options_.document_store_namespace_id_fingerprint());
+ options_.document_store_namespace_id_fingerprint(),
+ options_.compression_level(), /*initialize_stats=*/nullptr);
if (!create_result_or.ok()) {
// Unable to create DocumentStore from the new file. Mark as uninitialized
// and return INTERNAL.
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index 9e8686d..cf654a8 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -16,7 +16,9 @@
#include <fstream>
#include <iostream>
+#include <limits>
#include <memory>
+#include <numeric>
#include <ostream>
#include <random>
#include <sstream>
@@ -32,6 +34,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/icing-search-engine.h"
+#include "icing/join/join-processor.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
#include "icing/proto/reset.pb.h"
@@ -40,9 +43,12 @@
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/document-generator.h"
+#include "icing/testing/numeric/number-generator.h"
+#include "icing/testing/numeric/uniform-distribution-integer-generator.h"
#include "icing/testing/random-string.h"
#include "icing/testing/schema-generator.h"
#include "icing/testing/tmp-directory.h"
@@ -178,6 +184,25 @@ std::vector<DocumentProto> GenerateRandomDocuments(
return random_docs;
}
+std::unique_ptr<NumberGenerator<int64_t>> CreateIntegerGenerator(
+ size_t num_documents) {
+ // Since the collision # follows poisson distribution with lambda =
+ // (num_keys / range), we set the range 10x (lambda = 0.1) to avoid too many
+ // collisions.
+ //
+ // Distribution:
+ // - keys in range being picked for 0 times: 90.5%
+ // - keys in range being picked for 1 time: 9%
+ // - keys in range being picked for 2 times: 0.45%
+ // - keys in range being picked for 3 times: 0.015%
+ //
+ // For example, num_keys = 1M, range = 10M. Then there will be ~904837 unique
+ // keys, 45242 keys being picked twice, 1508 keys being picked thrice ...
+ return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>(
+ /*seed=*/12345, /*range_lower=*/0,
+ /*range_upper=*/static_cast<int64_t>(num_documents) * 10 - 1);
+}
+
void BM_IndexLatency(benchmark::State& state) {
// Initialize the filesystem
std::string test_dir = GetTestTempDir() + "/icing/benchmark";
@@ -799,6 +824,395 @@ void BM_QueryWithSnippet(benchmark::State& state) {
}
BENCHMARK(BM_QueryWithSnippet);
+void BM_NumericIndexing(benchmark::State& state) {
+ int num_documents = state.range(0);
+ int num_integers_per_doc = state.range(1);
+
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("integer")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
+ CreateIntegerGenerator(num_documents);
+ std::vector<DocumentProto> documents;
+ documents.reserve(num_documents);
+ for (int i = 0; i < num_documents; ++i) {
+ std::vector<int64_t> integers;
+ integers.reserve(num_integers_per_doc);
+ for (int j = 0; j < num_integers_per_doc; ++j) {
+ integers.push_back(integer_generator->Generate());
+ }
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "body hello world")
+ .AddInt64Property("integer", integers.begin(), integers.end())
+ .Build();
+ documents.push_back(std::move(document));
+ }
+
+ for (auto s : state) {
+ state.PauseTiming();
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+ state.ResumeTiming();
+
+ for (const DocumentProto& document : documents) {
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ }
+
+ state.PauseTiming();
+ icing.reset();
+ ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
+ state.ResumeTiming();
+ }
+}
+
+BENCHMARK(BM_NumericIndexing)
+ // Arguments: num_documents, num_integers_per_doc
+ ->ArgPair(1000000, 5);
+
+void BM_NumericExactQuery(benchmark::State& state) {
+ int num_documents = state.range(0);
+ int num_integers_per_doc = state.range(1);
+
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("integer")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
+ CreateIntegerGenerator(num_documents);
+ std::unordered_set<int64_t> chosen_integer_set;
+ for (int i = 0; i < num_documents; ++i) {
+ std::vector<int64_t> integers;
+ integers.reserve(num_integers_per_doc);
+ for (int j = 0; j < num_integers_per_doc; ++j) {
+ int64_t chosen_int = integer_generator->Generate();
+ integers.push_back(chosen_int);
+ chosen_integer_set.insert(chosen_int);
+ }
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "body hello world")
+ .AddInt64Property("integer", integers.begin(), integers.end())
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
+ }
+
+ SearchSpecProto search_spec;
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ std::vector<int64_t> chosen_integers(chosen_integer_set.begin(),
+ chosen_integer_set.end());
+ std::uniform_int_distribution<> distrib(0, chosen_integers.size() - 1);
+ std::default_random_engine e(/*seed=*/12345);
+ for (auto s : state) {
+ int64_t exact = chosen_integers[distrib(e)];
+ search_spec.set_query("integer == " + std::to_string(exact));
+
+ SearchResultProto results =
+ icing->Search(search_spec, scoring_spec, result_spec);
+ ASSERT_THAT(results.status(), ProtoIsOk());
+ ASSERT_GT(results.results_size(), 0);
+ if (results.next_page_token() != kInvalidNextPageToken) {
+ icing->InvalidateNextPageToken(results.next_page_token());
+ }
+ }
+}
+BENCHMARK(BM_NumericExactQuery)
+ // Arguments: num_documents, num_integers_per_doc
+ ->ArgPair(1000000, 5);
+
+void BM_NumericRangeQueryAll(benchmark::State& state) {
+ int num_documents = state.range(0);
+ int num_integers_per_doc = state.range(1);
+
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("integer")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
+ CreateIntegerGenerator(num_documents);
+ for (int i = 0; i < num_documents; ++i) {
+ std::vector<int64_t> integers;
+ integers.reserve(num_integers_per_doc);
+ for (int j = 0; j < num_integers_per_doc; ++j) {
+ integers.push_back(integer_generator->Generate());
+ }
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "body hello world")
+ .AddInt64Property("integer", integers.begin(), integers.end())
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
+ }
+
+ SearchSpecProto search_spec;
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+ search_spec.set_query("integer >= " +
+ std::to_string(std::numeric_limits<int64_t>::min()));
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ for (auto s : state) {
+ SearchResultProto results =
+ icing->Search(search_spec, scoring_spec, result_spec);
+ ASSERT_THAT(results.status(), ProtoIsOk());
+ ASSERT_GT(results.results_size(), 0);
+ if (results.next_page_token() != kInvalidNextPageToken) {
+ icing->InvalidateNextPageToken(results.next_page_token());
+ }
+ }
+}
+BENCHMARK(BM_NumericRangeQueryAll)
+ // Arguments: num_documents, num_integers_per_doc
+ ->ArgPair(1000000, 5);
+
+void BM_JoinQueryQualifiedId(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create Person documents (parent)
+ static constexpr int kNumPersonDocuments = 1000;
+ for (int i = 0; i < kNumPersonDocuments; ++i) {
+ std::string person_id = std::to_string(i);
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person" + person_id)
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first" + person_id)
+ .AddStringProperty("lastName", "last" + person_id)
+ .AddStringProperty("emailAddress",
+ "person" + person_id + "@gmail.com")
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(person)).status(), ProtoIsOk());
+ }
+
+ // Create Email documents (child)
+ static constexpr int kNumEmailDocuments = 10000;
+ std::uniform_int_distribution<> distrib(0, kNumPersonDocuments - 1);
+ std::default_random_engine e(/*seed=*/12345);
+ for (int i = 0; i < kNumEmailDocuments; ++i) {
+ std::string email_id = std::to_string(i);
+ std::string person_id = std::to_string(distrib(e));
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email" + email_id)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject " + email_id)
+ .AddStringProperty("body", "message body")
+ .AddStringProperty("personQualifiedId",
+ "pkg$db/namespace#person" + person_id)
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(email)).status(), ProtoIsOk());
+ }
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_max_joined_child_count(std::numeric_limits<int32_t>::max());
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::MAX);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ *nested_spec->mutable_scoring_spec() = ScoringSpecProto::default_instance();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ static constexpr int kNumPerPage = 10;
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(kNumPerPage);
+
+ ScoringSpecProto score_spec = ScoringSpecProto::default_instance();
+
+ const auto child_count_reduce_func =
+ [](int child_count, const SearchResultProto::ResultProto& result) -> int {
+ return child_count + result.joined_results_size();
+ };
+ for (auto s : state) {
+ int total_parent_count = 0;
+ int total_child_count = 0;
+ SearchResultProto results =
+ icing->Search(search_spec, score_spec, result_spec);
+ total_parent_count += results.results_size();
+ total_child_count +=
+ std::reduce(results.results().begin(), results.results().end(), 0,
+ child_count_reduce_func);
+
+ // Get all pages.
+ while (results.next_page_token() != kInvalidNextPageToken) {
+ results = icing->GetNextPage(results.next_page_token());
+ total_parent_count += results.results_size();
+ total_child_count +=
+ std::reduce(results.results().begin(), results.results().end(), 0,
+ child_count_reduce_func);
+ }
+
+ ASSERT_THAT(total_parent_count, Eq(kNumPersonDocuments));
+ ASSERT_THAT(total_child_count, Eq(kNumEmailDocuments));
+ }
+}
+BENCHMARK(BM_JoinQueryQualifiedId);
+
} // namespace
} // namespace lib
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
index 6ba1737..0db4d54 100644
--- a/icing/icing-search-engine_initialization_test.cc
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -383,6 +383,56 @@ TEST_F(IcingSearchEngineInitializationTest,
ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
+TEST_F(IcingSearchEngineInitializationTest,
+ NegativeCompressionLevelReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(-1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ GreaterThanMaxCompressionLevelReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(10);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, GoodCompressionLevelReturnsOk) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(0);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ ReinitializingWithDifferentCompressionLevelReturnsOk) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(3);
+ {
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+ }
+ options.set_compression_level(9);
+ {
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ }
+ options.set_compression_level(0);
+ {
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ }
+}
+
TEST_F(IcingSearchEngineInitializationTest, FailToCreateDocStore) {
auto mock_filesystem = std::make_unique<MockFilesystem>();
// This fails DocumentStore::Create()
@@ -980,7 +1030,12 @@ TEST_F(IcingSearchEngineInitializationTest,
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
- schema_store.get()));
+ schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
diff --git a/icing/icing-search-engine_optimize_test.cc b/icing/icing-search-engine_optimize_test.cc
index 0c5cb7a..48fae13 100644
--- a/icing/icing-search-engine_optimize_test.cc
+++ b/icing/icing-search-engine_optimize_test.cc
@@ -1753,6 +1753,87 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {
EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
}
+TEST_F(IcingSearchEngineOptimizeTest,
+ OptimizationRewritesDocsWithNewCompressionLevel) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ icing_options.set_compression_level(3);
+ int64_t document_log_size_compression_3;
+ int64_t document_log_size_after_opti_no_compression;
+ int64_t document_log_size_after_opti_compression_3;
+ const std::string document_log_path =
+ icing_options.base_dir() + "/document_dir/" +
+ DocumentLogCreator::GetDocumentLogFilename();
+ {
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+ document_log_size_compression_3 =
+ filesystem()->GetFileSize(document_log_path.c_str());
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ // Turn off compression
+ icing_options.set_compression_level(0);
+
+ {
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Document log size is the same even after reopening with a different
+ // compression level
+ ASSERT_EQ(document_log_size_compression_3,
+ filesystem()->GetFileSize(document_log_path.c_str()));
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ document_log_size_after_opti_no_compression =
+ filesystem()->GetFileSize(document_log_path.c_str());
+ // Document log size is larger after optimizing since optimizing rewrites
+ // with the new compression level which is 0 or none
+ ASSERT_GT(document_log_size_after_opti_no_compression,
+ document_log_size_compression_3);
+ }
+
+ // Restore the original compression level
+ icing_options.set_compression_level(3);
+
+ {
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Document log size is the same even after reopening with a different
+ // compression level
+ ASSERT_EQ(document_log_size_after_opti_no_compression,
+ filesystem()->GetFileSize(document_log_path.c_str()));
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ document_log_size_after_opti_compression_3 =
+ filesystem()->GetFileSize(document_log_path.c_str());
+ // Document log size should be the same as it was originally
+ ASSERT_EQ(document_log_size_after_opti_compression_3,
+ document_log_size_compression_3);
+ }
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
index f5b747b..63fb657 100644
--- a/icing/icing-search-engine_search_test.cc
+++ b/icing/icing-search-engine_search_test.cc
@@ -3317,6 +3317,7 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
exp_stats.set_ranking_latency_ms(5);
exp_stats.set_document_retrieval_latency_ms(5);
exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(0);
EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
// Second page, 2 result with 1 snippet
@@ -3333,6 +3334,7 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
exp_stats.set_latency_ms(5);
exp_stats.set_document_retrieval_latency_ms(5);
exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(0);
EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
// Third page, 1 result with 0 snippets
@@ -3349,6 +3351,261 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
exp_stats.set_latency_ms(5);
exp_stats.set_document_retrieval_latency_ms(5);
exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(0);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE);
+ scoring_spec.set_order_by(ScoringSpecProto::Order::DESC);
+
+ // Parent ResultSpec
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ // Since we:
+ // - Use MAX for aggregation scoring strategy.
+ // - (Default) use DOCUMENT_SCORE to score child documents.
+ // - (Default) use DESC as the ranking order.
+ //
+ // person1 + email1 should have the highest aggregated score (3) and be
+ // returned first. person2 + email2 (aggregated score = 2) should be the
+ // second, and person3 + email3 (aggregated score = 1) should be the last.
+ SearchResultProto expected_result1;
+ expected_result1.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto1 =
+ expected_result1.mutable_results()->Add();
+ *result_proto1->mutable_document() = person1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email2;
+
+ SearchResultProto expected_result2;
+ expected_result2.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_google::protobuf =
+ expected_result2.mutable_results()->Add();
+ *result_google::protobuf->mutable_document() = person2;
+ *result_google::protobuf->mutable_joined_results()->Add()->mutable_document() = email3;
+
+ SearchResultProto expected_result3;
+ expected_result3.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto3 =
+ expected_result3.mutable_results()->Add();
+ *result_proto3->mutable_document() = person3;
+
+ SearchResultProto search_result =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = search_result.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result1.set_next_page_token(next_page_token);
+ ASSERT_THAT(search_result,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result1));
+
+ // Check the stats
+ QueryStatsProto exp_stats;
+ exp_stats.set_query_length(15);
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(0);
+ exp_stats.set_num_schema_types_filtered(0);
+ exp_stats.set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE);
+ exp_stats.set_is_first_page(true);
+ exp_stats.set_requested_page_size(1);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_documents_scored(3);
+ exp_stats.set_num_results_with_snippets(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_parse_query_latency_ms(5);
+ exp_stats.set_scoring_latency_ms(5);
+ exp_stats.set_ranking_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(2);
+ exp_stats.set_join_latency_ms(5);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Second page, 1 child doc.
+ search_result = icing.GetNextPage(next_page_token);
+ next_page_token = search_result.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result2.set_next_page_token(next_page_token);
+ EXPECT_THAT(search_result,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result2));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(1);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_results_with_snippets(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(1);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Third page, 0 child docs.
+ search_result = icing.GetNextPage(next_page_token);
+ next_page_token = search_result.next_page_token();
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(1));
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(1);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_joined_results_returned_current_page(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_results_with_snippets(0);
+ ASSERT_THAT(search_result,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result3));
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), IsEmpty());
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_lock_acquisition_latency_ms(5);
EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
}
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index 47baabe..9453e58 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -273,7 +273,12 @@ class IndexProcessorTest : public Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
doc_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/index/integer-section-indexing-handler_test.cc b/icing/index/integer-section-indexing-handler_test.cc
index 71c6bd5..895fe57 100644
--- a/icing/index/integer-section-indexing-handler_test.cc
+++ b/icing/index/integer-section-indexing-handler_test.cc
@@ -163,7 +163,12 @@ class IntegerSectionIndexingHandlerTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult doc_store_create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(doc_store_create_result.document_store);
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
index ddb216a..0900e1f 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -48,6 +48,17 @@ using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test {
protected:
DocHitInfoIteratorDeletedFilterTest()
@@ -73,8 +84,8 @@ class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
}
@@ -240,8 +251,8 @@ class DocHitInfoIteratorNamespaceFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
}
@@ -395,8 +406,8 @@ class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
}
@@ -535,8 +546,8 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
}
@@ -563,8 +574,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -595,8 +606,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -627,8 +638,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, EqualTtlFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -660,8 +671,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, PastTtlFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -735,8 +746,8 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
}
@@ -770,8 +781,8 @@ TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 5dd69c1..60b9a12 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -100,7 +100,12 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc
index db1983c..f3901e1 100644
--- a/icing/index/numeric/integer-index-storage.cc
+++ b/icing/index/numeric/integer-index-storage.cc
@@ -818,6 +818,9 @@ IntegerIndexStorage::InitializeExistingFiles(
/*max_file_size=*/kMetadataFileSize,
/*pre_mapping_file_offset=*/0,
/*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::FailedPreconditionError("Incorrect metadata file size");
+ }
// Initialize sorted_buckets
int32_t pre_mapping_mmap_size = sizeof(Bucket) * (1 << 10);
diff --git a/icing/index/numeric/integer-index.cc b/icing/index/numeric/integer-index.cc
index 2f876e4..e7978bf 100644
--- a/icing/index/numeric/integer-index.cc
+++ b/icing/index/numeric/integer-index.cc
@@ -293,6 +293,10 @@ libtextclassifier3::Status IntegerIndex::Optimize(
/*max_file_size=*/kMetadataFileSize,
/*pre_mapping_file_offset=*/0,
/*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::InternalError(
+ "Invalid metadata file size after Optimize");
+ }
metadata_mmapped_file_ =
std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file));
@@ -410,6 +414,9 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem,
/*max_file_size=*/kMetadataFileSize,
/*pre_mapping_file_offset=*/0,
/*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::FailedPreconditionError("Incorrect metadata file size");
+ }
auto posting_list_serializer =
std::make_unique<PostingListIntegerIndexSerializer>();
diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc
index c4dacb8..ec7f55b 100644
--- a/icing/index/numeric/integer-index_test.cc
+++ b/icing/index/numeric/integer-index_test.cc
@@ -84,7 +84,12 @@ class NumericIndexIntegerTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult doc_store_create_result,
DocumentStore::Create(&filesystem_, document_store_dir, &clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
doc_store_ = std::move(doc_store_create_result.document_store);
}
@@ -132,7 +137,8 @@ class NumericIndexIntegerTest : public ::testing::Test {
}
ICING_ASSIGN_OR_RETURN(
std::vector<DocumentId> docid_map,
- doc_store_->OptimizeInto(document_store_compact_dir, nullptr));
+ doc_store_->OptimizeInto(document_store_compact_dir, nullptr,
+ /*namespace_id_fingerprint=*/false));
doc_store_.reset();
if (!filesystem_.SwapFiles(document_store_dir.c_str(),
@@ -147,7 +153,12 @@ class NumericIndexIntegerTest : public ::testing::Test {
ICING_ASSIGN_OR_RETURN(
DocumentStore::CreateResult doc_store_create_result,
DocumentStore::Create(&filesystem_, document_store_dir, &clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
doc_store_ = std::move(doc_store_create_result.document_store);
return docid_map;
}
diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc
index 25d4cfc..67b6201 100644
--- a/icing/join/join-processor_test.cc
+++ b/icing/join/join-processor_test.cc
@@ -110,7 +110,12 @@ class JoinProcessorTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
doc_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
diff --git a/icing/monkey_test/icing-monkey-test-runner.cc b/icing/monkey_test/icing-monkey-test-runner.cc
index 89b8e89..558da1c 100644
--- a/icing/monkey_test/icing-monkey-test-runner.cc
+++ b/icing/monkey_test/icing-monkey-test-runner.cc
@@ -455,9 +455,15 @@ void IcingMonkeyTestRunner::DoOptimize() {
void IcingMonkeyTestRunner::CreateIcingSearchEngine() {
std::uniform_int_distribution<> dist(0, 1);
+ bool always_rebuild_index_optimize = dist(random_);
+ float optimize_rebuild_index_threshold =
+ always_rebuild_index_optimize ? 0.0 : 0.9;
+
IcingSearchEngineOptions icing_options;
icing_options.set_index_merge_size(config_.index_merge_size);
icing_options.set_base_dir(icing_dir_->dir());
+ icing_options.set_optimize_rebuild_index_threshold(
+ optimize_rebuild_index_threshold);
// The method will be called every time when we ReloadFromDisk(), so randomly
// flip this flag to test document store's compatibility.
icing_options.set_document_store_namespace_id_fingerprint(
diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc
index 7aef40f..c48d9ad 100644
--- a/icing/query/advanced_query_parser/query-visitor_test.cc
+++ b/icing/query/advanced_query_parser/query-visitor_test.cc
@@ -115,7 +115,12 @@ class QueryVisitorTest : public ::testing::TestWithParam<QueryType> {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_.c_str(),
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index 111b598..6826c22 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -92,6 +92,17 @@ std::unique_ptr<Normalizer> CreateNormalizer() {
.ValueOrDie();
}
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
void BM_QueryOneTerm(benchmark::State& state) {
bool run_via_adb = absl::GetFlag(FLAGS_adb);
if (!run_via_adb) {
@@ -136,8 +147,8 @@ void BM_QueryOneTerm(benchmark::State& state) {
ICING_ASSERT_OK(schema_store->SetSchema(schema));
DocumentStore::CreateResult create_result =
- DocumentStore::Create(&filesystem, doc_store_dir, &clock,
- schema_store.get())
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -262,8 +273,8 @@ void BM_QueryFiveTerms(benchmark::State& state) {
ICING_ASSERT_OK(schema_store->SetSchema(schema));
DocumentStore::CreateResult create_result =
- DocumentStore::Create(&filesystem, doc_store_dir, &clock,
- schema_store.get())
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -406,8 +417,8 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
ICING_ASSERT_OK(schema_store->SetSchema(schema));
DocumentStore::CreateResult create_result =
- DocumentStore::Create(&filesystem, doc_store_dir, &clock,
- schema_store.get())
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -535,8 +546,8 @@ void BM_QueryHiragana(benchmark::State& state) {
ICING_ASSERT_OK(schema_store->SetSchema(schema));
DocumentStore::CreateResult create_result =
- DocumentStore::Create(&filesystem, doc_store_dir, &clock,
- schema_store.get())
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index cbf859b..47245fd 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -63,6 +63,17 @@ using ::testing::IsEmpty;
using ::testing::SizeIs;
using ::testing::UnorderedElementsAre;
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
class QueryProcessorTest
: public ::testing::TestWithParam<SearchSpecProto::SearchType::Code> {
protected:
@@ -96,8 +107,8 @@ class QueryProcessorTest
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, store_dir_, &fake_clock_,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_,
@@ -2501,8 +2512,8 @@ TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, store_dir_, &fake_clock,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -2559,8 +2570,8 @@ TEST_P(QueryProcessorTest, DocumentPastTtlFilteredOut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, store_dir_, &fake_clock,
+ schema_store_.get()));
document_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc
index d4ecec0..7d45de7 100644
--- a/icing/query/suggestion-processor_test.cc
+++ b/icing/query/suggestion-processor_test.cc
@@ -86,7 +86,12 @@ class SuggestionProcessorTest : public Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_,
diff --git a/icing/result/result-retriever-v2_group-result-limiter_test.cc b/icing/result/result-retriever-v2_group-result-limiter_test.cc
index 7027cc5..d4aaa38 100644
--- a/icing/result/result-retriever-v2_group-result-limiter_test.cc
+++ b/icing/result/result-retriever-v2_group-result-limiter_test.cc
@@ -88,7 +88,12 @@ class ResultRetrieverV2GroupResultLimiterTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
diff --git a/icing/result/result-retriever-v2_projection_test.cc b/icing/result/result-retriever-v2_projection_test.cc
index 629fb34..94580d4 100644
--- a/icing/result/result-retriever-v2_projection_test.cc
+++ b/icing/result/result-retriever-v2_projection_test.cc
@@ -115,7 +115,12 @@ class ResultRetrieverV2ProjectionTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
diff --git a/icing/result/result-retriever-v2_snippet_test.cc b/icing/result/result-retriever-v2_snippet_test.cc
index b32800c..3dce0ef 100644
--- a/icing/result/result-retriever-v2_snippet_test.cc
+++ b/icing/result/result-retriever-v2_snippet_test.cc
@@ -107,7 +107,12 @@ class ResultRetrieverV2SnippetTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
diff --git a/icing/result/result-retriever-v2_test.cc b/icing/result/result-retriever-v2_test.cc
index 6c2aa67..462d535 100644
--- a/icing/result/result-retriever-v2_test.cc
+++ b/icing/result/result-retriever-v2_test.cc
@@ -197,6 +197,17 @@ ResultSpecProto CreateResultSpec(
return result_spec;
}
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) {
EXPECT_THAT(
ResultRetrieverV2::Create(/*doc_store=*/nullptr, schema_store_.get(),
@@ -205,8 +216,8 @@ TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -227,8 +238,8 @@ TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) {
TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -320,8 +331,8 @@ TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -392,8 +403,8 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&mock_filesystem, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -437,8 +448,8 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) {
TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -522,8 +533,8 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) {
TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -621,8 +632,8 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
TEST_F(ResultRetrieverV2Test, ShouldLimitNumTotalBytesPerPage) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -681,8 +692,8 @@ TEST_F(ResultRetrieverV2Test,
ShouldReturnSingleLargeResultAboveNumTotalBytesPerPageThreshold) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -743,8 +754,8 @@ TEST_F(ResultRetrieverV2Test,
ShouldRetrieveNextResultWhenBelowNumTotalBytesPerPageThreshold) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc
index c0ea49a..44bfe2d 100644
--- a/icing/result/result-state-manager_test.cc
+++ b/icing/result/result-state-manager_test.cc
@@ -106,7 +106,12 @@ class ResultStateManagerTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult result,
DocumentStore::Create(&filesystem_, test_dir_, clock_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/result/result-state-manager_thread-safety_test.cc b/icing/result/result-state-manager_thread-safety_test.cc
index 55eda85..670578f 100644
--- a/icing/result/result-state-manager_thread-safety_test.cc
+++ b/icing/result/result-state-manager_thread-safety_test.cc
@@ -99,7 +99,12 @@ class ResultStateManagerThreadSafetyTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult result,
DocumentStore::Create(&filesystem_, test_dir_, clock_.get(),
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/result/result-state-v2_test.cc b/icing/result/result-state-v2_test.cc
index 8706b6d..35b6401 100644
--- a/icing/result/result-state-v2_test.cc
+++ b/icing/result/result-state-v2_test.cc
@@ -68,7 +68,12 @@ class ResultStateV2Test : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult result,
DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(result.document_store);
num_total_hits_ = 0;
diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc
index d654195..2c4023c 100644
--- a/icing/result/snippet-retriever.cc
+++ b/icing/result/snippet-retriever.cc
@@ -489,8 +489,7 @@ void GetEntriesFromProperty(const PropertyProto* current_property,
std::string_view value = current_property->string_values(i);
std::unique_ptr<Tokenizer::Iterator> iterator =
tokenizer
- ->Tokenize(value,
- LanguageSegmenter::AccessType::kBidirectionalIterator)
+ ->Tokenize(value, LanguageSegmenter::AccessType::kForwardIterator)
.ValueOrDie();
// All iterators are moved through positions sequentially. Constructing them
// each time resets them to the beginning of the string. This means that,
diff --git a/icing/schema-builder.h b/icing/schema-builder.h
index d1ef42f..1dceb62 100644
--- a/icing/schema-builder.h
+++ b/icing/schema-builder.h
@@ -158,6 +158,11 @@ class SchemaTypeConfigBuilder {
return *this;
}
+ SchemaTypeConfigBuilder& SetParentType(std::string_view parent_type) {
+ type_config_.set_parent_type(std::string(parent_type));
+ return *this;
+ }
+
SchemaTypeConfigBuilder& SetVersion(int version) {
type_config_.set_version(version);
return *this;
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index ea0d85a..f3f7aad 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -280,6 +280,15 @@ BuildTransitiveDependentGraph(const SchemaProto& schema) {
}
known_types.insert(schema_type);
unknown_types.erase(schema_type);
+ if (!type_config.parent_type().empty()) {
+ std::string_view parent_schema_type(type_config.parent_type());
+ if (known_types.count(parent_schema_type) == 0) {
+ unknown_types.insert(parent_schema_type);
+ }
+ // Try to add schema_type to the parent type's dependent map when it is
+ // not present already, in which case the value will be an empty vector.
+ dependent_map[parent_schema_type].insert({schema_type, {}});
+ }
for (const auto& property_config : type_config.properties()) {
if (property_config.data_type() ==
PropertyConfigProto::DataType::DOCUMENT) {
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index 47bb76b..825625e 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -34,15 +34,20 @@ class SchemaUtil {
std::unordered_map<std::string, const SchemaTypeConfigProto>;
// If A -> B is indicated in the map, then type A must be built before
- // building type B, i.e. B has a property of type A. Also include all
- // PropertyConfigProto (with DOCUMENT data_type) pointers which directly
- // connects type A and B. IOW, this vector of PropertyConfigProto* are "direct
- // edges" connecting A and B directly. It will be an empty vector if A and B
- // are not "directly" connected, but instead via another intermediate level of
- // schema type. For example, the actual dependency is A -> C -> B, so there
- // will be A -> C and C -> B with valid PropertyConfigProto* respectively in
- // this map, but we will also expand transitive dependents: add A -> B into
- // dependent map with empty vector of "edges".
+ // building type B, which implies one of the following situations.
+ //
+ // 1. B has a property of type A.
+ // 2. A is a parent type of B via polymorphism.
+ //
+ // For the first case, this map will also include all PropertyConfigProto
+ // (with DOCUMENT data_type) pointers which *directly* connects type A and B.
+ // IOW, this vector of PropertyConfigProto* are "direct edges" connecting A
+ // and B directly. It will be an empty vector if A and B are not "directly"
+ // connected, but instead via another intermediate level of schema type. For
+ // example, the actual dependency is A -> C -> B, so there will be A -> C and
+ // C -> B with valid PropertyConfigProto* respectively in this map, but we
+ // will also expand transitive dependents: add A -> B into dependent map with
+ // empty vector of "edges".
using DependentMap = std::unordered_map<
std::string_view,
std::unordered_map<std::string_view,
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index 2d1e683..df7a421 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -36,6 +36,7 @@ using ::testing::HasSubstr;
using ::testing::IsEmpty;
using ::testing::Pair;
using ::testing::Pointee;
+using ::testing::SizeIs;
using ::testing::UnorderedElementsAre;
// Properties/fields in a schema type
@@ -467,6 +468,229 @@ TEST(SchemaUtilTest, NonExistentType) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
+TEST(SchemaUtilTest, SimpleInheritance) {
+ // Create a schema with the following inheritance relation:
+ // A <- B
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema));
+ EXPECT_THAT(d_map, SizeIs(1));
+ EXPECT_THAT(d_map["A"], UnorderedElementsAre(Pair("B", IsEmpty())));
+}
+
+TEST(SchemaUtilTest, ComplexInheritance) {
+ // Create a schema with the following inheritance relation:
+ // A
+ // / \
+ // B E
+ // / \
+ // C D
+ // |
+ // F
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder().SetType("D").SetParentType("B").Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder().SetType("E").SetParentType("A").Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder().SetType("F").SetParentType("D").Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()),
+ Pair("D", IsEmpty()), Pair("E", IsEmpty()),
+ Pair("F", IsEmpty())));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("F", IsEmpty())));
+ EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair("F", IsEmpty())));
+}
+
+TEST(SchemaUtilTest, InheritanceCycle) {
+ // Create a schema with the following inheritance relation:
+ // C <- A <- B <- C
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder().SetType("A").SetParentType("C").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(SchemaUtilTest, SelfInheritance) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder().SetType("A").SetParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(SchemaUtilTest, NonExistentParentType) {
+ // Create a schema with the following inheritance relation:
+ // (does not exist) X <- A <- B <- C
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder().SetType("A").SetParentType("X").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(SchemaUtilTest, SimpleInheritanceWithNestedType) {
+ // Create a schema with the following dependent relation:
+ // A - B (via inheritance)
+ // B - C (via nested document)
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty())));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "C", UnorderedElementsAre(Pointee(
+ EqualsProto(type_c.properties(0)))))));
+}
+
+TEST(SchemaUtilTest, ComplexInheritanceWithNestedType) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / \
+ // B E
+ // / \
+ // C D
+ // |
+ // F
+ // Approach:
+ // B extends A
+ // C extends B
+ // D has a nested document of type B
+ // E has a nested document of type A
+ // F has a nested document of type D
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(
+ d_map["A"],
+ UnorderedElementsAre(
+ Pair("B", IsEmpty()), Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("E", UnorderedElementsAre(
+ Pointee(EqualsProto(type_e.properties(0))))),
+ Pair("F", IsEmpty())));
+ EXPECT_THAT(
+ d_map["B"],
+ UnorderedElementsAre(Pair("C", IsEmpty()),
+ Pair("D", UnorderedElementsAre(Pointee(
+ EqualsProto(type_d.properties(0))))),
+ Pair("F", IsEmpty())));
+ EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair(
+ "F", UnorderedElementsAre(Pointee(
+ EqualsProto(type_f.properties(0)))))));
+}
+
+TEST(SchemaUtilTest, InheritanceWithNestedTypeCycle) {
+ // Create a schema that A and B depend on each other, in the sense that B
+ // extends A but A has a nested document of type B.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
TEST(SchemaUtilTest, EmptySchemaProtoIsValid) {
SchemaProto schema;
ICING_ASSERT_OK(SchemaUtil::Validate(schema));
diff --git a/icing/scoring/advanced_scoring/advanced-scorer.cc b/icing/scoring/advanced_scoring/advanced-scorer.cc
index 771615c..8925024 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer.cc
+++ b/icing/scoring/advanced_scoring/advanced-scorer.cc
@@ -46,10 +46,10 @@ AdvancedScorer::Create(const ScoringSpecProto& scoring_spec,
ICING_ASSIGN_OR_RETURN(std::unique_ptr<SectionWeights> section_weights,
SectionWeights::Create(schema_store, scoring_spec));
std::unique_ptr<Bm25fCalculator> bm25f_calculator =
- std::make_unique<Bm25fCalculator>(document_store,
- std::move(section_weights));
+ std::make_unique<Bm25fCalculator>(document_store, section_weights.get());
ScoringVisitor visitor(default_score, document_store, schema_store,
- bm25f_calculator.get(), join_children_fetcher);
+ section_weights.get(), bm25f_calculator.get(),
+ join_children_fetcher);
tree_root->Accept(&visitor);
ICING_ASSIGN_OR_RETURN(std::unique_ptr<ScoreExpression> expression,
@@ -58,8 +58,9 @@ AdvancedScorer::Create(const ScoringSpecProto& scoring_spec,
return absl_ports::InvalidArgumentError(
"The root scoring expression is not of double type.");
}
- return std::unique_ptr<AdvancedScorer>(new AdvancedScorer(
- std::move(expression), std::move(bm25f_calculator), default_score));
+ return std::unique_ptr<AdvancedScorer>(
+ new AdvancedScorer(std::move(expression), std::move(section_weights),
+ std::move(bm25f_calculator), default_score));
}
} // namespace lib
diff --git a/icing/scoring/advanced_scoring/advanced-scorer.h b/icing/scoring/advanced_scoring/advanced-scorer.h
index 1a1cd5c..aa7914f 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer.h
+++ b/icing/scoring/advanced_scoring/advanced-scorer.h
@@ -66,9 +66,11 @@ class AdvancedScorer : public Scorer {
private:
explicit AdvancedScorer(std::unique_ptr<ScoreExpression> score_expression,
+ std::unique_ptr<SectionWeights> section_weights,
std::unique_ptr<Bm25fCalculator> bm25f_calculator,
double default_score)
: score_expression_(std::move(score_expression)),
+ section_weights_(std::move(section_weights)),
bm25f_calculator_(std::move(bm25f_calculator)),
default_score_(default_score) {
if (is_constant()) {
@@ -78,6 +80,7 @@ class AdvancedScorer : public Scorer {
}
std::unique_ptr<ScoreExpression> score_expression_;
+ std::unique_ptr<SectionWeights> section_weights_;
std::unique_ptr<Bm25fCalculator> bm25f_calculator_;
double default_score_;
};
diff --git a/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc
index d6262b0..8db592a 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc
+++ b/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc
@@ -38,7 +38,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
.ValueOrDie();
std::unique_ptr<DocumentStore> document_store =
DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
- schema_store.get())
+ schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr)
.ValueOrDie()
.document_store;
diff --git a/icing/scoring/advanced_scoring/advanced-scorer_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
index b3a47ba..c962bc5 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer_test.cc
+++ b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
@@ -65,7 +65,12 @@ class AdvancedScorerTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
@@ -77,8 +82,31 @@ class AdvancedScorerTest : public testing::Test {
.SetDataTypeString(
TermMatchType::PREFIX,
StringIndexingConfig::TokenizerType::PLAIN)
- .SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
@@ -135,6 +163,27 @@ ScoringSpecProto CreateAdvancedScoringSpec(
return scoring_spec;
}
+PropertyWeight CreatePropertyWeight(std::string path, double weight) {
+ PropertyWeight property_weight;
+ property_weight.set_path(std::move(path));
+ property_weight.set_weight(weight);
+ return property_weight;
+}
+
+TypePropertyWeights CreateTypePropertyWeights(
+ std::string schema_type, std::vector<PropertyWeight>&& property_weights) {
+ TypePropertyWeights type_property_weights;
+ type_property_weights.set_schema_type(std::move(schema_type));
+ type_property_weights.mutable_property_weights()->Reserve(
+ property_weights.size());
+
+ for (PropertyWeight& property_weight : property_weights) {
+ *type_property_weights.add_property_weights() = std::move(property_weight);
+ }
+
+ return type_property_weights;
+}
+
TEST_F(AdvancedScorerTest, InvalidAdvancedScoringSpec) {
// Empty scoring expression for advanced scoring
ScoringSpecProto scoring_spec;
@@ -541,6 +590,161 @@ TEST_F(AdvancedScorerTest, ChildrenScoresFunctionScoreExpression) {
Eq(default_score));
}
+TEST_F(AdvancedScorerTest, PropertyWeightsFunctionScoreExpression) {
+ DocumentProto test_document_1 =
+ DocumentBuilder().SetKey("namespace", "uri1").SetSchema("email").Build();
+ DocumentProto test_document_2 =
+ DocumentBuilder().SetKey("namespace", "uri2").SetSchema("person").Build();
+ DocumentProto test_document_3 =
+ DocumentBuilder().SetKey("namespace", "uri3").SetSchema("person").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_1,
+ document_store_->Put(test_document_1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_2,
+ document_store_->Put(test_document_2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_3,
+ document_store_->Put(test_document_3));
+
+ ScoringSpecProto spec_proto = CreateAdvancedScoringSpec("");
+
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"email",
+ {CreatePropertyWeight(/*path=*/"subject", /*weight=*/1.0)});
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"person",
+ {CreatePropertyWeight(/*path=*/"emailAddress", /*weight=*/0.5),
+ CreatePropertyWeight(/*path=*/"name", /*weight=*/0.8),
+ CreatePropertyWeight(/*path=*/"phoneNumber", /*weight=*/1.0)});
+
+ // Let the hit for test_document_1 match property "subject".
+ // So this.propertyWeights() for test_document_1 will return [1].
+ DocHitInfo doc_hit_info_1 = DocHitInfo(document_id_1);
+ doc_hit_info_1.UpdateSection(0);
+
+ // Let the hit for test_document_2 match properties "emailAddress" and "name".
+ // So this.propertyWeights() for test_document_2 will return [0.5, 0.8].
+ DocHitInfo doc_hit_info_2 = DocHitInfo(document_id_2);
+ doc_hit_info_2.UpdateSection(0);
+ doc_hit_info_2.UpdateSection(1);
+
+ // Let the hit for test_document_3 match properties "emailAddress", "name" and
+ // "phoneNumber". So this.propertyWeights() for test_document_3 will return
+ // [0.5, 0.8, 1].
+ DocHitInfo doc_hit_info_3 = DocHitInfo(document_id_3);
+ doc_hit_info_3.UpdateSection(0);
+ doc_hit_info_3.UpdateSection(1);
+ doc_hit_info_3.UpdateSection(2);
+
+ spec_proto.set_advanced_scoring_expression("min(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get()));
+ // min([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // min([0.5, 0.8]) = 0.5
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.5));
+ // min([0.5, 0.8, 1.0]) = 0.5
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(0.5));
+
+ spec_proto.set_advanced_scoring_expression("max(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get()));
+ // max([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // max([0.5, 0.8]) = 0.8
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.8));
+ // max([0.5, 0.8, 1.0]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(1));
+
+ spec_proto.set_advanced_scoring_expression("sum(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get()));
+ // sum([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // sum([0.5, 0.8]) = 1.3
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(1.3));
+ // sum([0.5, 0.8, 1.0]) = 2.3
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(2.3));
+}
+
+TEST_F(AdvancedScorerTest,
+ PropertyWeightsFunctionScoreExpressionUnspecifiedWeights) {
+ DocumentProto test_document_1 =
+ DocumentBuilder().SetKey("namespace", "uri1").SetSchema("email").Build();
+ DocumentProto test_document_2 =
+ DocumentBuilder().SetKey("namespace", "uri2").SetSchema("person").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_1,
+ document_store_->Put(test_document_1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_2,
+ document_store_->Put(test_document_2));
+
+ ScoringSpecProto spec_proto = CreateAdvancedScoringSpec("");
+
+ // The entry for type "email" is missing, so every properties in "email"
+ // should get weight 1.0.
+ // The weight of "phoneNumber" in "person" type is unspecified, which should
+ // default to 1/2 = 0.5
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"person",
+ {CreatePropertyWeight(/*path=*/"emailAddress", /*weight=*/1.0),
+ CreatePropertyWeight(/*path=*/"name", /*weight=*/2)});
+
+ // Let the hit for test_document_1 match property "subject".
+ // So this.propertyWeights() for test_document_1 will return [1].
+ DocHitInfo doc_hit_info_1 = DocHitInfo(document_id_1);
+ doc_hit_info_1.UpdateSection(0);
+
+ // Let the hit for test_document_2 match properties "emailAddress", "name" and
+ // "phoneNumber". So this.propertyWeights() for test_document_3 will return
+ // [0.5, 1, 0.5].
+ DocHitInfo doc_hit_info_2 = DocHitInfo(document_id_2);
+ doc_hit_info_2.UpdateSection(0);
+ doc_hit_info_2.UpdateSection(1);
+ doc_hit_info_2.UpdateSection(2);
+
+ spec_proto.set_advanced_scoring_expression("min(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get()));
+ // min([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // min([0.5, 1, 0.5]) = 0.5
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.5));
+
+ spec_proto.set_advanced_scoring_expression("max(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get()));
+ // max([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // max([0.5, 1, 0.5]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(1));
+
+ spec_proto.set_advanced_scoring_expression("sum(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get()));
+ // sum([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // sum([0.5, 1, 0.5]) = 2
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(2));
+}
+
TEST_F(AdvancedScorerTest, InvalidChildrenScoresFunctionScoreExpression) {
const double default_score = 123;
diff --git a/icing/scoring/advanced_scoring/score-expression.cc b/icing/scoring/advanced_scoring/score-expression.cc
index 7a8f046..3851660 100644
--- a/icing/scoring/advanced_scoring/score-expression.cc
+++ b/icing/scoring/advanced_scoring/score-expression.cc
@@ -459,5 +459,58 @@ ChildrenScoresFunctionScoreExpression::eval_list(
return std::move(children_scores);
}
+libtextclassifier3::StatusOr<
+ std::unique_ptr<PropertyWeightsFunctionScoreExpression>>
+PropertyWeightsFunctionScoreExpression::Create(
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store,
+ const SectionWeights* section_weights) {
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError(
+ "propertyWeights must have 1 argument.");
+ }
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
+ return absl_ports::InvalidArgumentError(
+ "propertyWeights must take \"this\" as its argument.");
+ }
+ return std::unique_ptr<PropertyWeightsFunctionScoreExpression>(
+ new PropertyWeightsFunctionScoreExpression(document_store,
+ section_weights));
+}
+
+libtextclassifier3::StatusOr<std::vector<double>>
+PropertyWeightsFunctionScoreExpression::eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator*) const {
+ std::vector<double> weights;
+ SectionIdMask sections = hit_info.hit_section_ids_mask();
+ SchemaTypeId schema_type_id = GetSchemaTypeId(hit_info.document_id());
+
+ while (sections != 0) {
+ SectionId section_id = __builtin_ctzll(sections);
+ sections &= ~(UINT64_C(1) << section_id);
+ weights.push_back(section_weights_.GetNormalizedSectionWeight(
+ schema_type_id, section_id));
+ }
+ return weights;
+}
+
+SchemaTypeId PropertyWeightsFunctionScoreExpression::GetSchemaTypeId(
+ DocumentId document_id) const {
+ auto filter_data_optional =
+ document_store_.GetAliveDocumentFilterData(document_id);
+ if (!filter_data_optional) {
+ // This should never happen. The only failure case for
+ // GetDocumentFilterData is if the document_id is outside of the range of
+ // allocated document_ids, which shouldn't be possible since we're getting
+ // this document_id from the posting lists.
+ ICING_LOG(WARNING) << "No document filter data for document ["
+ << document_id << "]";
+ return kInvalidSchemaTypeId;
+ }
+ return filter_data_optional.value().schema_type_id();
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/scoring/advanced_scoring/score-expression.h b/icing/scoring/advanced_scoring/score-expression.h
index b70cd46..ad719fe 100644
--- a/icing/scoring/advanced_scoring/score-expression.h
+++ b/icing/scoring/advanced_scoring/score-expression.h
@@ -303,6 +303,38 @@ class ChildrenScoresFunctionScoreExpression : public ScoreExpression {
const JoinChildrenFetcher& join_children_fetcher_;
};
+class PropertyWeightsFunctionScoreExpression : public ScoreExpression {
+ public:
+ static constexpr std::string_view kFunctionName = "propertyWeights";
+
+ // RETURNS:
+ // - A PropertyWeightsFunctionScoreExpression instance on success.
+ // - FAILED_PRECONDITION on any null pointer in children.
+ // - INVALID_ARGUMENT on type errors.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PropertyWeightsFunctionScoreExpression>>
+ Create(std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store,
+ const SectionWeights* section_weights);
+
+ libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator*) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDoubleList;
+ }
+
+ SchemaTypeId GetSchemaTypeId(DocumentId document_id) const;
+
+ private:
+ explicit PropertyWeightsFunctionScoreExpression(
+ const DocumentStore* document_store,
+ const SectionWeights* section_weights)
+ : document_store_(*document_store), section_weights_(*section_weights) {}
+ const DocumentStore& document_store_;
+ const SectionWeights& section_weights_;
+};
+
} // namespace lib
} // namespace icing
diff --git a/icing/scoring/advanced_scoring/scoring-visitor.cc b/icing/scoring/advanced_scoring/scoring-visitor.cc
index 059e65b..b3b831c 100644
--- a/icing/scoring/advanced_scoring/scoring-visitor.cc
+++ b/icing/scoring/advanced_scoring/scoring-visitor.cc
@@ -109,6 +109,11 @@ void ScoringVisitor::VisitFunctionHelper(const FunctionNode* node,
// childrenScores function
expression = ChildrenScoresFunctionScoreExpression::Create(
std::move(args), join_children_fetcher_);
+ } else if (function_name ==
+ PropertyWeightsFunctionScoreExpression::kFunctionName) {
+ // propertyWeights function
+ expression = PropertyWeightsFunctionScoreExpression::Create(
+ std::move(args), &document_store_, &section_weights_);
} else if (MathFunctionScoreExpression::kFunctionNames.find(function_name) !=
MathFunctionScoreExpression::kFunctionNames.end()) {
// Math functions
diff --git a/icing/scoring/advanced_scoring/scoring-visitor.h b/icing/scoring/advanced_scoring/scoring-visitor.h
index 9b01f73..9d9c105 100644
--- a/icing/scoring/advanced_scoring/scoring-visitor.h
+++ b/icing/scoring/advanced_scoring/scoring-visitor.h
@@ -33,11 +33,13 @@ class ScoringVisitor : public AbstractSyntaxTreeVisitor {
explicit ScoringVisitor(double default_score,
const DocumentStore* document_store,
const SchemaStore* schema_store,
+ SectionWeights* section_weights,
Bm25fCalculator* bm25f_calculator,
const JoinChildrenFetcher* join_children_fetcher)
: default_score_(default_score),
document_store_(*document_store),
schema_store_(*schema_store),
+ section_weights_(*section_weights),
bm25f_calculator_(*bm25f_calculator),
join_children_fetcher_(join_children_fetcher) {}
@@ -88,6 +90,7 @@ class ScoringVisitor : public AbstractSyntaxTreeVisitor {
double default_score_;
const DocumentStore& document_store_;
const SchemaStore& schema_store_;
+ SectionWeights& section_weights_;
Bm25fCalculator& bm25f_calculator_;
// A non-null join_children_fetcher_ indicates scoring in a join.
const JoinChildrenFetcher* join_children_fetcher_; // Does not own.
diff --git a/icing/scoring/bm25f-calculator.cc b/icing/scoring/bm25f-calculator.cc
index f169cda..9120cc7 100644
--- a/icing/scoring/bm25f-calculator.cc
+++ b/icing/scoring/bm25f-calculator.cc
@@ -42,11 +42,9 @@ constexpr float k1_ = 1.2f;
constexpr float b_ = 0.7f;
// TODO(b/158603900): add tests for Bm25fCalculator
-Bm25fCalculator::Bm25fCalculator(
- const DocumentStore* document_store,
- std::unique_ptr<SectionWeights> section_weights)
- : document_store_(document_store),
- section_weights_(std::move(section_weights)) {}
+Bm25fCalculator::Bm25fCalculator(const DocumentStore* document_store,
+ SectionWeights* section_weights)
+ : document_store_(document_store), section_weights_(*section_weights) {}
// During initialization, Bm25fCalculator iterates through
// hit-iterators for each query term to pre-compute n(q_i) for each corpus under
@@ -219,7 +217,7 @@ float Bm25fCalculator::ComputeTermFrequencyForMatchedSections(
sections &= ~(UINT64_C(1) << section_id);
Hit::TermFrequency tf = term_match_info.term_frequencies[section_id];
- double weighted_tf = tf * section_weights_->GetNormalizedSectionWeight(
+ double weighted_tf = tf * section_weights_.GetNormalizedSectionWeight(
schema_type_id, section_id);
if (tf != Hit::kNoTermFrequency) {
sum += weighted_tf;
diff --git a/icing/scoring/bm25f-calculator.h b/icing/scoring/bm25f-calculator.h
index 05009d8..dc5e081 100644
--- a/icing/scoring/bm25f-calculator.h
+++ b/icing/scoring/bm25f-calculator.h
@@ -63,8 +63,8 @@ namespace lib {
// see: glossary/bm25
class Bm25fCalculator {
public:
- explicit Bm25fCalculator(const DocumentStore *document_store_,
- std::unique_ptr<SectionWeights> section_weights_);
+ explicit Bm25fCalculator(const DocumentStore *document_store,
+ SectionWeights *section_weights);
// Precompute and cache statistics relevant to BM25F.
// Populates term_id_map_ and corpus_nqi_map_ for use while scoring other
@@ -145,7 +145,7 @@ class Bm25fCalculator {
// Used for accessing normalized section weights when computing the weighted
// term frequency.
- std::unique_ptr<SectionWeights> section_weights_;
+ SectionWeights &section_weights_;
// Map from query term to compact term ID.
// Necessary as a key to the other maps.
diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc
index 9a126dc..076e36a 100644
--- a/icing/scoring/score-and-rank_benchmark.cc
+++ b/icing/scoring/score-and-rank_benchmark.cc
@@ -89,6 +89,17 @@ DocumentProto CreateEmailDocument(int id, int document_score,
.Build();
}
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
const std::string base_dir = GetTestTempDir() + "/score_and_rank_benchmark";
const std::string document_store_dir = base_dir + "/document_store";
@@ -107,8 +118,8 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -208,8 +219,8 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -312,8 +323,8 @@ void BM_ScoreAndRankDocumentHitsNoScoring(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -410,8 +421,8 @@ void BM_ScoreAndRankDocumentHitsByRelevanceScoring(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
diff --git a/icing/scoring/scorer-factory.cc b/icing/scoring/scorer-factory.cc
index d4d8b79..5b8609b 100644
--- a/icing/scoring/scorer-factory.cc
+++ b/icing/scoring/scorer-factory.cc
@@ -78,8 +78,10 @@ class DocumentCreationTimestampScorer : public Scorer {
class RelevanceScoreScorer : public Scorer {
public:
explicit RelevanceScoreScorer(
+ std::unique_ptr<SectionWeights> section_weights,
std::unique_ptr<Bm25fCalculator> bm25f_calculator, double default_score)
- : bm25f_calculator_(std::move(bm25f_calculator)),
+ : section_weights_(std::move(section_weights)),
+ bm25f_calculator_(std::move(bm25f_calculator)),
default_score_(default_score) {}
void PrepareToScore(
@@ -99,6 +101,7 @@ class RelevanceScoreScorer : public Scorer {
}
private:
+ std::unique_ptr<SectionWeights> section_weights_;
std::unique_ptr<Bm25fCalculator> bm25f_calculator_;
double default_score_;
};
@@ -193,8 +196,9 @@ libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
SectionWeights::Create(schema_store, scoring_spec));
auto bm25f_calculator = std::make_unique<Bm25fCalculator>(
- document_store, std::move(section_weights));
- return std::make_unique<RelevanceScoreScorer>(std::move(bm25f_calculator),
+ document_store, section_weights.get());
+ return std::make_unique<RelevanceScoreScorer>(std::move(section_weights),
+ std::move(bm25f_calculator),
default_score);
}
case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT:
diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc
index 08705a1..2649c95 100644
--- a/icing/scoring/scorer_test.cc
+++ b/icing/scoring/scorer_test.cc
@@ -65,7 +65,12 @@ class ScorerTest : public ::testing::TestWithParam<ScorerTestingMode> {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock1_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc
index 1e579be..5c42236 100644
--- a/icing/scoring/scoring-processor_test.cc
+++ b/icing/scoring/scoring-processor_test.cc
@@ -63,7 +63,12 @@ class ScoringProcessorTest
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
- schema_store_.get()));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc
index c9769f2..2abd315 100644
--- a/icing/store/document-log-creator.cc
+++ b/icing/store/document-log-creator.cc
@@ -66,7 +66,8 @@ std::string DocumentLogCreator::GetDocumentLogFilename() {
libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult>
DocumentLogCreator::Create(const Filesystem* filesystem,
- const std::string& base_dir) {
+ const std::string& base_dir,
+ int32_t compression_level) {
bool v0_exists =
filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str());
bool v1_exists =
@@ -75,7 +76,8 @@ DocumentLogCreator::Create(const Filesystem* filesystem,
bool new_file = false;
int preexisting_file_version = kCurrentVersion;
if (v0_exists && !v1_exists) {
- ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir));
+ ICING_RETURN_IF_ERROR(
+ MigrateFromV0ToV1(filesystem, base_dir, compression_level));
// Need to regenerate derived files since documents may be written to a
// different file offset in the log.
@@ -94,7 +96,9 @@ DocumentLogCreator::Create(const Filesystem* filesystem,
PortableFileBackedProtoLog<DocumentWrapper>::Create(
filesystem, MakeDocumentLogFilenameV1(base_dir),
PortableFileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
+ /*compress_in=*/true,
+ PortableFileBackedProtoLog<DocumentWrapper>::kMaxProtoSize,
+ compression_level)));
CreateResult create_result = {std::move(log_create_result),
preexisting_file_version, new_file};
@@ -102,15 +106,15 @@ DocumentLogCreator::Create(const Filesystem* filesystem,
}
libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
- const Filesystem* filesystem, const std::string& base_dir) {
+ const Filesystem* filesystem, const std::string& base_dir,
+ int32_t compression_level) {
ICING_VLOG(1) << "Migrating from v0 to v1 document log.";
// Our v0 proto log was non-portable, create it so we can read protos out from
// it.
auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
filesystem, MakeDocumentLogFilenameV0(base_dir),
- FileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true));
+ FileBackedProtoLog<DocumentWrapper>::Options(/*compress_in=*/true));
if (!v0_create_result_or.ok()) {
return absl_ports::Annotate(
v0_create_result_or.status(),
@@ -127,7 +131,10 @@ libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
PortableFileBackedProtoLog<DocumentWrapper>::Create(
filesystem, MakeDocumentLogFilenameV1(base_dir),
PortableFileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true));
+ /*compress_in=*/true,
+ /*max_proto_size_in=*/
+ PortableFileBackedProtoLog<DocumentWrapper>::kMaxProtoSize,
+ /*compression_level_in=*/compression_level));
if (!v1_create_result_or.ok()) {
return absl_ports::Annotate(
v1_create_result_or.status(),
diff --git a/icing/store/document-log-creator.h b/icing/store/document-log-creator.h
index be8feed..0c2794a 100644
--- a/icing/store/document-log-creator.h
+++ b/icing/store/document-log-creator.h
@@ -57,7 +57,8 @@ class DocumentLogCreator {
// CreateResult on success.
// INTERNAL on any I/O error.
static libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> Create(
- const Filesystem* filesystem, const std::string& base_dir);
+ const Filesystem* filesystem, const std::string& base_dir,
+ int32_t compression_level);
// Returns the filename of the document log, without any directory prefixes.
// Used mainly for testing purposes.
@@ -74,7 +75,8 @@ class DocumentLogCreator {
// INVALID_ARGUMENT if some invalid option was passed to the document log.
// INTERNAL on I/O error.
static libtextclassifier3::Status MigrateFromV0ToV1(
- const Filesystem* filesystem, const std::string& base_dir);
+ const Filesystem* filesystem, const std::string& base_dir,
+ int32_t compression_level);
};
} // namespace lib
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index a81ae4b..ae8bfc0 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -230,13 +230,15 @@ DocumentStore::DocumentStore(const Filesystem* filesystem,
const std::string_view base_dir,
const Clock* clock,
const SchemaStore* schema_store,
- bool namespace_id_fingerprint)
+ bool namespace_id_fingerprint,
+ int32_t compression_level)
: filesystem_(filesystem),
base_dir_(base_dir),
clock_(*clock),
schema_store_(schema_store),
document_validator_(schema_store),
- namespace_id_fingerprint_(namespace_id_fingerprint) {}
+ namespace_id_fingerprint_(namespace_id_fingerprint),
+ compression_level_(compression_level) {}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
const DocumentProto& document, int32_t num_tokens,
@@ -264,13 +266,14 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
bool force_recovery_and_revalidate_documents, bool namespace_id_fingerprint,
- InitializeStatsProto* initialize_stats) {
+ int32_t compression_level, InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(schema_store);
auto document_store = std::unique_ptr<DocumentStore>(new DocumentStore(
- filesystem, base_dir, clock, schema_store, namespace_id_fingerprint));
+ filesystem, base_dir, clock, schema_store, namespace_id_fingerprint,
+ compression_level));
ICING_ASSIGN_OR_RETURN(
DataLoss data_loss,
document_store->Initialize(force_recovery_and_revalidate_documents,
@@ -285,7 +288,8 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats) {
- auto create_result_or = DocumentLogCreator::Create(filesystem_, base_dir_);
+ auto create_result_or =
+ DocumentLogCreator::Create(filesystem_, base_dir_, compression_level_);
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
@@ -1725,6 +1729,7 @@ libtextclassifier3::Status DocumentStore::Optimize() {
libtextclassifier3::StatusOr<std::vector<DocumentId>>
DocumentStore::OptimizeInto(const std::string& new_directory,
const LanguageSegmenter* lang_segmenter,
+ bool namespace_id_fingerprint,
OptimizeStatsProto* stats) {
// Validates directory
if (new_directory == base_dir_) {
@@ -1732,9 +1737,12 @@ DocumentStore::OptimizeInto(const std::string& new_directory,
"New directory is the same as the current one.");
}
- ICING_ASSIGN_OR_RETURN(auto doc_store_create_result,
- DocumentStore::Create(filesystem_, new_directory,
- &clock_, schema_store_));
+ ICING_ASSIGN_OR_RETURN(
+ auto doc_store_create_result,
+ DocumentStore::Create(filesystem_, new_directory, &clock_, schema_store_,
+ /*force_recovery_and_revalidate_documents=*/false,
+ namespace_id_fingerprint, compression_level_,
+ /*initialize_stats=*/nullptr));
std::unique_ptr<DocumentStore> new_doc_store =
std::move(doc_store_create_result.document_store);
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 6feed2e..88050ce 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -141,9 +141,10 @@ class DocumentStore {
static libtextclassifier3::StatusOr<DocumentStore::CreateResult> Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
- bool force_recovery_and_revalidate_documents = false,
- bool namespace_id_fingerprint = false,
- InitializeStatsProto* initialize_stats = nullptr);
+ bool force_recovery_and_revalidate_documents,
+ bool namespace_id_fingerprint,
+ int32_t compression_level,
+ InitializeStatsProto* initialize_stats);
// Returns the maximum DocumentId that the DocumentStore has assigned. If
// there has not been any DocumentIds assigned, i.e. the DocumentStore is
@@ -445,7 +446,7 @@ class DocumentStore {
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<std::vector<DocumentId>> OptimizeInto(
const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
- OptimizeStatsProto* stats = nullptr);
+ bool namespace_id_fingerprint, OptimizeStatsProto* stats = nullptr);
// Calculates status for a potential Optimize call. Includes how many docs
// there are vs how many would be optimized away. And also includes an
@@ -479,7 +480,7 @@ class DocumentStore {
// Use DocumentStore::Create() to instantiate.
DocumentStore(const Filesystem* filesystem, std::string_view base_dir,
const Clock* clock, const SchemaStore* schema_store,
- bool namespace_id_fingerprint);
+ bool namespace_id_fingerprint, int32_t compression_level);
const Filesystem* const filesystem_;
const std::string base_dir_;
@@ -496,6 +497,8 @@ class DocumentStore {
// document_key_mapper_ and corpus_mapper_.
bool namespace_id_fingerprint_;
+ const int32_t compression_level_;
+
// A log used to store all documents, it serves as a ground truth of doc
// store. key_mapper_ and document_id_mapper_ can be regenerated from it.
std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_;
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
index a4b3a17..99e17c7 100644
--- a/icing/store/document-store_benchmark.cc
+++ b/icing/store/document-store_benchmark.cc
@@ -124,6 +124,17 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(Filesystem filesystem,
return schema_store;
}
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
Filesystem filesystem;
Clock clock;
@@ -138,8 +149,8 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -178,8 +189,8 @@ void BM_Put(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -207,8 +218,8 @@ void BM_GetSameDocument(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -234,8 +245,8 @@ void BM_Delete(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -268,8 +279,8 @@ void BM_Create(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -284,7 +295,7 @@ void BM_Create(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
for (auto s : state) {
- benchmark::DoNotOptimize(DocumentStore::Create(
+ benchmark::DoNotOptimize(CreateDocumentStore(
&filesystem, document_store_dir, &clock, schema_store.get()));
}
}
@@ -304,8 +315,8 @@ void BM_ComputeChecksum(benchmark::State& state) {
filesystem.CreateDirectoryRecursively(document_store_dir.data());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 1d9a2a0..896d852 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -215,6 +215,17 @@ class DocumentStoreTest : public ::testing::Test {
filesystem_.Write(header_file.c_str(), &header, sizeof(header));
}
+ libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+ }
+
const Filesystem filesystem_;
const std::string test_dir_;
FakeClock fake_clock_;
@@ -240,16 +251,16 @@ class DocumentStoreTest : public ::testing::Test {
};
TEST_F(DocumentStoreTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(DocumentStore::Create(/*filesystem=*/nullptr, document_store_dir_,
- &fake_clock_, schema_store_.get()),
+ EXPECT_THAT(CreateDocumentStore(/*filesystem=*/nullptr, document_store_dir_,
+ &fake_clock_, schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(DocumentStore::Create(&filesystem_, document_store_dir_,
- /*clock=*/nullptr, schema_store_.get()),
+ EXPECT_THAT(CreateDocumentStore(&filesystem_, document_store_dir_,
+ /*clock=*/nullptr, schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(DocumentStore::Create(&filesystem_, document_store_dir_,
- &fake_clock_, /*schema_store=*/nullptr),
+ EXPECT_THAT(CreateDocumentStore(&filesystem_, document_store_dir_,
+ &fake_clock_, /*schema_store=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
@@ -257,16 +268,16 @@ TEST_F(DocumentStoreTest, CreationWithBadFilesystemShouldFail) {
MockFilesystem mock_filesystem;
ON_CALL(mock_filesystem, OpenForWrite(_)).WillByDefault(Return(false));
- EXPECT_THAT(DocumentStore::Create(&mock_filesystem, document_store_dir_,
- &fake_clock_, schema_store_.get()),
+ EXPECT_THAT(CreateDocumentStore(&mock_filesystem, document_store_dir_,
+ &fake_clock_, schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -285,8 +296,8 @@ TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -318,8 +329,8 @@ TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
TEST_F(DocumentStoreTest, PutSameKey) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -347,8 +358,8 @@ TEST_F(DocumentStoreTest, PutSameKey) {
TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -378,8 +389,8 @@ TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) {
TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -405,8 +416,8 @@ TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -433,8 +444,8 @@ TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) {
TEST_F(DocumentStoreTest, GetInvalidDocumentId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -460,8 +471,8 @@ TEST_F(DocumentStoreTest, GetInvalidDocumentId) {
TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -486,8 +497,8 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -515,8 +526,8 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) {
TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -535,8 +546,8 @@ TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -579,8 +590,8 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -604,8 +615,8 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
TEST_F(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -642,8 +653,8 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -669,8 +680,8 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -710,8 +721,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -779,8 +790,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -805,8 +816,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
TEST_F(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -853,8 +864,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -879,8 +890,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -900,8 +911,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
TEST_F(DocumentStoreTest, PutDeleteThenPut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
ICING_EXPECT_OK(doc_store->Put(test_document1_));
@@ -944,8 +955,8 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -983,8 +994,8 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -1004,8 +1015,8 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
TEST_F(DocumentStoreTest, OptimizeInto) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1045,7 +1056,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
// Optimizing into the same directory is not allowed
EXPECT_THAT(
- doc_store->OptimizeInto(document_store_dir_, lang_segmenter_.get()),
+ doc_store->OptimizeInto(document_store_dir_, lang_segmenter_.get(),
+ /*namespace_id_fingerprint=*/false),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
HasSubstr("directory is the same")));
@@ -1057,7 +1069,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
// deleted
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
+ EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
+ /*namespace_id_fingerprint=*/false),
IsOkAndHolds(ElementsAre(0, 1, 2)));
int64_t optimized_size1 =
filesystem_.GetFileSize(optimized_document_log.c_str());
@@ -1069,7 +1082,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1"));
// DocumentId 0 is removed.
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
+ EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
+ /*namespace_id_fingerprint=*/false),
IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, 1)));
int64_t optimized_size2 =
filesystem_.GetFileSize(optimized_document_log.c_str());
@@ -1085,7 +1099,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
// DocumentId 0 is removed, and DocumentId 2 is expired.
EXPECT_THAT(
- doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
+ /*namespace_id_fingerprint=*/false),
IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, kInvalidDocumentId)));
int64_t optimized_size3 =
filesystem_.GetFileSize(optimized_document_log.c_str());
@@ -1096,7 +1111,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
ICING_ASSERT_OK(doc_store->Delete("namespace", "uri2"));
// DocumentId 0 and 1 is removed, and DocumentId 2 is expired.
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
+ EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
+ /*namespace_id_fingerprint=*/false),
IsOkAndHolds(ElementsAre(kInvalidDocumentId, kInvalidDocumentId,
kInvalidDocumentId)));
int64_t optimized_size4 =
@@ -1107,14 +1123,15 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
TEST_F(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
std::string optimized_dir = document_store_dir_ + "_optimize";
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
+ EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
+ /*namespace_id_fingerprint=*/false),
IsOkAndHolds(IsEmpty()));
}
@@ -1124,8 +1141,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1177,8 +1194,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1212,8 +1229,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1268,8 +1285,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
// NOTE: this doesn't trigger RegenerateDerivedFiles.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1305,8 +1322,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1345,8 +1362,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1377,8 +1394,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
TEST_F(DocumentStoreTest, GetStorageInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1408,9 +1425,8 @@ TEST_F(DocumentStoreTest, GetStorageInfo) {
ON_CALL(mock_filesystem, GetDiskUsage(A<const char*>()))
.WillByDefault(Return(Filesystem::kBadFileSize));
ICING_ASSERT_OK_AND_ASSIGN(
- create_result,
- DocumentStore::Create(&mock_filesystem, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ create_result, CreateDocumentStore(&mock_filesystem, document_store_dir_,
+ &fake_clock_, schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem =
std::move(create_result.document_store);
@@ -1421,8 +1437,8 @@ TEST_F(DocumentStoreTest, GetStorageInfo) {
TEST_F(DocumentStoreTest, MaxDocumentId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1445,8 +1461,8 @@ TEST_F(DocumentStoreTest, MaxDocumentId) {
TEST_F(DocumentStoreTest, GetNamespaceId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1478,8 +1494,8 @@ TEST_F(DocumentStoreTest, GetNamespaceId) {
TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1498,8 +1514,8 @@ TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) {
TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1510,8 +1526,8 @@ TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) {
TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1531,8 +1547,8 @@ TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
TEST_F(DocumentStoreTest, GetCorpusId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1566,8 +1582,8 @@ TEST_F(DocumentStoreTest, GetCorpusId) {
TEST_F(DocumentStoreTest, NonexistentCorpusNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1590,8 +1606,8 @@ TEST_F(DocumentStoreTest, NonexistentCorpusNotFound) {
TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1615,8 +1631,8 @@ TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) {
TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreData) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1653,8 +1669,8 @@ TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreData) {
TEST_F(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1665,8 +1681,8 @@ TEST_F(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) {
TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1709,8 +1725,8 @@ TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) {
TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1753,8 +1769,8 @@ TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) {
TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1765,8 +1781,8 @@ TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) {
TEST_F(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1776,8 +1792,8 @@ TEST_F(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) {
TEST_F(DocumentStoreTest, DeleteClearsFilterCache) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1800,8 +1816,8 @@ TEST_F(DocumentStoreTest, DeleteClearsFilterCache) {
TEST_F(DocumentStoreTest, DeleteClearsScoreCache) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1824,8 +1840,8 @@ TEST_F(DocumentStoreTest, DeleteClearsScoreCache) {
TEST_F(DocumentStoreTest, DeleteShouldPreventUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1859,8 +1875,8 @@ TEST_F(DocumentStoreTest, DeleteShouldPreventUsageScores) {
TEST_F(DocumentStoreTest, ExpirationShouldPreventUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1914,8 +1930,8 @@ TEST_F(DocumentStoreTest,
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1939,8 +1955,8 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -1969,8 +1985,8 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -2002,8 +2018,8 @@ TEST_F(DocumentStoreTest, CreationTimestampShouldBePopulated) {
fake_clock_.SetSystemTimeMilliseconds(fake_real_time);
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -2035,8 +2051,8 @@ TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -2061,8 +2077,8 @@ TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2076,8 +2092,8 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2087,8 +2103,8 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
// Destroy the previous instance and recreate DocumentStore
document_store.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- create_result, DocumentStore::Create(&filesystem_, document_store_dir_,
- &fake_clock_, schema_store_.get()));
+ create_result, CreateDocumentStore(&filesystem_, document_store_dir_,
+ &fake_clock_, schema_store_.get()));
document_store = std::move(create_result.document_store);
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
@@ -2097,8 +2113,8 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2113,8 +2129,8 @@ TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
TEST_F(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2170,8 +2186,8 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2225,8 +2241,8 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
// because the "message" schema type is missing
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2292,8 +2308,8 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
// Add the documents and check SchemaTypeIds match
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2382,8 +2398,8 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2452,8 +2468,8 @@ TEST_F(DocumentStoreTest,
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2524,8 +2540,8 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
// Add the documents and check SchemaTypeIds match
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2616,8 +2632,8 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2689,8 +2705,8 @@ TEST_F(DocumentStoreTest,
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2729,8 +2745,8 @@ TEST_F(DocumentStoreTest,
TEST_F(DocumentStoreTest, GetOptimizeInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2763,11 +2779,12 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) {
EXPECT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
EXPECT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
ICING_ASSERT_OK(
- document_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ document_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
+ /*namespace_id_fingerprint=*/false));
document_store.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- create_result, DocumentStore::Create(&filesystem_, optimized_dir,
- &fake_clock_, schema_store_.get()));
+ create_result, CreateDocumentStore(&filesystem_, optimized_dir,
+ &fake_clock_, schema_store_.get()));
std::unique_ptr<DocumentStore> optimized_document_store =
std::move(create_result.document_store);
@@ -2781,8 +2798,8 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) {
TEST_F(DocumentStoreTest, GetAllNamespaces) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2851,8 +2868,8 @@ TEST_F(DocumentStoreTest, GetAllNamespaces) {
TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2937,8 +2954,8 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -2987,8 +3004,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) {
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -3012,8 +3029,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) {
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -3030,8 +3047,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -3066,8 +3083,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -3081,8 +3098,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -3119,8 +3136,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -3149,12 +3166,13 @@ TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
std::string optimized_dir = document_store_dir_ + "/optimize_test";
filesystem_.CreateDirectoryRecursively(optimized_dir.c_str());
ICING_ASSERT_OK(
- document_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ document_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
+ /*namespace_id_fingerprint=*/false));
// Get optimized document store
ICING_ASSERT_OK_AND_ASSIGN(
- create_result, DocumentStore::Create(&filesystem_, optimized_dir,
- &fake_clock_, schema_store_.get()));
+ create_result, CreateDocumentStore(&filesystem_, optimized_dir,
+ &fake_clock_, schema_store_.get()));
std::unique_ptr<DocumentStore> optimized_document_store =
std::move(create_result.document_store);
@@ -3171,8 +3189,8 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) {
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
@@ -3200,8 +3218,8 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) {
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
@@ -3215,8 +3233,8 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
@@ -3265,8 +3283,8 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
@@ -3314,7 +3332,9 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
DocumentStore::Create(
&filesystem_, document_store_dir_, &fake_clock_, schema_store_.get(),
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false, &initialize_stats));
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
// The document log is using the legacy v0 format so that a migration is
@@ -3326,8 +3346,8 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3464,8 +3484,8 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
// Create the document store the first time and add an email document.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3515,10 +3535,13 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(
- &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/true,
- /*namespace_id_fingerprint=*/false, &initialize_stats));
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3564,8 +3587,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
// Create the document store the first time and add an email document.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3614,9 +3637,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
// Create the document store the second time. Don't force recovery.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(
- &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/false));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3680,8 +3702,8 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
// that has the 'body' section and one that doesn't.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3719,9 +3741,13 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
CorruptDocStoreHeaderChecksumFile();
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(
- &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/true));
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3785,8 +3811,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
// that has the 'body' section and one that doesn't.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3825,9 +3851,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
CorruptDocStoreHeaderChecksumFile();
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(
- &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/false));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3904,7 +3929,9 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
DocumentStore::Create(
&filesystem_, document_store_dir, &fake_clock_, schema_store.get(),
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false, &initialize_stats));
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -3992,8 +4019,8 @@ TEST_F(DocumentStoreTest, GetDebugInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -4092,8 +4119,8 @@ TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -4109,8 +4136,8 @@ TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) {
TEST_F(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/util/tokenized-document_test.cc b/icing/util/tokenized-document_test.cc
index f2a9214..c0b20bb 100644
--- a/icing/util/tokenized-document_test.cc
+++ b/icing/util/tokenized-document_test.cc
@@ -46,7 +46,7 @@ namespace {
using ::testing::ElementsAre;
using ::testing::Eq;
-using ::testing::EqualsProto;
+using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::IsEmpty;
using ::testing::SizeIs;
diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto
index 40a0d0c..db5dbed 100644
--- a/proto/icing/proto/initialize.proto
+++ b/proto/icing/proto/initialize.proto
@@ -23,7 +23,7 @@ option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
-// Next tag: 7
+// Next tag: 8
message IcingSearchEngineOptions {
// Directory to persist files for Icing. Required.
// If Icing was previously initialized with this directory, it will reload
@@ -75,6 +75,13 @@ message IcingSearchEngineOptions {
//
// Default to 0 for better rollout of the new index optimize.
optional float optimize_rebuild_index_threshold = 6 [default = 0.0];
+
+ // Level of compression, NO_COMPRESSION = 0, BEST_SPEED = 1,
+ // BEST_COMPRESSION = 9
+ // Valid values: [0, 9]
+ // Optional.
+ optional int32 compression_level = 7 [default = 3];
+
reserved 2;
}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index edfcf40..04f655d 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -143,7 +143,7 @@ message PutDocumentStatsProto {
// Stats of the top-level function IcingSearchEngine::Search() and
// IcingSearchEngine::GetNextPage().
-// Next tag: 21
+// Next tag: 23
message QueryStatsProto {
// The UTF-8 length of the query string
optional int32 query_length = 16;
@@ -207,6 +207,12 @@ message QueryStatsProto {
// Time used to send protos across the JNI boundary from native to java side.
optional int32 native_to_java_jni_latency_ms = 20;
+ // The native latency due to the join operation.
+ optional int32 join_latency_ms = 21;
+
+ // Number of documents scored.
+ optional int32 num_joined_results_returned_current_page = 22;
+
reserved 9;
}
diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto
index f471426..dc625fc 100644
--- a/proto/icing/proto/schema.proto
+++ b/proto/icing/proto/schema.proto
@@ -34,7 +34,7 @@ option objc_class_prefix = "ICNG";
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
//
-// Next tag: 6
+// Next tag: 7
message SchemaTypeConfigProto {
// REQUIRED: Named type that uniquely identifies the structured, logical
// schema being defined.
@@ -60,6 +60,12 @@ message SchemaTypeConfigProto {
// it will default to value == 0.
optional int32 version = 5;
+ // An experimental field to make the type as a subtype of parent_type, which
+ // enables parent_type to be interpreted as its subtypes in the context of the
+ // Search APIs, including schema type filters and projections specified in
+ // TypePropertyMask.
+ optional string parent_type = 6;
+
reserved 2, 3;
}
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index c5bddab..0d6bfb4 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=520930446)
+set(synced_AOSP_CL_number=524885330)