diff options
author | Jiayu Hu <hujiayu@google.com> | 2023-04-19 12:31:23 -0700 |
---|---|---|
committer | Jiayu Hu <hujiayu@google.com> | 2023-04-19 12:33:32 -0700 |
commit | a7d57e98ea7168d66cf01ace85598e33d5e9e5db (patch) | |
tree | 6685971fd4a027f52d674ee646a79ab41d4ce24f | |
parent | b59049b030cc330b6c8ae1d03ea4c1a34235ac9b (diff) | |
download | icing-a7d57e98ea7168d66cf01ace85598e33d5e9e5db.tar.gz |
Update Icing from upstream.
Descriptions:
========================================================================
Adds join info to QueryStatsProto
========================================================================
[Join][cleanup][1/x] Add Icing benchmark for qualified id join
========================================================================
[NumericSearch][benchmark][2/x] Modify run-benchmarks.py to onboard numeric search benchmark
========================================================================
[NumericSearch][benchmark][1/x] Add numeric search icing search engine benchmark
========================================================================
Switch Iterator access type used in SnippetRetriever back to kForwardIterator.
========================================================================
Fix `DocumentStore::OptimizeInto` by passing `options_.document_store_namespace_id_fingerprint()`
========================================================================
Add file size check when initializing mmap for existing metadata file.
========================================================================
Add compression level option to Icing and propagate down to PortableFileBackedProtoLog
========================================================================
Add portable_equals_proto namespace for EqualsProto in tokenized-document_test.
========================================================================
Refactor schema to check consistency of polymorphism by dependency map
========================================================================
Support the `this.propertyWeights()` function in advanced scoring
========================================================================
Randomly set optimize_rebuild_index_threshold in monkey test
========================================================================
Change benchmark batch query latency to average
========================================================================
Performance improvements to SnippetRetriever.
========================================================================
Bug: 270102295
Bug: 260625837
Bug: 269295094
Bug: 193244409
Bug: 269513147
Bug: 276345448
Bug: 259744228
Bug: 272145329
Bug: 276349029
Change-Id: Ief1cb2c21f67aac420d6b79d9b946bbecdc39a5c
58 files changed, 2271 insertions, 457 deletions
diff --git a/icing/document-builder.h b/icing/document-builder.h index ba68ec5..44500f9 100644 --- a/icing/document-builder.h +++ b/icing/document-builder.h @@ -78,11 +78,25 @@ class DocumentBuilder { return AddStringProperty(std::move(property_name), {string_values...}); } + // Takes a property name and iterator of int64_t values. + template <typename InputIt> + DocumentBuilder& AddInt64Property(std::string property_name, InputIt first, + InputIt last) { + auto property = document_.add_properties(); + property->set_name(std::move(property_name)); + for (InputIt it = first; it != last; ++it) { + property->mutable_int64_values()->Add(*it); + } + return *this; + } + // Takes a property name and any number of int64_t values. template <typename... V> DocumentBuilder& AddInt64Property(std::string property_name, V... int64_values) { - return AddInt64Property(std::move(property_name), {int64_values...}); + std::initializer_list<int64_t> int64_values_list = {int64_values...}; + return AddInt64Property(std::move(property_name), int64_values_list.begin(), + int64_values_list.end()); } // Takes a property name and any number of double values. @@ -128,16 +142,6 @@ class DocumentBuilder { return *this; } - DocumentBuilder& AddInt64Property( - std::string property_name, std::initializer_list<int64_t> int64_values) { - auto property = document_.add_properties(); - property->set_name(std::move(property_name)); - for (int64_t int64_value : int64_values) { - property->mutable_int64_values()->Add(int64_value); - } - return *this; - } - DocumentBuilder& AddDoubleProperty( std::string property_name, std::initializer_list<double> double_values) { auto property = document_.add_properties(); diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h index 48e3501..a36bd9e 100644 --- a/icing/file/portable-file-backed-proto-log.h +++ b/icing/file/portable-file-backed-proto-log.h @@ -106,13 +106,32 @@ class PortableFileBackedProtoLog { // compressed size larger than max_proto_size are also not accepted. const int32_t max_proto_size; + // Level of compression if enabled, NO_COMPRESSION = 0, BEST_SPEED = 1, + // BEST_COMPRESSION = 9 + const int32_t compression_level; + // Must specify values for options. Options() = delete; - explicit Options(bool compress_in, - const int32_t max_proto_size_in = kMaxProtoSize) - : compress(compress_in), max_proto_size(max_proto_size_in) {} + explicit Options( + bool compress_in, const int32_t max_proto_size_in = kMaxProtoSize, + const int32_t compression_level_in = kDeflateCompressionLevel) + : compress(compress_in), + max_proto_size(max_proto_size_in), + compression_level(compression_level_in) {} }; + // Our internal max for protos. + // + // WARNING: Changing this to a larger number may invalidate our assumption + // that that proto size can safely be stored in the last 3 bytes of the proto + // header. + static constexpr int kMaxProtoSize = (1 << 24) - 1; // 16MiB + static_assert(kMaxProtoSize <= 0x00FFFFFF, + "kMaxProtoSize doesn't fit in 3 bytes"); + + // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9 + static constexpr int kDeflateCompressionLevel = 3; + // Number of bytes we reserve for the heading at the beginning of the proto // log. We reserve this so the header can grow without running into the // contents of the proto log, triggering an unnecessary migration of the data. @@ -480,7 +499,8 @@ class PortableFileBackedProtoLog { // Object can only be instantiated via the ::Create factory. PortableFileBackedProtoLog(const Filesystem* filesystem, const std::string& file_path, - std::unique_ptr<Header> header); + std::unique_ptr<Header> header, + int32_t compression_level); // Initializes a new proto log. // @@ -556,18 +576,6 @@ class PortableFileBackedProtoLog { // protos we support. static constexpr uint8_t kProtoMagic = 0x5C; - // Our internal max for protos. - // - // WARNING: Changing this to a larger number may invalidate our assumption - // that that proto size can safely be stored in the last 3 bytes of the proto - // header. - static constexpr int kMaxProtoSize = (1 << 24) - 1; // 16MiB - static_assert(kMaxProtoSize <= 0x00FFFFFF, - "kMaxProtoSize doesn't fit in 3 bytes"); - - // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9 - static constexpr int kDeflateCompressionLevel = 3; - // Chunks of the file to mmap at a time, so we don't mmap the entire file. // Only used on 32-bit devices static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB @@ -576,15 +584,17 @@ class PortableFileBackedProtoLog { const Filesystem* const filesystem_; const std::string file_path_; std::unique_ptr<Header> header_; + const int32_t compression_level_; }; template <typename ProtoT> PortableFileBackedProtoLog<ProtoT>::PortableFileBackedProtoLog( const Filesystem* filesystem, const std::string& file_path, - std::unique_ptr<Header> header) + std::unique_ptr<Header> header, int32_t compression_level) : filesystem_(filesystem), file_path_(file_path), - header_(std::move(header)) { + header_(std::move(header)), + compression_level_(compression_level) { fd_.reset(filesystem_->OpenForAppend(file_path.c_str())); } @@ -617,6 +627,12 @@ PortableFileBackedProtoLog<ProtoT>::Create(const Filesystem* filesystem, options.max_proto_size)); } + if (options.compression_level < 0 || options.compression_level > 9) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "options.compression_level must be between 0 and 9 inclusive, was %d", + options.compression_level)); + } + if (!filesystem->FileExists(file_path.c_str())) { return InitializeNewFile(filesystem, file_path, options); } @@ -660,7 +676,8 @@ PortableFileBackedProtoLog<ProtoT>::InitializeNewFile( CreateResult create_result = { std::unique_ptr<PortableFileBackedProtoLog<ProtoT>>( new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path, - std::move(header))), + std::move(header), + options.compression_level)), /*data_loss=*/DataLoss::NONE, /*recalculated_checksum=*/false}; return create_result; @@ -788,7 +805,8 @@ PortableFileBackedProtoLog<ProtoT>::InitializeExistingFile( CreateResult create_result = { std::unique_ptr<PortableFileBackedProtoLog<ProtoT>>( new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path, - std::move(header))), + std::move(header), + options.compression_level)), data_loss, recalculated_checksum}; return create_result; @@ -893,7 +911,7 @@ PortableFileBackedProtoLog<ProtoT>::WriteProto(const ProtoT& proto) { if (header_->GetCompressFlag()) { protobuf_ports::GzipOutputStream::Options options; options.format = protobuf_ports::GzipOutputStream::ZLIB; - options.compression_level = kDeflateCompressionLevel; + options.compression_level = compression_level_; protobuf_ports::GzipOutputStream compressing_stream(&proto_stream, options); diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc index af09d18..bf5e604 100644 --- a/icing/file/portable-file-backed-proto-log_test.cc +++ b/icing/file/portable-file-backed-proto-log_test.cc @@ -73,24 +73,18 @@ class PortableFileBackedProtoLogTest : public ::testing::Test { const Filesystem filesystem_; std::string file_path_; bool compress_ = true; + int32_t compression_level_ = + PortableFileBackedProtoLog<DocumentProto>::kDeflateCompressionLevel; int64_t max_proto_size_ = 256 * 1024; // 256 KiB }; TEST_F(PortableFileBackedProtoLogTest, Initialize) { - // max_proto_size must be greater than 0 - int invalid_max_proto_size = 0; - ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( - &filesystem_, file_path_, - PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, invalid_max_proto_size)), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); - ICING_ASSERT_OK_AND_ASSIGN( PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, - PortableFileBackedProtoLog<DocumentProto>::Options(compress_, - max_proto_size_))); + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_, compression_level_))); EXPECT_THAT(create_result.proto_log, NotNull()); EXPECT_FALSE(create_result.has_data_loss()); EXPECT_FALSE(create_result.recalculated_checksum); @@ -99,7 +93,41 @@ TEST_F(PortableFileBackedProtoLogTest, Initialize) { ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - !compress_, max_proto_size_)), + !compress_, max_proto_size_, compression_level_)), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_F(PortableFileBackedProtoLogTest, InitializeValidatesOptions) { + // max_proto_size must be greater than 0 + int invalid_max_proto_size = 0; + ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, invalid_max_proto_size, compression_level_)), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + + // max_proto_size must be under 16 MiB + invalid_max_proto_size = 16 * 1024 * 1024; + ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, invalid_max_proto_size, compression_level_)), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + + // compression_level must be between 0 and 9 inclusive + int invalid_compression_level = -1; + ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_, invalid_compression_level)), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + + // compression_level must be between 0 and 9 inclusive + invalid_compression_level = 10; + ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_, invalid_compression_level)), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } @@ -108,8 +136,8 @@ TEST_F(PortableFileBackedProtoLogTest, ReservedSpaceForHeader) { PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, - PortableFileBackedProtoLog<DocumentProto>::Options(compress_, - max_proto_size_))); + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_, compression_level_))); // With no protos written yet, the log should be minimum the size of the // reserved header space. @@ -123,8 +151,8 @@ TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) { PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, - PortableFileBackedProtoLog<DocumentProto>::Options(compress_, - max_proto_size))); + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -140,8 +168,8 @@ TEST_F(PortableFileBackedProtoLogTest, ReadProtoWrongKProtoMagic) { PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, - PortableFileBackedProtoLog<DocumentProto>::Options(compress_, - max_proto_size_))); + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -175,7 +203,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteUncompressedProto) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - /*compress_in=*/false, max_proto_size_))); + /*compress_in=*/false, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -222,7 +250,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteUncompressedProto) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - /*compress_in=*/false, max_proto_size_))); + /*compress_in=*/false, max_proto_size_, compression_level_))); auto recreated_proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -244,7 +272,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - /*compress_in=*/true, max_proto_size_))); + /*compress_in=*/true, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -291,7 +319,7 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - /*compress_in=*/true, max_proto_size_))); + /*compress_in=*/true, max_proto_size_, compression_level_))); auto recreated_proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -304,6 +332,168 @@ TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) { } } +TEST_F(PortableFileBackedProtoLogTest, ReadWriteDifferentCompressionLevel) { + int document1_offset; + int document2_offset; + int document3_offset; + + // The first proto to write that's close to the max size. Leave some room for + // the rest of the proto properties. + std::string long_str(max_proto_size_ - 1024, 'a'); + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .AddStringProperty("long_str", long_str) + .Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace2", "uri2").Build(); + DocumentProto document3 = + DocumentBuilder().SetKey("namespace3", "uri3").Build(); + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/true, max_proto_size_, + /*compression_level_in=*/3))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write the first proto + ICING_ASSERT_OK_AND_ASSIGN(document1_offset, + proto_log->WriteProto(document1)); + + // Check that what we read is what we wrote + ASSERT_THAT(proto_log->ReadProto(document1_offset), + IsOkAndHolds(EqualsProto(document1))); + + ICING_ASSERT_OK(proto_log->PersistToDisk()); + } + + // Make a new proto_log with the same file_path but different compression + // level, and make sure we can still read from and write to the same + // underlying file. + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/true, max_proto_size_, + /*compression_level_in=*/9))); + auto recreated_proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Check the first proto + ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset), + IsOkAndHolds(EqualsProto(document1))); + + // Write a second proto + ICING_ASSERT_OK_AND_ASSIGN(document2_offset, + recreated_proto_log->WriteProto(document2)); + + ASSERT_GT(document2_offset, document1_offset); + + // Check the second proto + ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset), + IsOkAndHolds(EqualsProto(document2))); + + ICING_ASSERT_OK(recreated_proto_log->PersistToDisk()); + } + + // One more time but with 0 compression level + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/true, max_proto_size_, + /*compression_level=*/0))); + auto recreated_proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Check the first proto + ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset), + IsOkAndHolds(EqualsProto(document1))); + + // Check the second proto + ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset), + IsOkAndHolds(EqualsProto(document2))); + + // Write a third proto + ICING_ASSERT_OK_AND_ASSIGN(document3_offset, + recreated_proto_log->WriteProto(document3)); + + ASSERT_GT(document3_offset, document2_offset); + + // Check the third proto + ASSERT_THAT(recreated_proto_log->ReadProto(document3_offset), + IsOkAndHolds(EqualsProto(document3))); + } +} + +TEST_F(PortableFileBackedProtoLogTest, + WriteDifferentCompressionLevelDifferentSizes) { + int document_log_size_with_compression_3; + int document_log_size_with_no_compression; + + // The first proto to write that's close to the max size. Leave some room for + // the rest of the proto properties. + std::string long_str(max_proto_size_ - 1024, 'a'); + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .AddStringProperty("long_str", long_str) + .Build(); + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/true, max_proto_size_, + /*compression_level_in=*/3))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write the proto + ICING_ASSERT_OK(proto_log->WriteProto(document1)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + document_log_size_with_compression_3 = + filesystem_.GetFileSize(file_path_.c_str()); + } + + // Delete the proto_log so we can reuse the file_path + filesystem_.DeleteFile(file_path_.c_str()); + + { + ICING_ASSERT_OK_AND_ASSIGN( + PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, + PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + PortableFileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/true, max_proto_size_, + /*compression_level_in=*/0))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write the proto + ICING_ASSERT_OK(proto_log->WriteProto(document1)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + document_log_size_with_no_compression = + filesystem_.GetFileSize(file_path_.c_str()); + + // Uncompressed document file size should be larger than original compressed + // document file size + ASSERT_GT(document_log_size_with_no_compression, + document_log_size_with_compression_3); + } +} + TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) { { ICING_ASSERT_OK_AND_ASSIGN( @@ -311,7 +501,7 @@ TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto recreated_proto_log = std::move(create_result.proto_log); EXPECT_FALSE(create_result.has_data_loss()); } @@ -328,7 +518,7 @@ TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) { ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_)), + compress_, max_proto_size_, compression_level_)), StatusIs(libtextclassifier3::StatusCode::INTERNAL, HasSubstr("Invalid header checksum"))); } @@ -341,7 +531,7 @@ TEST_F(PortableFileBackedProtoLogTest, DifferentMagic) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto recreated_proto_log = std::move(create_result.proto_log); EXPECT_FALSE(create_result.has_data_loss()); @@ -360,7 +550,7 @@ TEST_F(PortableFileBackedProtoLogTest, DifferentMagic) { ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_)), + compress_, max_proto_size_, compression_level_)), StatusIs(libtextclassifier3::StatusCode::INTERNAL, HasSubstr("Invalid header kMagic"))); } @@ -383,7 +573,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); EXPECT_FALSE(create_result.has_data_loss()); @@ -410,7 +600,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); EXPECT_FALSE(create_result.has_data_loss()); EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); @@ -432,7 +622,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -478,7 +668,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); EXPECT_TRUE(create_result.has_data_loss()); EXPECT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE)); @@ -507,7 +697,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitFalseAlarmKeepsData) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -537,7 +727,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitFalseAlarmKeepsData) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); EXPECT_FALSE(create_result.has_data_loss()); @@ -569,7 +759,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -615,7 +805,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_TRUE(create_result.has_data_loss()); ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL)); @@ -640,7 +830,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -663,7 +853,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); // We previously persisted to disk so everything should be in a perfect // state. @@ -683,7 +873,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -707,7 +897,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); // We previously persisted to disk so everything should be in a perfect // state. @@ -726,7 +916,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitIsFalseAfterPutAndDestructor) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -751,7 +941,7 @@ TEST_F(PortableFileBackedProtoLogTest, DirtyBitIsFalseAfterPutAndDestructor) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); // We previously persisted to disk so everything should be in a perfect // state. @@ -771,7 +961,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -797,7 +987,7 @@ TEST_F(PortableFileBackedProtoLogTest, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); // We previously persisted to disk so everything should be in a perfect // state. @@ -819,8 +1009,8 @@ TEST_F(PortableFileBackedProtoLogTest, Iterator) { PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, - PortableFileBackedProtoLog<DocumentProto>::Options(compress_, - max_proto_size_))); + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -872,7 +1062,7 @@ TEST_F(PortableFileBackedProtoLogTest, ComputeChecksum) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -890,7 +1080,7 @@ TEST_F(PortableFileBackedProtoLogTest, ComputeChecksum) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -915,8 +1105,8 @@ TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldSetZero) { PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, - PortableFileBackedProtoLog<DocumentProto>::Options(compress_, - max_proto_size_))); + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -950,8 +1140,8 @@ TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldReturnNotFound) { PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result, PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, - PortableFileBackedProtoLog<DocumentProto>::Options(compress_, - max_proto_size_))); + PortableFileBackedProtoLog<DocumentProto>::Options( + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -993,7 +1183,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -1021,7 +1211,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -1041,7 +1231,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); ASSERT_FALSE(create_result.has_data_loss()); @@ -1063,7 +1253,7 @@ TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) { PortableFileBackedProtoLog<DocumentProto>::Create( &filesystem_, file_path_, PortableFileBackedProtoLog<DocumentProto>::Options( - compress_, max_proto_size_))); + compress_, max_proto_size_, compression_level_))); auto proto_log = std::move(create_result.proto_log); EXPECT_FALSE(create_result.has_data_loss()); } diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index b81390f..56c7795 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -705,7 +705,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore( schema_store_.get(), force_recovery_and_revalidate_documents, options_.document_store_namespace_id_fingerprint(), - initialize_stats)); + options_.compression_level(), initialize_stats)); document_store_ = std::move(create_result.document_store); return libtextclassifier3::Status::OK; @@ -1852,6 +1852,7 @@ SearchResultProto IcingSearchEngine::Search( std::unique_ptr<ScoredDocumentHitsRanker> ranker; if (join_children_fetcher != nullptr) { + std::unique_ptr<Timer> join_timer = clock_->GetNewTimer(); // Join 2 scored document hits JoinProcessor join_processor(document_store_.get(), schema_store_.get(), qualified_id_join_index_.get()); @@ -1865,7 +1866,8 @@ SearchResultProto IcingSearchEngine::Search( } std::vector<JoinedScoredDocumentHit> joined_result_document_hits = std::move(joined_result_document_hits_or).ValueOrDie(); - // TODO(b/256022027): set join latency + + query_stats->set_join_latency_ms(join_timer->GetElapsedMilliseconds()); std::unique_ptr<Timer> component_timer = clock_->GetNewTimer(); // Ranks results @@ -1921,8 +1923,11 @@ SearchResultProto IcingSearchEngine::Search( // Assembles the final search result proto result_proto.mutable_results()->Reserve( page_result_info.second.results.size()); + + int32_t child_count = 0; for (SearchResultProto::ResultProto& result : page_result_info.second.results) { + child_count += result.joined_results_size(); result_proto.mutable_results()->Add(std::move(result)); } @@ -1935,6 +1940,9 @@ SearchResultProto IcingSearchEngine::Search( component_timer->GetElapsedMilliseconds()); query_stats->set_num_results_returned_current_page( result_proto.results_size()); + + query_stats->set_num_joined_results_returned_current_page(child_count); + query_stats->set_num_results_with_snippets( page_result_info.second.num_results_with_snippets); return result_proto; @@ -2055,8 +2063,11 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) { // Assembles the final search result proto result_proto.mutable_results()->Reserve( page_result_info.second.results.size()); + + int32_t child_count = 0; for (SearchResultProto::ResultProto& result : page_result_info.second.results) { + child_count += result.joined_results_size(); result_proto.mutable_results()->Add(std::move(result)); } @@ -2075,6 +2086,8 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) { result_proto.results_size()); query_stats->set_num_results_with_snippets( page_result_info.second.num_results_with_snippets); + query_stats->set_num_joined_results_returned_current_page(child_count); + return result_proto; } @@ -2106,7 +2119,8 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) { // Copies valid document data to tmp directory libtextclassifier3::StatusOr<std::vector<DocumentId>> document_id_old_to_new_or = document_store_->OptimizeInto( - temporary_document_dir, language_segmenter_.get(), optimize_stats); + temporary_document_dir, language_segmenter_.get(), + options_.document_store_namespace_id_fingerprint(), optimize_stats); // Handles error if any if (!document_id_old_to_new_or.ok()) { @@ -2143,7 +2157,8 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) { auto create_result_or = DocumentStore::Create( filesystem_.get(), current_document_dir, clock_.get(), schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false, - options_.document_store_namespace_id_fingerprint()); + options_.document_store_namespace_id_fingerprint(), + options_.compression_level(), /*initialize_stats=*/nullptr); // TODO(b/144458732): Implement a more robust version of // TC_ASSIGN_OR_RETURN that can support error logging. if (!create_result_or.ok()) { @@ -2170,7 +2185,8 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) { auto create_result_or = DocumentStore::Create( filesystem_.get(), current_document_dir, clock_.get(), schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false, - options_.document_store_namespace_id_fingerprint()); + options_.document_store_namespace_id_fingerprint(), + options_.compression_level(), /*initialize_stats=*/nullptr); if (!create_result_or.ok()) { // Unable to create DocumentStore from the new file. Mark as uninitialized // and return INTERNAL. diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc index 9e8686d..cf654a8 100644 --- a/icing/icing-search-engine_benchmark.cc +++ b/icing/icing-search-engine_benchmark.cc @@ -16,7 +16,9 @@ #include <fstream> #include <iostream> +#include <limits> #include <memory> +#include <numeric> #include <ostream> #include <random> #include <sstream> @@ -32,6 +34,7 @@ #include "icing/document-builder.h" #include "icing/file/filesystem.h" #include "icing/icing-search-engine.h" +#include "icing/join/join-processor.h" #include "icing/proto/document.pb.h" #include "icing/proto/initialize.pb.h" #include "icing/proto/reset.pb.h" @@ -40,9 +43,12 @@ #include "icing/proto/search.pb.h" #include "icing/proto/status.pb.h" #include "icing/proto/term.pb.h" +#include "icing/query/query-features.h" #include "icing/schema-builder.h" #include "icing/testing/common-matchers.h" #include "icing/testing/document-generator.h" +#include "icing/testing/numeric/number-generator.h" +#include "icing/testing/numeric/uniform-distribution-integer-generator.h" #include "icing/testing/random-string.h" #include "icing/testing/schema-generator.h" #include "icing/testing/tmp-directory.h" @@ -178,6 +184,25 @@ std::vector<DocumentProto> GenerateRandomDocuments( return random_docs; } +std::unique_ptr<NumberGenerator<int64_t>> CreateIntegerGenerator( + size_t num_documents) { + // Since the collision # follows poisson distribution with lambda = + // (num_keys / range), we set the range 10x (lambda = 0.1) to avoid too many + // collisions. + // + // Distribution: + // - keys in range being picked for 0 times: 90.5% + // - keys in range being picked for 1 time: 9% + // - keys in range being picked for 2 times: 0.45% + // - keys in range being picked for 3 times: 0.015% + // + // For example, num_keys = 1M, range = 10M. Then there will be ~904837 unique + // keys, 45242 keys being picked twice, 1508 keys being picked thrice ... + return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>( + /*seed=*/12345, /*range_lower=*/0, + /*range_upper=*/static_cast<int64_t>(num_documents) * 10 - 1); +} + void BM_IndexLatency(benchmark::State& state) { // Initialize the filesystem std::string test_dir = GetTestTempDir() + "/icing/benchmark"; @@ -799,6 +824,395 @@ void BM_QueryWithSnippet(benchmark::State& state) { } BENCHMARK(BM_QueryWithSnippet); +void BM_NumericIndexing(benchmark::State& state) { + int num_documents = state.range(0); + int num_integers_per_doc = state.range(1); + + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("integer") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + + std::unique_ptr<NumberGenerator<int64_t>> integer_generator = + CreateIntegerGenerator(num_documents); + std::vector<DocumentProto> documents; + documents.reserve(num_documents); + for (int i = 0; i < num_documents; ++i) { + std::vector<int64_t> integers; + integers.reserve(num_integers_per_doc); + for (int j = 0; j < num_integers_per_doc; ++j) { + integers.push_back(integer_generator->Generate()); + } + + DocumentProto document = + DocumentBuilder() + .SetKey("namespace", "uri" + std::to_string(i)) + .SetSchema("Message") + .AddStringProperty("body", "body hello world") + .AddInt64Property("integer", integers.begin(), integers.end()) + .Build(); + documents.push_back(std::move(document)); + } + + for (auto s : state) { + state.PauseTiming(); + // Create the index. + IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + state.ResumeTiming(); + + for (const DocumentProto& document : documents) { + ASSERT_THAT(icing->Put(document).status(), ProtoIsOk()); + } + + state.PauseTiming(); + icing.reset(); + ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str())); + state.ResumeTiming(); + } +} + +BENCHMARK(BM_NumericIndexing) + // Arguments: num_documents, num_integers_per_doc + ->ArgPair(1000000, 5); + +void BM_NumericExactQuery(benchmark::State& state) { + int num_documents = state.range(0); + int num_integers_per_doc = state.range(1); + + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("integer") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + + // Create the index. + IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + std::unique_ptr<NumberGenerator<int64_t>> integer_generator = + CreateIntegerGenerator(num_documents); + std::unordered_set<int64_t> chosen_integer_set; + for (int i = 0; i < num_documents; ++i) { + std::vector<int64_t> integers; + integers.reserve(num_integers_per_doc); + for (int j = 0; j < num_integers_per_doc; ++j) { + int64_t chosen_int = integer_generator->Generate(); + integers.push_back(chosen_int); + chosen_integer_set.insert(chosen_int); + } + + DocumentProto document = + DocumentBuilder() + .SetKey("namespace", "uri" + std::to_string(i)) + .SetSchema("Message") + .AddStringProperty("body", "body hello world") + .AddInt64Property("integer", integers.begin(), integers.end()) + .Build(); + ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk()); + } + + SearchSpecProto search_spec; + search_spec.set_search_type( + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); + search_spec.add_enabled_features(std::string(kNumericSearchFeature)); + + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(1); + + std::vector<int64_t> chosen_integers(chosen_integer_set.begin(), + chosen_integer_set.end()); + std::uniform_int_distribution<> distrib(0, chosen_integers.size() - 1); + std::default_random_engine e(/*seed=*/12345); + for (auto s : state) { + int64_t exact = chosen_integers[distrib(e)]; + search_spec.set_query("integer == " + std::to_string(exact)); + + SearchResultProto results = + icing->Search(search_spec, scoring_spec, result_spec); + ASSERT_THAT(results.status(), ProtoIsOk()); + ASSERT_GT(results.results_size(), 0); + if (results.next_page_token() != kInvalidNextPageToken) { + icing->InvalidateNextPageToken(results.next_page_token()); + } + } +} +BENCHMARK(BM_NumericExactQuery) + // Arguments: num_documents, num_integers_per_doc + ->ArgPair(1000000, 5); + +void BM_NumericRangeQueryAll(benchmark::State& state) { + int num_documents = state.range(0); + int num_integers_per_doc = state.range(1); + + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("integer") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + + // Create the index. + IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + std::unique_ptr<NumberGenerator<int64_t>> integer_generator = + CreateIntegerGenerator(num_documents); + for (int i = 0; i < num_documents; ++i) { + std::vector<int64_t> integers; + integers.reserve(num_integers_per_doc); + for (int j = 0; j < num_integers_per_doc; ++j) { + integers.push_back(integer_generator->Generate()); + } + + DocumentProto document = + DocumentBuilder() + .SetKey("namespace", "uri" + std::to_string(i)) + .SetSchema("Message") + .AddStringProperty("body", "body hello world") + .AddInt64Property("integer", integers.begin(), integers.end()) + .Build(); + ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk()); + } + + SearchSpecProto search_spec; + search_spec.set_search_type( + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); + search_spec.add_enabled_features(std::string(kNumericSearchFeature)); + search_spec.set_query("integer >= " + + std::to_string(std::numeric_limits<int64_t>::min())); + + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(1); + + for (auto s : state) { + SearchResultProto results = + icing->Search(search_spec, scoring_spec, result_spec); + ASSERT_THAT(results.status(), ProtoIsOk()); + ASSERT_GT(results.results_size(), 0); + if (results.next_page_token() != kInvalidNextPageToken) { + icing->InvalidateNextPageToken(results.next_page_token()); + } + } +} +BENCHMARK(BM_NumericRangeQueryAll) + // Arguments: num_documents, num_integers_per_doc + ->ArgPair(1000000, 5); + +void BM_JoinQueryQualifiedId(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("firstName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("lastName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("personQualifiedId") + .SetDataTypeJoinableString( + JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + // Create the index. + IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + // Create Person documents (parent) + static constexpr int kNumPersonDocuments = 1000; + for (int i = 0; i < kNumPersonDocuments; ++i) { + std::string person_id = std::to_string(i); + DocumentProto person = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person" + person_id) + .SetSchema("Person") + .AddStringProperty("firstName", "first" + person_id) + .AddStringProperty("lastName", "last" + person_id) + .AddStringProperty("emailAddress", + "person" + person_id + "@gmail.com") + .Build(); + ASSERT_THAT(icing->Put(std::move(person)).status(), ProtoIsOk()); + } + + // Create Email documents (child) + static constexpr int kNumEmailDocuments = 10000; + std::uniform_int_distribution<> distrib(0, kNumPersonDocuments - 1); + std::default_random_engine e(/*seed=*/12345); + for (int i = 0; i < kNumEmailDocuments; ++i) { + std::string email_id = std::to_string(i); + std::string person_id = std::to_string(distrib(e)); + DocumentProto email = + DocumentBuilder() + .SetKey("namespace", "email" + email_id) + .SetSchema("Email") + .AddStringProperty("subject", "test subject " + email_id) + .AddStringProperty("body", "message body") + .AddStringProperty("personQualifiedId", + "pkg$db/namespace#person" + person_id) + .Build(); + ASSERT_THAT(icing->Put(std::move(email)).status(), ProtoIsOk()); + } + + // Parent SearchSpec + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("firstName:first"); + + // JoinSpec + JoinSpecProto* join_spec = search_spec.mutable_join_spec(); + join_spec->set_max_joined_child_count(std::numeric_limits<int32_t>::max()); + join_spec->set_parent_property_expression( + std::string(JoinProcessor::kQualifiedIdExpr)); + join_spec->set_child_property_expression("personQualifiedId"); + join_spec->set_aggregation_scoring_strategy( + JoinSpecProto::AggregationScoringStrategy::MAX); + JoinSpecProto::NestedSpecProto* nested_spec = + join_spec->mutable_nested_spec(); + SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); + nested_search_spec->set_term_match_type(TermMatchType::PREFIX); + nested_search_spec->set_query("subject:test"); + *nested_spec->mutable_scoring_spec() = ScoringSpecProto::default_instance(); + *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + + static constexpr int kNumPerPage = 10; + ResultSpecProto result_spec; + result_spec.set_num_per_page(kNumPerPage); + + ScoringSpecProto score_spec = ScoringSpecProto::default_instance(); + + const auto child_count_reduce_func = + [](int child_count, const SearchResultProto::ResultProto& result) -> int { + return child_count + result.joined_results_size(); + }; + for (auto s : state) { + int total_parent_count = 0; + int total_child_count = 0; + SearchResultProto results = + icing->Search(search_spec, score_spec, result_spec); + total_parent_count += results.results_size(); + total_child_count += + std::reduce(results.results().begin(), results.results().end(), 0, + child_count_reduce_func); + + // Get all pages. + while (results.next_page_token() != kInvalidNextPageToken) { + results = icing->GetNextPage(results.next_page_token()); + total_parent_count += results.results_size(); + total_child_count += + std::reduce(results.results().begin(), results.results().end(), 0, + child_count_reduce_func); + } + + ASSERT_THAT(total_parent_count, Eq(kNumPersonDocuments)); + ASSERT_THAT(total_child_count, Eq(kNumEmailDocuments)); + } +} +BENCHMARK(BM_JoinQueryQualifiedId); + } // namespace } // namespace lib diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc index 6ba1737..0db4d54 100644 --- a/icing/icing-search-engine_initialization_test.cc +++ b/icing/icing-search-engine_initialization_test.cc @@ -383,6 +383,56 @@ TEST_F(IcingSearchEngineInitializationTest, ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); } +TEST_F(IcingSearchEngineInitializationTest, + NegativeCompressionLevelReturnsInvalidArgument) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_compression_level(-1); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineInitializationTest, + GreaterThanMaxCompressionLevelReturnsInvalidArgument) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_compression_level(10); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineInitializationTest, GoodCompressionLevelReturnsOk) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_compression_level(0); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); +} + +TEST_F(IcingSearchEngineInitializationTest, + ReinitializingWithDifferentCompressionLevelReturnsOk) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_compression_level(3); + { + IcingSearchEngine icing(options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document = CreateMessageDocument("namespace", "uri"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); + } + options.set_compression_level(9); + { + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + } + options.set_compression_level(0); + { + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + } +} + TEST_F(IcingSearchEngineInitializationTest, FailToCreateDocStore) { auto mock_filesystem = std::make_unique<MockFilesystem>(); // This fails DocumentStore::Create() @@ -980,7 +1030,12 @@ TEST_F(IcingSearchEngineInitializationTest, ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock, - schema_store.get())); + schema_store.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); diff --git a/icing/icing-search-engine_optimize_test.cc b/icing/icing-search-engine_optimize_test.cc index 0c5cb7a..48fae13 100644 --- a/icing/icing-search-engine_optimize_test.cc +++ b/icing/icing-search-engine_optimize_test.cc @@ -1753,6 +1753,87 @@ TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) { EXPECT_THAT(result.optimize_stats(), EqualsProto(expected)); } +TEST_F(IcingSearchEngineOptimizeTest, + OptimizationRewritesDocsWithNewCompressionLevel) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "message body one") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "message body two") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + icing_options.set_compression_level(3); + int64_t document_log_size_compression_3; + int64_t document_log_size_after_opti_no_compression; + int64_t document_log_size_after_opti_compression_3; + const std::string document_log_path = + icing_options.base_dir() + "/document_dir/" + + DocumentLogCreator::GetDocumentLogFilename(); + { + IcingSearchEngine icing(icing_options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); + document_log_size_compression_3 = + filesystem()->GetFileSize(document_log_path.c_str()); + } // Destroys IcingSearchEngine to make sure nothing is cached. + + // Turn off compression + icing_options.set_compression_level(0); + + { + IcingSearchEngine icing(icing_options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + // Document log size is the same even after reopening with a different + // compression level + ASSERT_EQ(document_log_size_compression_3, + filesystem()->GetFileSize(document_log_path.c_str())); + ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + document_log_size_after_opti_no_compression = + filesystem()->GetFileSize(document_log_path.c_str()); + // Document log size is larger after optimizing since optimizing rewrites + // with the new compression level which is 0 or none + ASSERT_GT(document_log_size_after_opti_no_compression, + document_log_size_compression_3); + } + + // Restore the original compression level + icing_options.set_compression_level(3); + + { + IcingSearchEngine icing(icing_options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + // Document log size is the same even after reopening with a different + // compression level + ASSERT_EQ(document_log_size_after_opti_no_compression, + filesystem()->GetFileSize(document_log_path.c_str())); + ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + document_log_size_after_opti_compression_3 = + filesystem()->GetFileSize(document_log_path.c_str()); + // Document log size should be the same as it was originally + ASSERT_EQ(document_log_size_after_opti_compression_3, + document_log_size_compression_3); + } +} + } // namespace } // namespace lib } // namespace icing diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc index f5b747b..63fb657 100644 --- a/icing/icing-search-engine_search_test.cc +++ b/icing/icing-search-engine_search_test.cc @@ -3317,6 +3317,7 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) { exp_stats.set_ranking_latency_ms(5); exp_stats.set_document_retrieval_latency_ms(5); exp_stats.set_lock_acquisition_latency_ms(5); + exp_stats.set_num_joined_results_returned_current_page(0); EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); // Second page, 2 result with 1 snippet @@ -3333,6 +3334,7 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) { exp_stats.set_latency_ms(5); exp_stats.set_document_retrieval_latency_ms(5); exp_stats.set_lock_acquisition_latency_ms(5); + exp_stats.set_num_joined_results_returned_current_page(0); EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); // Third page, 1 result with 0 snippets @@ -3349,6 +3351,261 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) { exp_stats.set_latency_ms(5); exp_stats.set_document_retrieval_latency_ms(5); exp_stats.set_lock_acquisition_latency_ms(5); + exp_stats.set_num_joined_results_returned_current_page(0); + EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); +} + +TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) { + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(5); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); + + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("firstName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("lastName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("personQualifiedId") + .SetDataTypeJoinableString( + JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + DocumentProto person1 = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person1") + .SetSchema("Person") + .AddStringProperty("firstName", "first1") + .AddStringProperty("lastName", "last1") + .AddStringProperty("emailAddress", "email1@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(1) + .Build(); + DocumentProto person2 = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person2") + .SetSchema("Person") + .AddStringProperty("firstName", "first2") + .AddStringProperty("lastName", "last2") + .AddStringProperty("emailAddress", "email2@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(2) + .Build(); + DocumentProto person3 = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person3") + .SetSchema("Person") + .AddStringProperty("firstName", "first3") + .AddStringProperty("lastName", "last3") + .AddStringProperty("emailAddress", "email3@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(3) + .Build(); + + DocumentProto email1 = + DocumentBuilder() + .SetKey("namespace", "email1") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 1") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(3) + .Build(); + DocumentProto email2 = + DocumentBuilder() + .SetKey("namespace", "email2") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 2") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(2) + .Build(); + DocumentProto email3 = + DocumentBuilder() + .SetKey("namespace", "email3") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 3") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(1) + .Build(); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk()); + + // Parent SearchSpec + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("firstName:first"); + search_spec.set_search_type(GetParam()); + + // JoinSpec + JoinSpecProto* join_spec = search_spec.mutable_join_spec(); + join_spec->set_max_joined_child_count(100); + join_spec->set_parent_property_expression( + std::string(JoinProcessor::kQualifiedIdExpr)); + join_spec->set_child_property_expression("personQualifiedId"); + join_spec->set_aggregation_scoring_strategy( + JoinSpecProto::AggregationScoringStrategy::COUNT); + JoinSpecProto::NestedSpecProto* nested_spec = + join_spec->mutable_nested_spec(); + SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); + nested_search_spec->set_term_match_type(TermMatchType::PREFIX); + nested_search_spec->set_query("subject:test"); + nested_search_spec->set_search_type(GetParam()); + *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); + *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + + // Parent ScoringSpec + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE); + scoring_spec.set_order_by(ScoringSpecProto::Order::DESC); + + // Parent ResultSpec + ResultSpecProto result_spec; + result_spec.set_num_per_page(1); + + // Since we: + // - Use MAX for aggregation scoring strategy. + // - (Default) use DOCUMENT_SCORE to score child documents. + // - (Default) use DESC as the ranking order. + // + // person1 + email1 should have the highest aggregated score (3) and be + // returned first. person2 + email2 (aggregated score = 2) should be the + // second, and person3 + email3 (aggregated score = 1) should be the last. + SearchResultProto expected_result1; + expected_result1.mutable_status()->set_code(StatusProto::OK); + SearchResultProto::ResultProto* result_proto1 = + expected_result1.mutable_results()->Add(); + *result_proto1->mutable_document() = person1; + *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1; + *result_proto1->mutable_joined_results()->Add()->mutable_document() = email2; + + SearchResultProto expected_result2; + expected_result2.mutable_status()->set_code(StatusProto::OK); + SearchResultProto::ResultProto* result_google::protobuf = + expected_result2.mutable_results()->Add(); + *result_google::protobuf->mutable_document() = person2; + *result_google::protobuf->mutable_joined_results()->Add()->mutable_document() = email3; + + SearchResultProto expected_result3; + expected_result3.mutable_status()->set_code(StatusProto::OK); + SearchResultProto::ResultProto* result_proto3 = + expected_result3.mutable_results()->Add(); + *result_proto3->mutable_document() = person3; + + SearchResultProto search_result = + icing.Search(search_spec, scoring_spec, result_spec); + uint64_t next_page_token = search_result.next_page_token(); + EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken)); + expected_result1.set_next_page_token(next_page_token); + ASSERT_THAT(search_result, + EqualsSearchResultIgnoreStatsAndScores(expected_result1)); + + // Check the stats + QueryStatsProto exp_stats; + exp_stats.set_query_length(15); + exp_stats.set_num_terms(1); + exp_stats.set_num_namespaces_filtered(0); + exp_stats.set_num_schema_types_filtered(0); + exp_stats.set_ranking_strategy( + ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE); + exp_stats.set_is_first_page(true); + exp_stats.set_requested_page_size(1); + exp_stats.set_num_results_returned_current_page(1); + exp_stats.set_num_documents_scored(3); + exp_stats.set_num_results_with_snippets(0); + exp_stats.set_latency_ms(5); + exp_stats.set_parse_query_latency_ms(5); + exp_stats.set_scoring_latency_ms(5); + exp_stats.set_ranking_latency_ms(5); + exp_stats.set_document_retrieval_latency_ms(5); + exp_stats.set_lock_acquisition_latency_ms(5); + exp_stats.set_num_joined_results_returned_current_page(2); + exp_stats.set_join_latency_ms(5); + EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); + + // Second page, 1 child doc. + search_result = icing.GetNextPage(next_page_token); + next_page_token = search_result.next_page_token(); + EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken)); + expected_result2.set_next_page_token(next_page_token); + EXPECT_THAT(search_result, + EqualsSearchResultIgnoreStatsAndScores(expected_result2)); + + exp_stats = QueryStatsProto(); + exp_stats.set_is_first_page(false); + exp_stats.set_requested_page_size(1); + exp_stats.set_num_results_returned_current_page(1); + exp_stats.set_num_results_with_snippets(0); + exp_stats.set_latency_ms(5); + exp_stats.set_document_retrieval_latency_ms(5); + exp_stats.set_lock_acquisition_latency_ms(5); + exp_stats.set_num_joined_results_returned_current_page(1); + EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); + + // Third page, 0 child docs. + search_result = icing.GetNextPage(next_page_token); + next_page_token = search_result.next_page_token(); + ASSERT_THAT(search_result.status(), ProtoIsOk()); + ASSERT_THAT(search_result.results(), SizeIs(1)); + ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken)); + + exp_stats = QueryStatsProto(); + exp_stats.set_is_first_page(false); + exp_stats.set_requested_page_size(1); + exp_stats.set_num_results_returned_current_page(1); + exp_stats.set_num_joined_results_returned_current_page(0); + exp_stats.set_latency_ms(5); + exp_stats.set_document_retrieval_latency_ms(5); + exp_stats.set_lock_acquisition_latency_ms(5); + exp_stats.set_num_results_with_snippets(0); + ASSERT_THAT(search_result, + EqualsSearchResultIgnoreStatsAndScores(expected_result3)); + EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); + + ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken)); + + search_result = icing.GetNextPage(search_result.next_page_token()); + ASSERT_THAT(search_result.status(), ProtoIsOk()); + ASSERT_THAT(search_result.results(), IsEmpty()); + ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken)); + + exp_stats = QueryStatsProto(); + exp_stats.set_is_first_page(false); + exp_stats.set_lock_acquisition_latency_ms(5); EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); } diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc index 47baabe..9453e58 100644 --- a/icing/index/index-processor_test.cc +++ b/icing/index/index-processor_test.cc @@ -273,7 +273,12 @@ class IndexProcessorTest : public Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); doc_store_ = std::move(create_result.document_store); ICING_ASSERT_OK_AND_ASSIGN( diff --git a/icing/index/integer-section-indexing-handler_test.cc b/icing/index/integer-section-indexing-handler_test.cc index 71c6bd5..895fe57 100644 --- a/icing/index/integer-section-indexing-handler_test.cc +++ b/icing/index/integer-section-indexing-handler_test.cc @@ -163,7 +163,12 @@ class IntegerSectionIndexingHandlerTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult doc_store_create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(doc_store_create_result.document_store); } diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc index ddb216a..0900e1f 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc @@ -48,6 +48,17 @@ using ::testing::ElementsAre; using ::testing::Eq; using ::testing::IsEmpty; +libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore( + const Filesystem* filesystem, const std::string& base_dir, + const Clock* clock, const SchemaStore* schema_store) { + return DocumentStore::Create( + filesystem, base_dir, clock, schema_store, + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr); +} + class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test { protected: DocHitInfoIteratorDeletedFilterTest() @@ -73,8 +84,8 @@ class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); document_store_ = std::move(create_result.document_store); } @@ -240,8 +251,8 @@ class DocHitInfoIteratorNamespaceFilterTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); document_store_ = std::move(create_result.document_store); } @@ -395,8 +406,8 @@ class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); document_store_ = std::move(create_result.document_store); } @@ -535,8 +546,8 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); document_store_ = std::move(create_result.document_store); } @@ -563,8 +574,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -595,8 +606,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -627,8 +638,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, EqualTtlFilteredOut) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -660,8 +671,8 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, PastTtlFilteredOut) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -735,8 +746,8 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); document_store_ = std::move(create_result.document_store); } @@ -770,8 +781,8 @@ TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc index 5dd69c1..60b9a12 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc @@ -100,7 +100,12 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(create_result.document_store); } diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc index db1983c..f3901e1 100644 --- a/icing/index/numeric/integer-index-storage.cc +++ b/icing/index/numeric/integer-index-storage.cc @@ -818,6 +818,9 @@ IntegerIndexStorage::InitializeExistingFiles( /*max_file_size=*/kMetadataFileSize, /*pre_mapping_file_offset=*/0, /*pre_mapping_mmap_size=*/kMetadataFileSize)); + if (metadata_mmapped_file.available_size() != kMetadataFileSize) { + return absl_ports::FailedPreconditionError("Incorrect metadata file size"); + } // Initialize sorted_buckets int32_t pre_mapping_mmap_size = sizeof(Bucket) * (1 << 10); diff --git a/icing/index/numeric/integer-index.cc b/icing/index/numeric/integer-index.cc index 2f876e4..e7978bf 100644 --- a/icing/index/numeric/integer-index.cc +++ b/icing/index/numeric/integer-index.cc @@ -293,6 +293,10 @@ libtextclassifier3::Status IntegerIndex::Optimize( /*max_file_size=*/kMetadataFileSize, /*pre_mapping_file_offset=*/0, /*pre_mapping_mmap_size=*/kMetadataFileSize)); + if (metadata_mmapped_file.available_size() != kMetadataFileSize) { + return absl_ports::InternalError( + "Invalid metadata file size after Optimize"); + } metadata_mmapped_file_ = std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)); @@ -410,6 +414,9 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem, /*max_file_size=*/kMetadataFileSize, /*pre_mapping_file_offset=*/0, /*pre_mapping_mmap_size=*/kMetadataFileSize)); + if (metadata_mmapped_file.available_size() != kMetadataFileSize) { + return absl_ports::FailedPreconditionError("Incorrect metadata file size"); + } auto posting_list_serializer = std::make_unique<PostingListIntegerIndexSerializer>(); diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc index c4dacb8..ec7f55b 100644 --- a/icing/index/numeric/integer-index_test.cc +++ b/icing/index/numeric/integer-index_test.cc @@ -84,7 +84,12 @@ class NumericIndexIntegerTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult doc_store_create_result, DocumentStore::Create(&filesystem_, document_store_dir, &clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); doc_store_ = std::move(doc_store_create_result.document_store); } @@ -132,7 +137,8 @@ class NumericIndexIntegerTest : public ::testing::Test { } ICING_ASSIGN_OR_RETURN( std::vector<DocumentId> docid_map, - doc_store_->OptimizeInto(document_store_compact_dir, nullptr)); + doc_store_->OptimizeInto(document_store_compact_dir, nullptr, + /*namespace_id_fingerprint=*/false)); doc_store_.reset(); if (!filesystem_.SwapFiles(document_store_dir.c_str(), @@ -147,7 +153,12 @@ class NumericIndexIntegerTest : public ::testing::Test { ICING_ASSIGN_OR_RETURN( DocumentStore::CreateResult doc_store_create_result, DocumentStore::Create(&filesystem_, document_store_dir, &clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); doc_store_ = std::move(doc_store_create_result.document_store); return docid_map; } diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc index 25d4cfc..67b6201 100644 --- a/icing/join/join-processor_test.cc +++ b/icing/join/join-processor_test.cc @@ -110,7 +110,12 @@ class JoinProcessorTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); doc_store_ = std::move(create_result.document_store); ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_, diff --git a/icing/monkey_test/icing-monkey-test-runner.cc b/icing/monkey_test/icing-monkey-test-runner.cc index 89b8e89..558da1c 100644 --- a/icing/monkey_test/icing-monkey-test-runner.cc +++ b/icing/monkey_test/icing-monkey-test-runner.cc @@ -455,9 +455,15 @@ void IcingMonkeyTestRunner::DoOptimize() { void IcingMonkeyTestRunner::CreateIcingSearchEngine() { std::uniform_int_distribution<> dist(0, 1); + bool always_rebuild_index_optimize = dist(random_); + float optimize_rebuild_index_threshold = + always_rebuild_index_optimize ? 0.0 : 0.9; + IcingSearchEngineOptions icing_options; icing_options.set_index_merge_size(config_.index_merge_size); icing_options.set_base_dir(icing_dir_->dir()); + icing_options.set_optimize_rebuild_index_threshold( + optimize_rebuild_index_threshold); // The method will be called every time when we ReloadFromDisk(), so randomly // flip this flag to test document store's compatibility. icing_options.set_document_store_namespace_id_fingerprint( diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc index 7aef40f..c48d9ad 100644 --- a/icing/query/advanced_query_parser/query-visitor_test.cc +++ b/icing/query/advanced_query_parser/query-visitor_test.cc @@ -115,7 +115,12 @@ class QueryVisitorTest : public ::testing::TestWithParam<QueryType> { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, store_dir_, &clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(create_result.document_store); Index::Options options(index_dir_.c_str(), diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc index 111b598..6826c22 100644 --- a/icing/query/query-processor_benchmark.cc +++ b/icing/query/query-processor_benchmark.cc @@ -92,6 +92,17 @@ std::unique_ptr<Normalizer> CreateNormalizer() { .ValueOrDie(); } +libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore( + const Filesystem* filesystem, const std::string& base_dir, + const Clock* clock, const SchemaStore* schema_store) { + return DocumentStore::Create( + filesystem, base_dir, clock, schema_store, + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr); +} + void BM_QueryOneTerm(benchmark::State& state) { bool run_via_adb = absl::GetFlag(FLAGS_adb); if (!run_via_adb) { @@ -136,8 +147,8 @@ void BM_QueryOneTerm(benchmark::State& state) { ICING_ASSERT_OK(schema_store->SetSchema(schema)); DocumentStore::CreateResult create_result = - DocumentStore::Create(&filesystem, doc_store_dir, &clock, - schema_store.get()) + CreateDocumentStore(&filesystem, doc_store_dir, &clock, + schema_store.get()) .ValueOrDie(); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -262,8 +273,8 @@ void BM_QueryFiveTerms(benchmark::State& state) { ICING_ASSERT_OK(schema_store->SetSchema(schema)); DocumentStore::CreateResult create_result = - DocumentStore::Create(&filesystem, doc_store_dir, &clock, - schema_store.get()) + CreateDocumentStore(&filesystem, doc_store_dir, &clock, + schema_store.get()) .ValueOrDie(); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -406,8 +417,8 @@ void BM_QueryDiacriticTerm(benchmark::State& state) { ICING_ASSERT_OK(schema_store->SetSchema(schema)); DocumentStore::CreateResult create_result = - DocumentStore::Create(&filesystem, doc_store_dir, &clock, - schema_store.get()) + CreateDocumentStore(&filesystem, doc_store_dir, &clock, + schema_store.get()) .ValueOrDie(); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -535,8 +546,8 @@ void BM_QueryHiragana(benchmark::State& state) { ICING_ASSERT_OK(schema_store->SetSchema(schema)); DocumentStore::CreateResult create_result = - DocumentStore::Create(&filesystem, doc_store_dir, &clock, - schema_store.get()) + CreateDocumentStore(&filesystem, doc_store_dir, &clock, + schema_store.get()) .ValueOrDie(); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc index cbf859b..47245fd 100644 --- a/icing/query/query-processor_test.cc +++ b/icing/query/query-processor_test.cc @@ -63,6 +63,17 @@ using ::testing::IsEmpty; using ::testing::SizeIs; using ::testing::UnorderedElementsAre; +libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore( + const Filesystem* filesystem, const std::string& base_dir, + const Clock* clock, const SchemaStore* schema_store) { + return DocumentStore::Create( + filesystem, base_dir, clock, schema_store, + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr); +} + class QueryProcessorTest : public ::testing::TestWithParam<SearchSpecProto::SearchType::Code> { protected: @@ -96,8 +107,8 @@ class QueryProcessorTest ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, store_dir_, &fake_clock_, + schema_store_.get())); document_store_ = std::move(create_result.document_store); Index::Options options(index_dir_, @@ -2501,8 +2512,8 @@ TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, store_dir_, &fake_clock, - schema_store_.get())); + CreateDocumentStore(&filesystem_, store_dir_, &fake_clock, + schema_store_.get())); document_store_ = std::move(create_result.document_store); ICING_ASSERT_OK_AND_ASSIGN( @@ -2559,8 +2570,8 @@ TEST_P(QueryProcessorTest, DocumentPastTtlFilteredOut) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, store_dir_, &fake_clock, - schema_store_.get())); + CreateDocumentStore(&filesystem_, store_dir_, &fake_clock, + schema_store_.get())); document_store_ = std::move(create_result.document_store); ICING_ASSERT_OK_AND_ASSIGN( diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc index d4ecec0..7d45de7 100644 --- a/icing/query/suggestion-processor_test.cc +++ b/icing/query/suggestion-processor_test.cc @@ -86,7 +86,12 @@ class SuggestionProcessorTest : public Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(create_result.document_store); Index::Options options(index_dir_, diff --git a/icing/result/result-retriever-v2_group-result-limiter_test.cc b/icing/result/result-retriever-v2_group-result-limiter_test.cc index 7027cc5..d4aaa38 100644 --- a/icing/result/result-retriever-v2_group-result-limiter_test.cc +++ b/icing/result/result-retriever-v2_group-result-limiter_test.cc @@ -88,7 +88,12 @@ class ResultRetrieverV2GroupResultLimiterTest : public testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(create_result.document_store); } diff --git a/icing/result/result-retriever-v2_projection_test.cc b/icing/result/result-retriever-v2_projection_test.cc index 629fb34..94580d4 100644 --- a/icing/result/result-retriever-v2_projection_test.cc +++ b/icing/result/result-retriever-v2_projection_test.cc @@ -115,7 +115,12 @@ class ResultRetrieverV2ProjectionTest : public testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(create_result.document_store); } diff --git a/icing/result/result-retriever-v2_snippet_test.cc b/icing/result/result-retriever-v2_snippet_test.cc index b32800c..3dce0ef 100644 --- a/icing/result/result-retriever-v2_snippet_test.cc +++ b/icing/result/result-retriever-v2_snippet_test.cc @@ -107,7 +107,12 @@ class ResultRetrieverV2SnippetTest : public testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(create_result.document_store); } diff --git a/icing/result/result-retriever-v2_test.cc b/icing/result/result-retriever-v2_test.cc index 6c2aa67..462d535 100644 --- a/icing/result/result-retriever-v2_test.cc +++ b/icing/result/result-retriever-v2_test.cc @@ -197,6 +197,17 @@ ResultSpecProto CreateResultSpec( return result_spec; } +libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore( + const Filesystem* filesystem, const std::string& base_dir, + const Clock* clock, const SchemaStore* schema_store) { + return DocumentStore::Create( + filesystem, base_dir, clock, schema_store, + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr); +} + TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) { EXPECT_THAT( ResultRetrieverV2::Create(/*doc_store=*/nullptr, schema_store_.get(), @@ -205,8 +216,8 @@ TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -227,8 +238,8 @@ TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) { TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -320,8 +331,8 @@ TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) { TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -392,8 +403,8 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&mock_filesystem, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -437,8 +448,8 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) { TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -522,8 +533,8 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) { TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -621,8 +632,8 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) { TEST_F(ResultRetrieverV2Test, ShouldLimitNumTotalBytesPerPage) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -681,8 +692,8 @@ TEST_F(ResultRetrieverV2Test, ShouldReturnSingleLargeResultAboveNumTotalBytesPerPageThreshold) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -743,8 +754,8 @@ TEST_F(ResultRetrieverV2Test, ShouldRetrieveNextResultWhenBelowNumTotalBytesPerPageThreshold) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc index c0ea49a..44bfe2d 100644 --- a/icing/result/result-state-manager_test.cc +++ b/icing/result/result-state-manager_test.cc @@ -106,7 +106,12 @@ class ResultStateManagerTest : public testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult result, DocumentStore::Create(&filesystem_, test_dir_, clock_.get(), - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(result.document_store); ICING_ASSERT_OK_AND_ASSIGN( diff --git a/icing/result/result-state-manager_thread-safety_test.cc b/icing/result/result-state-manager_thread-safety_test.cc index 55eda85..670578f 100644 --- a/icing/result/result-state-manager_thread-safety_test.cc +++ b/icing/result/result-state-manager_thread-safety_test.cc @@ -99,7 +99,12 @@ class ResultStateManagerThreadSafetyTest : public testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult result, DocumentStore::Create(&filesystem_, test_dir_, clock_.get(), - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(result.document_store); ICING_ASSERT_OK_AND_ASSIGN( diff --git a/icing/result/result-state-v2_test.cc b/icing/result/result-state-v2_test.cc index 8706b6d..35b6401 100644 --- a/icing/result/result-state-v2_test.cc +++ b/icing/result/result-state-v2_test.cc @@ -68,7 +68,12 @@ class ResultStateV2Test : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult result, DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(result.document_store); num_total_hits_ = 0; diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc index d654195..2c4023c 100644 --- a/icing/result/snippet-retriever.cc +++ b/icing/result/snippet-retriever.cc @@ -489,8 +489,7 @@ void GetEntriesFromProperty(const PropertyProto* current_property, std::string_view value = current_property->string_values(i); std::unique_ptr<Tokenizer::Iterator> iterator = tokenizer - ->Tokenize(value, - LanguageSegmenter::AccessType::kBidirectionalIterator) + ->Tokenize(value, LanguageSegmenter::AccessType::kForwardIterator) .ValueOrDie(); // All iterators are moved through positions sequentially. Constructing them // each time resets them to the beginning of the string. This means that, diff --git a/icing/schema-builder.h b/icing/schema-builder.h index d1ef42f..1dceb62 100644 --- a/icing/schema-builder.h +++ b/icing/schema-builder.h @@ -158,6 +158,11 @@ class SchemaTypeConfigBuilder { return *this; } + SchemaTypeConfigBuilder& SetParentType(std::string_view parent_type) { + type_config_.set_parent_type(std::string(parent_type)); + return *this; + } + SchemaTypeConfigBuilder& SetVersion(int version) { type_config_.set_version(version); return *this; diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc index ea0d85a..f3f7aad 100644 --- a/icing/schema/schema-util.cc +++ b/icing/schema/schema-util.cc @@ -280,6 +280,15 @@ BuildTransitiveDependentGraph(const SchemaProto& schema) { } known_types.insert(schema_type); unknown_types.erase(schema_type); + if (!type_config.parent_type().empty()) { + std::string_view parent_schema_type(type_config.parent_type()); + if (known_types.count(parent_schema_type) == 0) { + unknown_types.insert(parent_schema_type); + } + // Try to add schema_type to the parent type's dependent map when it is + // not present already, in which case the value will be an empty vector. + dependent_map[parent_schema_type].insert({schema_type, {}}); + } for (const auto& property_config : type_config.properties()) { if (property_config.data_type() == PropertyConfigProto::DataType::DOCUMENT) { diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h index 47bb76b..825625e 100644 --- a/icing/schema/schema-util.h +++ b/icing/schema/schema-util.h @@ -34,15 +34,20 @@ class SchemaUtil { std::unordered_map<std::string, const SchemaTypeConfigProto>; // If A -> B is indicated in the map, then type A must be built before - // building type B, i.e. B has a property of type A. Also include all - // PropertyConfigProto (with DOCUMENT data_type) pointers which directly - // connects type A and B. IOW, this vector of PropertyConfigProto* are "direct - // edges" connecting A and B directly. It will be an empty vector if A and B - // are not "directly" connected, but instead via another intermediate level of - // schema type. For example, the actual dependency is A -> C -> B, so there - // will be A -> C and C -> B with valid PropertyConfigProto* respectively in - // this map, but we will also expand transitive dependents: add A -> B into - // dependent map with empty vector of "edges". + // building type B, which implies one of the following situations. + // + // 1. B has a property of type A. + // 2. A is a parent type of B via polymorphism. + // + // For the first case, this map will also include all PropertyConfigProto + // (with DOCUMENT data_type) pointers which *directly* connects type A and B. + // IOW, this vector of PropertyConfigProto* are "direct edges" connecting A + // and B directly. It will be an empty vector if A and B are not "directly" + // connected, but instead via another intermediate level of schema type. For + // example, the actual dependency is A -> C -> B, so there will be A -> C and + // C -> B with valid PropertyConfigProto* respectively in this map, but we + // will also expand transitive dependents: add A -> B into dependent map with + // empty vector of "edges". using DependentMap = std::unordered_map< std::string_view, std::unordered_map<std::string_view, diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc index 2d1e683..df7a421 100644 --- a/icing/schema/schema-util_test.cc +++ b/icing/schema/schema-util_test.cc @@ -36,6 +36,7 @@ using ::testing::HasSubstr; using ::testing::IsEmpty; using ::testing::Pair; using ::testing::Pointee; +using ::testing::SizeIs; using ::testing::UnorderedElementsAre; // Properties/fields in a schema type @@ -467,6 +468,229 @@ TEST(SchemaUtilTest, NonExistentType) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } +TEST(SchemaUtilTest, SimpleInheritance) { + // Create a schema with the following inheritance relation: + // A <- B + SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema)); + EXPECT_THAT(d_map, SizeIs(1)); + EXPECT_THAT(d_map["A"], UnorderedElementsAre(Pair("B", IsEmpty()))); +} + +TEST(SchemaUtilTest, ComplexInheritance) { + // Create a schema with the following inheritance relation: + // A + // / \ + // B E + // / \ + // C D + // | + // F + SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder().SetType("D").SetParentType("B").Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder().SetType("E").SetParentType("A").Build(); + SchemaTypeConfigProto type_f = + SchemaTypeConfigBuilder().SetType("F").SetParentType("D").Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .AddType(type_e) + .AddType(type_f) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema)); + EXPECT_THAT(d_map, SizeIs(3)); + EXPECT_THAT(d_map["A"], + UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()), + Pair("D", IsEmpty()), Pair("E", IsEmpty()), + Pair("F", IsEmpty()))); + EXPECT_THAT(d_map["B"], + UnorderedElementsAre(Pair("C", IsEmpty()), Pair("D", IsEmpty()), + Pair("F", IsEmpty()))); + EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair("F", IsEmpty()))); +} + +TEST(SchemaUtilTest, InheritanceCycle) { + // Create a schema with the following inheritance relation: + // C <- A <- B <- C + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder().SetType("A").SetParentType("C").Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST(SchemaUtilTest, SelfInheritance) { + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder().SetType("A").SetParentType("A").Build(); + + SchemaProto schema = SchemaBuilder().AddType(type_a).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST(SchemaUtilTest, NonExistentParentType) { + // Create a schema with the following inheritance relation: + // (does not exist) X <- A <- B <- C + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder().SetType("A").SetParentType("X").Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST(SchemaUtilTest, SimpleInheritanceWithNestedType) { + // Create a schema with the following dependent relation: + // A - B (via inheritance) + // B - C (via nested document) + SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema)); + EXPECT_THAT(d_map, SizeIs(2)); + EXPECT_THAT(d_map["A"], + UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()))); + EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair( + "C", UnorderedElementsAre(Pointee( + EqualsProto(type_c.properties(0))))))); +} + +TEST(SchemaUtilTest, ComplexInheritanceWithNestedType) { + // Create a schema with the following dependent relation: + // A + // / \ + // B E + // / \ + // C D + // | + // F + // Approach: + // B extends A + // C extends B + // D has a nested document of type B + // E has a nested document of type A + // F has a nested document of type D + SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder() + .SetType("E") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_f = + SchemaTypeConfigBuilder() + .SetType("F") + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .AddType(type_e) + .AddType(type_f) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema)); + EXPECT_THAT(d_map, SizeIs(3)); + EXPECT_THAT( + d_map["A"], + UnorderedElementsAre( + Pair("B", IsEmpty()), Pair("C", IsEmpty()), Pair("D", IsEmpty()), + Pair("E", UnorderedElementsAre( + Pointee(EqualsProto(type_e.properties(0))))), + Pair("F", IsEmpty()))); + EXPECT_THAT( + d_map["B"], + UnorderedElementsAre(Pair("C", IsEmpty()), + Pair("D", UnorderedElementsAre(Pointee( + EqualsProto(type_d.properties(0))))), + Pair("F", IsEmpty()))); + EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair( + "F", UnorderedElementsAre(Pointee( + EqualsProto(type_f.properties(0))))))); +} + +TEST(SchemaUtilTest, InheritanceWithNestedTypeCycle) { + // Create a schema that A and B depend on each other, in the sense that B + // extends A but A has a nested document of type B. + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + TEST(SchemaUtilTest, EmptySchemaProtoIsValid) { SchemaProto schema; ICING_ASSERT_OK(SchemaUtil::Validate(schema)); diff --git a/icing/scoring/advanced_scoring/advanced-scorer.cc b/icing/scoring/advanced_scoring/advanced-scorer.cc index 771615c..8925024 100644 --- a/icing/scoring/advanced_scoring/advanced-scorer.cc +++ b/icing/scoring/advanced_scoring/advanced-scorer.cc @@ -46,10 +46,10 @@ AdvancedScorer::Create(const ScoringSpecProto& scoring_spec, ICING_ASSIGN_OR_RETURN(std::unique_ptr<SectionWeights> section_weights, SectionWeights::Create(schema_store, scoring_spec)); std::unique_ptr<Bm25fCalculator> bm25f_calculator = - std::make_unique<Bm25fCalculator>(document_store, - std::move(section_weights)); + std::make_unique<Bm25fCalculator>(document_store, section_weights.get()); ScoringVisitor visitor(default_score, document_store, schema_store, - bm25f_calculator.get(), join_children_fetcher); + section_weights.get(), bm25f_calculator.get(), + join_children_fetcher); tree_root->Accept(&visitor); ICING_ASSIGN_OR_RETURN(std::unique_ptr<ScoreExpression> expression, @@ -58,8 +58,9 @@ AdvancedScorer::Create(const ScoringSpecProto& scoring_spec, return absl_ports::InvalidArgumentError( "The root scoring expression is not of double type."); } - return std::unique_ptr<AdvancedScorer>(new AdvancedScorer( - std::move(expression), std::move(bm25f_calculator), default_score)); + return std::unique_ptr<AdvancedScorer>( + new AdvancedScorer(std::move(expression), std::move(section_weights), + std::move(bm25f_calculator), default_score)); } } // namespace lib diff --git a/icing/scoring/advanced_scoring/advanced-scorer.h b/icing/scoring/advanced_scoring/advanced-scorer.h index 1a1cd5c..aa7914f 100644 --- a/icing/scoring/advanced_scoring/advanced-scorer.h +++ b/icing/scoring/advanced_scoring/advanced-scorer.h @@ -66,9 +66,11 @@ class AdvancedScorer : public Scorer { private: explicit AdvancedScorer(std::unique_ptr<ScoreExpression> score_expression, + std::unique_ptr<SectionWeights> section_weights, std::unique_ptr<Bm25fCalculator> bm25f_calculator, double default_score) : score_expression_(std::move(score_expression)), + section_weights_(std::move(section_weights)), bm25f_calculator_(std::move(bm25f_calculator)), default_score_(default_score) { if (is_constant()) { @@ -78,6 +80,7 @@ class AdvancedScorer : public Scorer { } std::unique_ptr<ScoreExpression> score_expression_; + std::unique_ptr<SectionWeights> section_weights_; std::unique_ptr<Bm25fCalculator> bm25f_calculator_; double default_score_; }; diff --git a/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc index d6262b0..8db592a 100644 --- a/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc +++ b/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc @@ -38,7 +38,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { .ValueOrDie(); std::unique_ptr<DocumentStore> document_store = DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock, - schema_store.get()) + schema_store.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr) .ValueOrDie() .document_store; diff --git a/icing/scoring/advanced_scoring/advanced-scorer_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_test.cc index b3a47ba..c962bc5 100644 --- a/icing/scoring/advanced_scoring/advanced-scorer_test.cc +++ b/icing/scoring/advanced_scoring/advanced-scorer_test.cc @@ -65,7 +65,12 @@ class AdvancedScorerTest : public testing::Test { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(create_result.document_store); // Creates a simple email schema @@ -77,8 +82,31 @@ class AdvancedScorerTest : public testing::Test { .SetDataTypeString( TermMatchType::PREFIX, StringIndexingConfig::TokenizerType::PLAIN) - .SetDataType(TYPE_STRING) .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("person") + .AddProperty( + PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString( + TermMatchType::PREFIX, + StringIndexingConfig::TokenizerType::PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString( + TermMatchType::PREFIX, + StringIndexingConfig::TokenizerType::PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + + .AddProperty( + PropertyConfigBuilder() + .SetName("phoneNumber") + .SetDataTypeString( + TermMatchType::PREFIX, + StringIndexingConfig::TokenizerType::PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema)); @@ -135,6 +163,27 @@ ScoringSpecProto CreateAdvancedScoringSpec( return scoring_spec; } +PropertyWeight CreatePropertyWeight(std::string path, double weight) { + PropertyWeight property_weight; + property_weight.set_path(std::move(path)); + property_weight.set_weight(weight); + return property_weight; +} + +TypePropertyWeights CreateTypePropertyWeights( + std::string schema_type, std::vector<PropertyWeight>&& property_weights) { + TypePropertyWeights type_property_weights; + type_property_weights.set_schema_type(std::move(schema_type)); + type_property_weights.mutable_property_weights()->Reserve( + property_weights.size()); + + for (PropertyWeight& property_weight : property_weights) { + *type_property_weights.add_property_weights() = std::move(property_weight); + } + + return type_property_weights; +} + TEST_F(AdvancedScorerTest, InvalidAdvancedScoringSpec) { // Empty scoring expression for advanced scoring ScoringSpecProto scoring_spec; @@ -541,6 +590,161 @@ TEST_F(AdvancedScorerTest, ChildrenScoresFunctionScoreExpression) { Eq(default_score)); } +TEST_F(AdvancedScorerTest, PropertyWeightsFunctionScoreExpression) { + DocumentProto test_document_1 = + DocumentBuilder().SetKey("namespace", "uri1").SetSchema("email").Build(); + DocumentProto test_document_2 = + DocumentBuilder().SetKey("namespace", "uri2").SetSchema("person").Build(); + DocumentProto test_document_3 = + DocumentBuilder().SetKey("namespace", "uri3").SetSchema("person").Build(); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_1, + document_store_->Put(test_document_1)); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_2, + document_store_->Put(test_document_2)); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_3, + document_store_->Put(test_document_3)); + + ScoringSpecProto spec_proto = CreateAdvancedScoringSpec(""); + + *spec_proto.add_type_property_weights() = CreateTypePropertyWeights( + /*schema_type=*/"email", + {CreatePropertyWeight(/*path=*/"subject", /*weight=*/1.0)}); + *spec_proto.add_type_property_weights() = CreateTypePropertyWeights( + /*schema_type=*/"person", + {CreatePropertyWeight(/*path=*/"emailAddress", /*weight=*/0.5), + CreatePropertyWeight(/*path=*/"name", /*weight=*/0.8), + CreatePropertyWeight(/*path=*/"phoneNumber", /*weight=*/1.0)}); + + // Let the hit for test_document_1 match property "subject". + // So this.propertyWeights() for test_document_1 will return [1]. + DocHitInfo doc_hit_info_1 = DocHitInfo(document_id_1); + doc_hit_info_1.UpdateSection(0); + + // Let the hit for test_document_2 match properties "emailAddress" and "name". + // So this.propertyWeights() for test_document_2 will return [0.5, 0.8]. + DocHitInfo doc_hit_info_2 = DocHitInfo(document_id_2); + doc_hit_info_2.UpdateSection(0); + doc_hit_info_2.UpdateSection(1); + + // Let the hit for test_document_3 match properties "emailAddress", "name" and + // "phoneNumber". So this.propertyWeights() for test_document_3 will return + // [0.5, 0.8, 1]. + DocHitInfo doc_hit_info_3 = DocHitInfo(document_id_3); + doc_hit_info_3.UpdateSection(0); + doc_hit_info_3.UpdateSection(1); + doc_hit_info_3.UpdateSection(2); + + spec_proto.set_advanced_scoring_expression("min(this.propertyWeights())"); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<AdvancedScorer> scorer, + AdvancedScorer::Create(spec_proto, + /*default_score=*/10, document_store_.get(), + schema_store_.get())); + // min([1]) = 1 + EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1)); + // min([0.5, 0.8]) = 0.5 + EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.5)); + // min([0.5, 0.8, 1.0]) = 0.5 + EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(0.5)); + + spec_proto.set_advanced_scoring_expression("max(this.propertyWeights())"); + ICING_ASSERT_OK_AND_ASSIGN( + scorer, + AdvancedScorer::Create(spec_proto, + /*default_score=*/10, document_store_.get(), + schema_store_.get())); + // max([1]) = 1 + EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1)); + // max([0.5, 0.8]) = 0.8 + EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.8)); + // max([0.5, 0.8, 1.0]) = 1 + EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(1)); + + spec_proto.set_advanced_scoring_expression("sum(this.propertyWeights())"); + ICING_ASSERT_OK_AND_ASSIGN( + scorer, + AdvancedScorer::Create(spec_proto, + /*default_score=*/10, document_store_.get(), + schema_store_.get())); + // sum([1]) = 1 + EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1)); + // sum([0.5, 0.8]) = 1.3 + EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(1.3)); + // sum([0.5, 0.8, 1.0]) = 2.3 + EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(2.3)); +} + +TEST_F(AdvancedScorerTest, + PropertyWeightsFunctionScoreExpressionUnspecifiedWeights) { + DocumentProto test_document_1 = + DocumentBuilder().SetKey("namespace", "uri1").SetSchema("email").Build(); + DocumentProto test_document_2 = + DocumentBuilder().SetKey("namespace", "uri2").SetSchema("person").Build(); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_1, + document_store_->Put(test_document_1)); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_2, + document_store_->Put(test_document_2)); + + ScoringSpecProto spec_proto = CreateAdvancedScoringSpec(""); + + // The entry for type "email" is missing, so every properties in "email" + // should get weight 1.0. + // The weight of "phoneNumber" in "person" type is unspecified, which should + // default to 1/2 = 0.5 + *spec_proto.add_type_property_weights() = CreateTypePropertyWeights( + /*schema_type=*/"person", + {CreatePropertyWeight(/*path=*/"emailAddress", /*weight=*/1.0), + CreatePropertyWeight(/*path=*/"name", /*weight=*/2)}); + + // Let the hit for test_document_1 match property "subject". + // So this.propertyWeights() for test_document_1 will return [1]. + DocHitInfo doc_hit_info_1 = DocHitInfo(document_id_1); + doc_hit_info_1.UpdateSection(0); + + // Let the hit for test_document_2 match properties "emailAddress", "name" and + // "phoneNumber". So this.propertyWeights() for test_document_3 will return + // [0.5, 1, 0.5]. + DocHitInfo doc_hit_info_2 = DocHitInfo(document_id_2); + doc_hit_info_2.UpdateSection(0); + doc_hit_info_2.UpdateSection(1); + doc_hit_info_2.UpdateSection(2); + + spec_proto.set_advanced_scoring_expression("min(this.propertyWeights())"); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<AdvancedScorer> scorer, + AdvancedScorer::Create(spec_proto, + /*default_score=*/10, document_store_.get(), + schema_store_.get())); + // min([1]) = 1 + EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1)); + // min([0.5, 1, 0.5]) = 0.5 + EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.5)); + + spec_proto.set_advanced_scoring_expression("max(this.propertyWeights())"); + ICING_ASSERT_OK_AND_ASSIGN( + scorer, + AdvancedScorer::Create(spec_proto, + /*default_score=*/10, document_store_.get(), + schema_store_.get())); + // max([1]) = 1 + EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1)); + // max([0.5, 1, 0.5]) = 1 + EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(1)); + + spec_proto.set_advanced_scoring_expression("sum(this.propertyWeights())"); + ICING_ASSERT_OK_AND_ASSIGN( + scorer, + AdvancedScorer::Create(spec_proto, + /*default_score=*/10, document_store_.get(), + schema_store_.get())); + // sum([1]) = 1 + EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1)); + // sum([0.5, 1, 0.5]) = 2 + EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(2)); +} + TEST_F(AdvancedScorerTest, InvalidChildrenScoresFunctionScoreExpression) { const double default_score = 123; diff --git a/icing/scoring/advanced_scoring/score-expression.cc b/icing/scoring/advanced_scoring/score-expression.cc index 7a8f046..3851660 100644 --- a/icing/scoring/advanced_scoring/score-expression.cc +++ b/icing/scoring/advanced_scoring/score-expression.cc @@ -459,5 +459,58 @@ ChildrenScoresFunctionScoreExpression::eval_list( return std::move(children_scores); } +libtextclassifier3::StatusOr< + std::unique_ptr<PropertyWeightsFunctionScoreExpression>> +PropertyWeightsFunctionScoreExpression::Create( + std::vector<std::unique_ptr<ScoreExpression>> args, + const DocumentStore* document_store, + const SectionWeights* section_weights) { + if (args.size() != 1) { + return absl_ports::InvalidArgumentError( + "propertyWeights must have 1 argument."); + } + ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args)); + + if (args[0]->type() != ScoreExpressionType::kDocument) { + return absl_ports::InvalidArgumentError( + "propertyWeights must take \"this\" as its argument."); + } + return std::unique_ptr<PropertyWeightsFunctionScoreExpression>( + new PropertyWeightsFunctionScoreExpression(document_store, + section_weights)); +} + +libtextclassifier3::StatusOr<std::vector<double>> +PropertyWeightsFunctionScoreExpression::eval_list( + const DocHitInfo& hit_info, const DocHitInfoIterator*) const { + std::vector<double> weights; + SectionIdMask sections = hit_info.hit_section_ids_mask(); + SchemaTypeId schema_type_id = GetSchemaTypeId(hit_info.document_id()); + + while (sections != 0) { + SectionId section_id = __builtin_ctzll(sections); + sections &= ~(UINT64_C(1) << section_id); + weights.push_back(section_weights_.GetNormalizedSectionWeight( + schema_type_id, section_id)); + } + return weights; +} + +SchemaTypeId PropertyWeightsFunctionScoreExpression::GetSchemaTypeId( + DocumentId document_id) const { + auto filter_data_optional = + document_store_.GetAliveDocumentFilterData(document_id); + if (!filter_data_optional) { + // This should never happen. The only failure case for + // GetDocumentFilterData is if the document_id is outside of the range of + // allocated document_ids, which shouldn't be possible since we're getting + // this document_id from the posting lists. + ICING_LOG(WARNING) << "No document filter data for document [" + << document_id << "]"; + return kInvalidSchemaTypeId; + } + return filter_data_optional.value().schema_type_id(); +} + } // namespace lib } // namespace icing diff --git a/icing/scoring/advanced_scoring/score-expression.h b/icing/scoring/advanced_scoring/score-expression.h index b70cd46..ad719fe 100644 --- a/icing/scoring/advanced_scoring/score-expression.h +++ b/icing/scoring/advanced_scoring/score-expression.h @@ -303,6 +303,38 @@ class ChildrenScoresFunctionScoreExpression : public ScoreExpression { const JoinChildrenFetcher& join_children_fetcher_; }; +class PropertyWeightsFunctionScoreExpression : public ScoreExpression { + public: + static constexpr std::string_view kFunctionName = "propertyWeights"; + + // RETURNS: + // - A PropertyWeightsFunctionScoreExpression instance on success. + // - FAILED_PRECONDITION on any null pointer in children. + // - INVALID_ARGUMENT on type errors. + static libtextclassifier3::StatusOr< + std::unique_ptr<PropertyWeightsFunctionScoreExpression>> + Create(std::vector<std::unique_ptr<ScoreExpression>> args, + const DocumentStore* document_store, + const SectionWeights* section_weights); + + libtextclassifier3::StatusOr<std::vector<double>> eval_list( + const DocHitInfo& hit_info, const DocHitInfoIterator*) const override; + + ScoreExpressionType type() const override { + return ScoreExpressionType::kDoubleList; + } + + SchemaTypeId GetSchemaTypeId(DocumentId document_id) const; + + private: + explicit PropertyWeightsFunctionScoreExpression( + const DocumentStore* document_store, + const SectionWeights* section_weights) + : document_store_(*document_store), section_weights_(*section_weights) {} + const DocumentStore& document_store_; + const SectionWeights& section_weights_; +}; + } // namespace lib } // namespace icing diff --git a/icing/scoring/advanced_scoring/scoring-visitor.cc b/icing/scoring/advanced_scoring/scoring-visitor.cc index 059e65b..b3b831c 100644 --- a/icing/scoring/advanced_scoring/scoring-visitor.cc +++ b/icing/scoring/advanced_scoring/scoring-visitor.cc @@ -109,6 +109,11 @@ void ScoringVisitor::VisitFunctionHelper(const FunctionNode* node, // childrenScores function expression = ChildrenScoresFunctionScoreExpression::Create( std::move(args), join_children_fetcher_); + } else if (function_name == + PropertyWeightsFunctionScoreExpression::kFunctionName) { + // propertyWeights function + expression = PropertyWeightsFunctionScoreExpression::Create( + std::move(args), &document_store_, §ion_weights_); } else if (MathFunctionScoreExpression::kFunctionNames.find(function_name) != MathFunctionScoreExpression::kFunctionNames.end()) { // Math functions diff --git a/icing/scoring/advanced_scoring/scoring-visitor.h b/icing/scoring/advanced_scoring/scoring-visitor.h index 9b01f73..9d9c105 100644 --- a/icing/scoring/advanced_scoring/scoring-visitor.h +++ b/icing/scoring/advanced_scoring/scoring-visitor.h @@ -33,11 +33,13 @@ class ScoringVisitor : public AbstractSyntaxTreeVisitor { explicit ScoringVisitor(double default_score, const DocumentStore* document_store, const SchemaStore* schema_store, + SectionWeights* section_weights, Bm25fCalculator* bm25f_calculator, const JoinChildrenFetcher* join_children_fetcher) : default_score_(default_score), document_store_(*document_store), schema_store_(*schema_store), + section_weights_(*section_weights), bm25f_calculator_(*bm25f_calculator), join_children_fetcher_(join_children_fetcher) {} @@ -88,6 +90,7 @@ class ScoringVisitor : public AbstractSyntaxTreeVisitor { double default_score_; const DocumentStore& document_store_; const SchemaStore& schema_store_; + SectionWeights& section_weights_; Bm25fCalculator& bm25f_calculator_; // A non-null join_children_fetcher_ indicates scoring in a join. const JoinChildrenFetcher* join_children_fetcher_; // Does not own. diff --git a/icing/scoring/bm25f-calculator.cc b/icing/scoring/bm25f-calculator.cc index f169cda..9120cc7 100644 --- a/icing/scoring/bm25f-calculator.cc +++ b/icing/scoring/bm25f-calculator.cc @@ -42,11 +42,9 @@ constexpr float k1_ = 1.2f; constexpr float b_ = 0.7f; // TODO(b/158603900): add tests for Bm25fCalculator -Bm25fCalculator::Bm25fCalculator( - const DocumentStore* document_store, - std::unique_ptr<SectionWeights> section_weights) - : document_store_(document_store), - section_weights_(std::move(section_weights)) {} +Bm25fCalculator::Bm25fCalculator(const DocumentStore* document_store, + SectionWeights* section_weights) + : document_store_(document_store), section_weights_(*section_weights) {} // During initialization, Bm25fCalculator iterates through // hit-iterators for each query term to pre-compute n(q_i) for each corpus under @@ -219,7 +217,7 @@ float Bm25fCalculator::ComputeTermFrequencyForMatchedSections( sections &= ~(UINT64_C(1) << section_id); Hit::TermFrequency tf = term_match_info.term_frequencies[section_id]; - double weighted_tf = tf * section_weights_->GetNormalizedSectionWeight( + double weighted_tf = tf * section_weights_.GetNormalizedSectionWeight( schema_type_id, section_id); if (tf != Hit::kNoTermFrequency) { sum += weighted_tf; diff --git a/icing/scoring/bm25f-calculator.h b/icing/scoring/bm25f-calculator.h index 05009d8..dc5e081 100644 --- a/icing/scoring/bm25f-calculator.h +++ b/icing/scoring/bm25f-calculator.h @@ -63,8 +63,8 @@ namespace lib { // see: glossary/bm25 class Bm25fCalculator { public: - explicit Bm25fCalculator(const DocumentStore *document_store_, - std::unique_ptr<SectionWeights> section_weights_); + explicit Bm25fCalculator(const DocumentStore *document_store, + SectionWeights *section_weights); // Precompute and cache statistics relevant to BM25F. // Populates term_id_map_ and corpus_nqi_map_ for use while scoring other @@ -145,7 +145,7 @@ class Bm25fCalculator { // Used for accessing normalized section weights when computing the weighted // term frequency. - std::unique_ptr<SectionWeights> section_weights_; + SectionWeights §ion_weights_; // Map from query term to compact term ID. // Necessary as a key to the other maps. diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc index 9a126dc..076e36a 100644 --- a/icing/scoring/score-and-rank_benchmark.cc +++ b/icing/scoring/score-and-rank_benchmark.cc @@ -89,6 +89,17 @@ DocumentProto CreateEmailDocument(int id, int document_score, .Build(); } +libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore( + const Filesystem* filesystem, const std::string& base_dir, + const Clock* clock, const SchemaStore* schema_store) { + return DocumentStore::Create( + filesystem, base_dir, clock, schema_store, + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr); +} + void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) { const std::string base_dir = GetTestTempDir() + "/score_and_rank_benchmark"; const std::string document_store_dir = base_dir + "/document_store"; @@ -107,8 +118,8 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); + CreateDocumentStore(&filesystem, document_store_dir, &clock, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -208,8 +219,8 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); + CreateDocumentStore(&filesystem, document_store_dir, &clock, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -312,8 +323,8 @@ void BM_ScoreAndRankDocumentHitsNoScoring(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); + CreateDocumentStore(&filesystem, document_store_dir, &clock, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -410,8 +421,8 @@ void BM_ScoreAndRankDocumentHitsByRelevanceScoring(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); + CreateDocumentStore(&filesystem, document_store_dir, &clock, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); diff --git a/icing/scoring/scorer-factory.cc b/icing/scoring/scorer-factory.cc index d4d8b79..5b8609b 100644 --- a/icing/scoring/scorer-factory.cc +++ b/icing/scoring/scorer-factory.cc @@ -78,8 +78,10 @@ class DocumentCreationTimestampScorer : public Scorer { class RelevanceScoreScorer : public Scorer { public: explicit RelevanceScoreScorer( + std::unique_ptr<SectionWeights> section_weights, std::unique_ptr<Bm25fCalculator> bm25f_calculator, double default_score) - : bm25f_calculator_(std::move(bm25f_calculator)), + : section_weights_(std::move(section_weights)), + bm25f_calculator_(std::move(bm25f_calculator)), default_score_(default_score) {} void PrepareToScore( @@ -99,6 +101,7 @@ class RelevanceScoreScorer : public Scorer { } private: + std::unique_ptr<SectionWeights> section_weights_; std::unique_ptr<Bm25fCalculator> bm25f_calculator_; double default_score_; }; @@ -193,8 +196,9 @@ libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create( SectionWeights::Create(schema_store, scoring_spec)); auto bm25f_calculator = std::make_unique<Bm25fCalculator>( - document_store, std::move(section_weights)); - return std::make_unique<RelevanceScoreScorer>(std::move(bm25f_calculator), + document_store, section_weights.get()); + return std::make_unique<RelevanceScoreScorer>(std::move(section_weights), + std::move(bm25f_calculator), default_score); } case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT: diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc index 08705a1..2649c95 100644 --- a/icing/scoring/scorer_test.cc +++ b/icing/scoring/scorer_test.cc @@ -65,7 +65,12 @@ class ScorerTest : public ::testing::TestWithParam<ScorerTestingMode> { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock1_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(create_result.document_store); // Creates a simple email schema diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc index 1e579be..5c42236 100644 --- a/icing/scoring/scoring-processor_test.cc +++ b/icing/scoring/scoring-processor_test.cc @@ -63,7 +63,12 @@ class ScoringProcessorTest ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_, - schema_store_.get())); + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); document_store_ = std::move(create_result.document_store); // Creates a simple email schema diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc index c9769f2..2abd315 100644 --- a/icing/store/document-log-creator.cc +++ b/icing/store/document-log-creator.cc @@ -66,7 +66,8 @@ std::string DocumentLogCreator::GetDocumentLogFilename() { libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> DocumentLogCreator::Create(const Filesystem* filesystem, - const std::string& base_dir) { + const std::string& base_dir, + int32_t compression_level) { bool v0_exists = filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str()); bool v1_exists = @@ -75,7 +76,8 @@ DocumentLogCreator::Create(const Filesystem* filesystem, bool new_file = false; int preexisting_file_version = kCurrentVersion; if (v0_exists && !v1_exists) { - ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir)); + ICING_RETURN_IF_ERROR( + MigrateFromV0ToV1(filesystem, base_dir, compression_level)); // Need to regenerate derived files since documents may be written to a // different file offset in the log. @@ -94,7 +96,9 @@ DocumentLogCreator::Create(const Filesystem* filesystem, PortableFileBackedProtoLog<DocumentWrapper>::Create( filesystem, MakeDocumentLogFilenameV1(base_dir), PortableFileBackedProtoLog<DocumentWrapper>::Options( - /*compress_in=*/true))); + /*compress_in=*/true, + PortableFileBackedProtoLog<DocumentWrapper>::kMaxProtoSize, + compression_level))); CreateResult create_result = {std::move(log_create_result), preexisting_file_version, new_file}; @@ -102,15 +106,15 @@ DocumentLogCreator::Create(const Filesystem* filesystem, } libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1( - const Filesystem* filesystem, const std::string& base_dir) { + const Filesystem* filesystem, const std::string& base_dir, + int32_t compression_level) { ICING_VLOG(1) << "Migrating from v0 to v1 document log."; // Our v0 proto log was non-portable, create it so we can read protos out from // it. auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create( filesystem, MakeDocumentLogFilenameV0(base_dir), - FileBackedProtoLog<DocumentWrapper>::Options( - /*compress_in=*/true)); + FileBackedProtoLog<DocumentWrapper>::Options(/*compress_in=*/true)); if (!v0_create_result_or.ok()) { return absl_ports::Annotate( v0_create_result_or.status(), @@ -127,7 +131,10 @@ libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1( PortableFileBackedProtoLog<DocumentWrapper>::Create( filesystem, MakeDocumentLogFilenameV1(base_dir), PortableFileBackedProtoLog<DocumentWrapper>::Options( - /*compress_in=*/true)); + /*compress_in=*/true, + /*max_proto_size_in=*/ + PortableFileBackedProtoLog<DocumentWrapper>::kMaxProtoSize, + /*compression_level_in=*/compression_level)); if (!v1_create_result_or.ok()) { return absl_ports::Annotate( v1_create_result_or.status(), diff --git a/icing/store/document-log-creator.h b/icing/store/document-log-creator.h index be8feed..0c2794a 100644 --- a/icing/store/document-log-creator.h +++ b/icing/store/document-log-creator.h @@ -57,7 +57,8 @@ class DocumentLogCreator { // CreateResult on success. // INTERNAL on any I/O error. static libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> Create( - const Filesystem* filesystem, const std::string& base_dir); + const Filesystem* filesystem, const std::string& base_dir, + int32_t compression_level); // Returns the filename of the document log, without any directory prefixes. // Used mainly for testing purposes. @@ -74,7 +75,8 @@ class DocumentLogCreator { // INVALID_ARGUMENT if some invalid option was passed to the document log. // INTERNAL on I/O error. static libtextclassifier3::Status MigrateFromV0ToV1( - const Filesystem* filesystem, const std::string& base_dir); + const Filesystem* filesystem, const std::string& base_dir, + int32_t compression_level); }; } // namespace lib diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index a81ae4b..ae8bfc0 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -230,13 +230,15 @@ DocumentStore::DocumentStore(const Filesystem* filesystem, const std::string_view base_dir, const Clock* clock, const SchemaStore* schema_store, - bool namespace_id_fingerprint) + bool namespace_id_fingerprint, + int32_t compression_level) : filesystem_(filesystem), base_dir_(base_dir), clock_(*clock), schema_store_(schema_store), document_validator_(schema_store), - namespace_id_fingerprint_(namespace_id_fingerprint) {} + namespace_id_fingerprint_(namespace_id_fingerprint), + compression_level_(compression_level) {} libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put( const DocumentProto& document, int32_t num_tokens, @@ -264,13 +266,14 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create( const Filesystem* filesystem, const std::string& base_dir, const Clock* clock, const SchemaStore* schema_store, bool force_recovery_and_revalidate_documents, bool namespace_id_fingerprint, - InitializeStatsProto* initialize_stats) { + int32_t compression_level, InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(filesystem); ICING_RETURN_ERROR_IF_NULL(clock); ICING_RETURN_ERROR_IF_NULL(schema_store); auto document_store = std::unique_ptr<DocumentStore>(new DocumentStore( - filesystem, base_dir, clock, schema_store, namespace_id_fingerprint)); + filesystem, base_dir, clock, schema_store, namespace_id_fingerprint, + compression_level)); ICING_ASSIGN_OR_RETURN( DataLoss data_loss, document_store->Initialize(force_recovery_and_revalidate_documents, @@ -285,7 +288,8 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create( libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( bool force_recovery_and_revalidate_documents, InitializeStatsProto* initialize_stats) { - auto create_result_or = DocumentLogCreator::Create(filesystem_, base_dir_); + auto create_result_or = + DocumentLogCreator::Create(filesystem_, base_dir_, compression_level_); // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. @@ -1725,6 +1729,7 @@ libtextclassifier3::Status DocumentStore::Optimize() { libtextclassifier3::StatusOr<std::vector<DocumentId>> DocumentStore::OptimizeInto(const std::string& new_directory, const LanguageSegmenter* lang_segmenter, + bool namespace_id_fingerprint, OptimizeStatsProto* stats) { // Validates directory if (new_directory == base_dir_) { @@ -1732,9 +1737,12 @@ DocumentStore::OptimizeInto(const std::string& new_directory, "New directory is the same as the current one."); } - ICING_ASSIGN_OR_RETURN(auto doc_store_create_result, - DocumentStore::Create(filesystem_, new_directory, - &clock_, schema_store_)); + ICING_ASSIGN_OR_RETURN( + auto doc_store_create_result, + DocumentStore::Create(filesystem_, new_directory, &clock_, schema_store_, + /*force_recovery_and_revalidate_documents=*/false, + namespace_id_fingerprint, compression_level_, + /*initialize_stats=*/nullptr)); std::unique_ptr<DocumentStore> new_doc_store = std::move(doc_store_create_result.document_store); diff --git a/icing/store/document-store.h b/icing/store/document-store.h index 6feed2e..88050ce 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -141,9 +141,10 @@ class DocumentStore { static libtextclassifier3::StatusOr<DocumentStore::CreateResult> Create( const Filesystem* filesystem, const std::string& base_dir, const Clock* clock, const SchemaStore* schema_store, - bool force_recovery_and_revalidate_documents = false, - bool namespace_id_fingerprint = false, - InitializeStatsProto* initialize_stats = nullptr); + bool force_recovery_and_revalidate_documents, + bool namespace_id_fingerprint, + int32_t compression_level, + InitializeStatsProto* initialize_stats); // Returns the maximum DocumentId that the DocumentStore has assigned. If // there has not been any DocumentIds assigned, i.e. the DocumentStore is @@ -445,7 +446,7 @@ class DocumentStore { // INTERNAL_ERROR on IO error libtextclassifier3::StatusOr<std::vector<DocumentId>> OptimizeInto( const std::string& new_directory, const LanguageSegmenter* lang_segmenter, - OptimizeStatsProto* stats = nullptr); + bool namespace_id_fingerprint, OptimizeStatsProto* stats = nullptr); // Calculates status for a potential Optimize call. Includes how many docs // there are vs how many would be optimized away. And also includes an @@ -479,7 +480,7 @@ class DocumentStore { // Use DocumentStore::Create() to instantiate. DocumentStore(const Filesystem* filesystem, std::string_view base_dir, const Clock* clock, const SchemaStore* schema_store, - bool namespace_id_fingerprint); + bool namespace_id_fingerprint, int32_t compression_level); const Filesystem* const filesystem_; const std::string base_dir_; @@ -496,6 +497,8 @@ class DocumentStore { // document_key_mapper_ and corpus_mapper_. bool namespace_id_fingerprint_; + const int32_t compression_level_; + // A log used to store all documents, it serves as a ground truth of doc // store. key_mapper_ and document_id_mapper_ can be regenerated from it. std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_; diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc index a4b3a17..99e17c7 100644 --- a/icing/store/document-store_benchmark.cc +++ b/icing/store/document-store_benchmark.cc @@ -124,6 +124,17 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(Filesystem filesystem, return schema_store; } +libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore( + const Filesystem* filesystem, const std::string& base_dir, + const Clock* clock, const SchemaStore* schema_store) { + return DocumentStore::Create( + filesystem, base_dir, clock, schema_store, + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr); +} + void BM_DoesDocumentExistBenchmark(benchmark::State& state) { Filesystem filesystem; Clock clock; @@ -138,8 +149,8 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) { filesystem.CreateDirectoryRecursively(document_store_dir.data()); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); + CreateDocumentStore(&filesystem, document_store_dir, &clock, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -178,8 +189,8 @@ void BM_Put(benchmark::State& state) { filesystem.CreateDirectoryRecursively(document_store_dir.data()); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); + CreateDocumentStore(&filesystem, document_store_dir, &clock, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -207,8 +218,8 @@ void BM_GetSameDocument(benchmark::State& state) { filesystem.CreateDirectoryRecursively(document_store_dir.data()); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); + CreateDocumentStore(&filesystem, document_store_dir, &clock, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -234,8 +245,8 @@ void BM_Delete(benchmark::State& state) { filesystem.CreateDirectoryRecursively(document_store_dir.data()); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); + CreateDocumentStore(&filesystem, document_store_dir, &clock, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -268,8 +279,8 @@ void BM_Create(benchmark::State& state) { filesystem.CreateDirectoryRecursively(document_store_dir.data()); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); + CreateDocumentStore(&filesystem, document_store_dir, &clock, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -284,7 +295,7 @@ void BM_Create(benchmark::State& state) { filesystem.CreateDirectoryRecursively(document_store_dir.data()); for (auto s : state) { - benchmark::DoNotOptimize(DocumentStore::Create( + benchmark::DoNotOptimize(CreateDocumentStore( &filesystem, document_store_dir, &clock, schema_store.get())); } } @@ -304,8 +315,8 @@ void BM_ComputeChecksum(benchmark::State& state) { filesystem.CreateDirectoryRecursively(document_store_dir.data()); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); + CreateDocumentStore(&filesystem, document_store_dir, &clock, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index 1d9a2a0..896d852 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -215,6 +215,17 @@ class DocumentStoreTest : public ::testing::Test { filesystem_.Write(header_file.c_str(), &header, sizeof(header)); } + libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore( + const Filesystem* filesystem, const std::string& base_dir, + const Clock* clock, const SchemaStore* schema_store) { + return DocumentStore::Create( + filesystem, base_dir, clock, schema_store, + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr); + } + const Filesystem filesystem_; const std::string test_dir_; FakeClock fake_clock_; @@ -240,16 +251,16 @@ class DocumentStoreTest : public ::testing::Test { }; TEST_F(DocumentStoreTest, CreationWithNullPointerShouldFail) { - EXPECT_THAT(DocumentStore::Create(/*filesystem=*/nullptr, document_store_dir_, - &fake_clock_, schema_store_.get()), + EXPECT_THAT(CreateDocumentStore(/*filesystem=*/nullptr, document_store_dir_, + &fake_clock_, schema_store_.get()), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT(DocumentStore::Create(&filesystem_, document_store_dir_, - /*clock=*/nullptr, schema_store_.get()), + EXPECT_THAT(CreateDocumentStore(&filesystem_, document_store_dir_, + /*clock=*/nullptr, schema_store_.get()), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT(DocumentStore::Create(&filesystem_, document_store_dir_, - &fake_clock_, /*schema_store=*/nullptr), + EXPECT_THAT(CreateDocumentStore(&filesystem_, document_store_dir_, + &fake_clock_, /*schema_store=*/nullptr), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } @@ -257,16 +268,16 @@ TEST_F(DocumentStoreTest, CreationWithBadFilesystemShouldFail) { MockFilesystem mock_filesystem; ON_CALL(mock_filesystem, OpenForWrite(_)).WillByDefault(Return(false)); - EXPECT_THAT(DocumentStore::Create(&mock_filesystem, document_store_dir_, - &fake_clock_, schema_store_.get()), + EXPECT_THAT(CreateDocumentStore(&mock_filesystem, document_store_dir_, + &fake_clock_, schema_store_.get()), StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -285,8 +296,8 @@ TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) { TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -318,8 +329,8 @@ TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) { TEST_F(DocumentStoreTest, PutSameKey) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -347,8 +358,8 @@ TEST_F(DocumentStoreTest, PutSameKey) { TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -378,8 +389,8 @@ TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) { TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -405,8 +416,8 @@ TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -433,8 +444,8 @@ TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) { TEST_F(DocumentStoreTest, GetInvalidDocumentId) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -460,8 +471,8 @@ TEST_F(DocumentStoreTest, GetInvalidDocumentId) { TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -486,8 +497,8 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) { TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -515,8 +526,8 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) { TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -535,8 +546,8 @@ TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) { TEST_F(DocumentStoreTest, DeleteByNamespaceOk) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -579,8 +590,8 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceOk) { TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -604,8 +615,8 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) { TEST_F(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -642,8 +653,8 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -669,8 +680,8 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { // Successfully recover from a corrupt derived file issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -710,8 +721,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -779,8 +790,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) { TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -805,8 +816,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) { TEST_F(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -853,8 +864,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -879,8 +890,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { // Successfully recover from a corrupt derived file issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -900,8 +911,8 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { TEST_F(DocumentStoreTest, PutDeleteThenPut) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); ICING_EXPECT_OK(doc_store->Put(test_document1_)); @@ -944,8 +955,8 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -983,8 +994,8 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { // Successfully recover from a corrupt derived file issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -1004,8 +1015,8 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { TEST_F(DocumentStoreTest, OptimizeInto) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1045,7 +1056,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) { // Optimizing into the same directory is not allowed EXPECT_THAT( - doc_store->OptimizeInto(document_store_dir_, lang_segmenter_.get()), + doc_store->OptimizeInto(document_store_dir_, lang_segmenter_.get(), + /*namespace_id_fingerprint=*/false), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, HasSubstr("directory is the same"))); @@ -1057,7 +1069,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) { // deleted ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); - EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()), + EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(), + /*namespace_id_fingerprint=*/false), IsOkAndHolds(ElementsAre(0, 1, 2))); int64_t optimized_size1 = filesystem_.GetFileSize(optimized_document_log.c_str()); @@ -1069,7 +1082,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) { ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1")); // DocumentId 0 is removed. - EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()), + EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(), + /*namespace_id_fingerprint=*/false), IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, 1))); int64_t optimized_size2 = filesystem_.GetFileSize(optimized_document_log.c_str()); @@ -1085,7 +1099,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) { ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); // DocumentId 0 is removed, and DocumentId 2 is expired. EXPECT_THAT( - doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()), + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(), + /*namespace_id_fingerprint=*/false), IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, kInvalidDocumentId))); int64_t optimized_size3 = filesystem_.GetFileSize(optimized_document_log.c_str()); @@ -1096,7 +1111,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) { ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); ICING_ASSERT_OK(doc_store->Delete("namespace", "uri2")); // DocumentId 0 and 1 is removed, and DocumentId 2 is expired. - EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()), + EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(), + /*namespace_id_fingerprint=*/false), IsOkAndHolds(ElementsAre(kInvalidDocumentId, kInvalidDocumentId, kInvalidDocumentId))); int64_t optimized_size4 = @@ -1107,14 +1123,15 @@ TEST_F(DocumentStoreTest, OptimizeInto) { TEST_F(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); std::string optimized_dir = document_store_dir_ + "_optimize"; ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); - EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()), + EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(), + /*namespace_id_fingerprint=*/false), IsOkAndHolds(IsEmpty())); } @@ -1124,8 +1141,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) { // Can put and delete fine. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1177,8 +1194,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) { // Successfully recover from a data loss issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1212,8 +1229,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) { // Can put and delete fine. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1268,8 +1285,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) { // NOTE: this doesn't trigger RegenerateDerivedFiles. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1305,8 +1322,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) { // Can put and delete fine. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1345,8 +1362,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) { // Successfully recover from a corrupt derived file issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1377,8 +1394,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) { TEST_F(DocumentStoreTest, GetStorageInfo) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1408,9 +1425,8 @@ TEST_F(DocumentStoreTest, GetStorageInfo) { ON_CALL(mock_filesystem, GetDiskUsage(A<const char*>())) .WillByDefault(Return(Filesystem::kBadFileSize)); ICING_ASSERT_OK_AND_ASSIGN( - create_result, - DocumentStore::Create(&mock_filesystem, document_store_dir_, &fake_clock_, - schema_store_.get())); + create_result, CreateDocumentStore(&mock_filesystem, document_store_dir_, + &fake_clock_, schema_store_.get())); std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem = std::move(create_result.document_store); @@ -1421,8 +1437,8 @@ TEST_F(DocumentStoreTest, GetStorageInfo) { TEST_F(DocumentStoreTest, MaxDocumentId) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1445,8 +1461,8 @@ TEST_F(DocumentStoreTest, MaxDocumentId) { TEST_F(DocumentStoreTest, GetNamespaceId) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1478,8 +1494,8 @@ TEST_F(DocumentStoreTest, GetNamespaceId) { TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1498,8 +1514,8 @@ TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) { TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1510,8 +1526,8 @@ TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) { TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1531,8 +1547,8 @@ TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) { TEST_F(DocumentStoreTest, GetCorpusId) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1566,8 +1582,8 @@ TEST_F(DocumentStoreTest, GetCorpusId) { TEST_F(DocumentStoreTest, NonexistentCorpusNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1590,8 +1606,8 @@ TEST_F(DocumentStoreTest, NonexistentCorpusNotFound) { TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1615,8 +1631,8 @@ TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) { TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreData) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1653,8 +1669,8 @@ TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreData) { TEST_F(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1665,8 +1681,8 @@ TEST_F(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) { TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1709,8 +1725,8 @@ TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) { TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1753,8 +1769,8 @@ TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) { TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1765,8 +1781,8 @@ TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) { TEST_F(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1776,8 +1792,8 @@ TEST_F(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) { TEST_F(DocumentStoreTest, DeleteClearsFilterCache) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1800,8 +1816,8 @@ TEST_F(DocumentStoreTest, DeleteClearsFilterCache) { TEST_F(DocumentStoreTest, DeleteClearsScoreCache) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1824,8 +1840,8 @@ TEST_F(DocumentStoreTest, DeleteClearsScoreCache) { TEST_F(DocumentStoreTest, DeleteShouldPreventUsageScores) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1859,8 +1875,8 @@ TEST_F(DocumentStoreTest, DeleteShouldPreventUsageScores) { TEST_F(DocumentStoreTest, ExpirationShouldPreventUsageScores) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1914,8 +1930,8 @@ TEST_F(DocumentStoreTest, ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1939,8 +1955,8 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -1969,8 +1985,8 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -2002,8 +2018,8 @@ TEST_F(DocumentStoreTest, CreationTimestampShouldBePopulated) { fake_clock_.SetSystemTimeMilliseconds(fake_real_time); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -2035,8 +2051,8 @@ TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -2061,8 +2077,8 @@ TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) { TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2076,8 +2092,8 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) { TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2087,8 +2103,8 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) { // Destroy the previous instance and recreate DocumentStore document_store.reset(); ICING_ASSERT_OK_AND_ASSIGN( - create_result, DocumentStore::Create(&filesystem_, document_store_dir_, - &fake_clock_, schema_store_.get())); + create_result, CreateDocumentStore(&filesystem_, document_store_dir_, + &fake_clock_, schema_store_.get())); document_store = std::move(create_result.document_store); EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum)); @@ -2097,8 +2113,8 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) { TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2113,8 +2129,8 @@ TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) { TEST_F(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2170,8 +2186,8 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2225,8 +2241,8 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { // because the "message" schema type is missing ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2292,8 +2308,8 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) { // Add the documents and check SchemaTypeIds match ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2382,8 +2398,8 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) { // Insert documents and check they're ok ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2452,8 +2468,8 @@ TEST_F(DocumentStoreTest, // Insert documents and check they're ok ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2524,8 +2540,8 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) { // Add the documents and check SchemaTypeIds match ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2616,8 +2632,8 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) { // Insert documents and check they're ok ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2689,8 +2705,8 @@ TEST_F(DocumentStoreTest, // Insert documents and check they're ok ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2729,8 +2745,8 @@ TEST_F(DocumentStoreTest, TEST_F(DocumentStoreTest, GetOptimizeInfo) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2763,11 +2779,12 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) { EXPECT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); EXPECT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); ICING_ASSERT_OK( - document_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + document_store->OptimizeInto(optimized_dir, lang_segmenter_.get(), + /*namespace_id_fingerprint=*/false)); document_store.reset(); ICING_ASSERT_OK_AND_ASSIGN( - create_result, DocumentStore::Create(&filesystem_, optimized_dir, - &fake_clock_, schema_store_.get())); + create_result, CreateDocumentStore(&filesystem_, optimized_dir, + &fake_clock_, schema_store_.get())); std::unique_ptr<DocumentStore> optimized_document_store = std::move(create_result.document_store); @@ -2781,8 +2798,8 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) { TEST_F(DocumentStoreTest, GetAllNamespaces) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2851,8 +2868,8 @@ TEST_F(DocumentStoreTest, GetAllNamespaces) { TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2937,8 +2954,8 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) { TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -2987,8 +3004,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) { { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -3012,8 +3029,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) { // Successfully recover from a corrupt derived file issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -3030,8 +3047,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) { { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -3066,8 +3083,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) { // Successfully recover from a data loss issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -3081,8 +3098,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) { TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -3119,8 +3136,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) { TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -3149,12 +3166,13 @@ TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) { std::string optimized_dir = document_store_dir_ + "/optimize_test"; filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()); ICING_ASSERT_OK( - document_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + document_store->OptimizeInto(optimized_dir, lang_segmenter_.get(), + /*namespace_id_fingerprint=*/false)); // Get optimized document store ICING_ASSERT_OK_AND_ASSIGN( - create_result, DocumentStore::Create(&filesystem_, optimized_dir, - &fake_clock_, schema_store_.get())); + create_result, CreateDocumentStore(&filesystem_, optimized_dir, + &fake_clock_, schema_store_.get())); std::unique_ptr<DocumentStore> optimized_document_store = std::move(create_result.document_store); @@ -3171,8 +3189,8 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) { // Can put and delete fine. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); @@ -3200,8 +3218,8 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) { // Successfully recover from a data loss issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL)); @@ -3215,8 +3233,8 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) { // Can put and delete fine. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); @@ -3265,8 +3283,8 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) { // Successfully recover from a data loss issue. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE)); @@ -3314,7 +3332,9 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { DocumentStore::Create( &filesystem_, document_store_dir_, &fake_clock_, schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false, - /*namespace_id_fingerprint=*/false, &initialize_stats)); + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel, + &initialize_stats)); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); // The document log is using the legacy v0 format so that a migration is @@ -3326,8 +3346,8 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -3464,8 +3484,8 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) { // Create the document store the first time and add an email document. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -3515,10 +3535,13 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) { InitializeStatsProto initialize_stats; ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create( - &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(), - /*force_recovery_and_revalidate_documents=*/true, - /*namespace_id_fingerprint=*/false, &initialize_stats)); + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get(), + /*force_recovery_and_revalidate_documents=*/true, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + &initialize_stats)); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -3564,8 +3587,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) { // Create the document store the first time and add an email document. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -3614,9 +3637,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) { // Create the document store the second time. Don't force recovery. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create( - &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(), - /*force_recovery_and_revalidate_documents=*/false)); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -3680,8 +3702,8 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) { // that has the 'body' section and one that doesn't. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -3719,9 +3741,13 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) { CorruptDocStoreHeaderChecksumFile(); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create( - &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(), - /*force_recovery_and_revalidate_documents=*/true)); + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get(), + /*force_recovery_and_revalidate_documents=*/true, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -3785,8 +3811,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { // that has the 'body' section and one that doesn't. ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -3825,9 +3851,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { CorruptDocStoreHeaderChecksumFile(); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create( - &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(), - /*force_recovery_and_revalidate_documents=*/false)); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); @@ -3904,7 +3929,9 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) { DocumentStore::Create( &filesystem_, document_store_dir, &fake_clock_, schema_store.get(), /*force_recovery_and_revalidate_documents=*/false, - /*namespace_id_fingerprint=*/false, &initialize_stats)); + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel, + &initialize_stats)); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -3992,8 +4019,8 @@ TEST_F(DocumentStoreTest, GetDebugInfo) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); @@ -4092,8 +4119,8 @@ TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); ICING_ASSERT_OK_AND_ASSIGN( @@ -4109,8 +4136,8 @@ TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) { TEST_F(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); ICING_ASSERT_OK_AND_ASSIGN( diff --git a/icing/util/tokenized-document_test.cc b/icing/util/tokenized-document_test.cc index f2a9214..c0b20bb 100644 --- a/icing/util/tokenized-document_test.cc +++ b/icing/util/tokenized-document_test.cc @@ -46,7 +46,7 @@ namespace { using ::testing::ElementsAre; using ::testing::Eq; -using ::testing::EqualsProto; +using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::IsEmpty; using ::testing::SizeIs; diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto index 40a0d0c..db5dbed 100644 --- a/proto/icing/proto/initialize.proto +++ b/proto/icing/proto/initialize.proto @@ -23,7 +23,7 @@ option java_package = "com.google.android.icing.proto"; option java_multiple_files = true; option objc_class_prefix = "ICNG"; -// Next tag: 7 +// Next tag: 8 message IcingSearchEngineOptions { // Directory to persist files for Icing. Required. // If Icing was previously initialized with this directory, it will reload @@ -75,6 +75,13 @@ message IcingSearchEngineOptions { // // Default to 0 for better rollout of the new index optimize. optional float optimize_rebuild_index_threshold = 6 [default = 0.0]; + + // Level of compression, NO_COMPRESSION = 0, BEST_SPEED = 1, + // BEST_COMPRESSION = 9 + // Valid values: [0, 9] + // Optional. + optional int32 compression_level = 7 [default = 3]; + reserved 2; } diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto index edfcf40..04f655d 100644 --- a/proto/icing/proto/logging.proto +++ b/proto/icing/proto/logging.proto @@ -143,7 +143,7 @@ message PutDocumentStatsProto { // Stats of the top-level function IcingSearchEngine::Search() and // IcingSearchEngine::GetNextPage(). -// Next tag: 21 +// Next tag: 23 message QueryStatsProto { // The UTF-8 length of the query string optional int32 query_length = 16; @@ -207,6 +207,12 @@ message QueryStatsProto { // Time used to send protos across the JNI boundary from native to java side. optional int32 native_to_java_jni_latency_ms = 20; + // The native latency due to the join operation. + optional int32 join_latency_ms = 21; + + // Number of documents scored. + optional int32 num_joined_results_returned_current_page = 22; + reserved 9; } diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto index f471426..dc625fc 100644 --- a/proto/icing/proto/schema.proto +++ b/proto/icing/proto/schema.proto @@ -34,7 +34,7 @@ option objc_class_prefix = "ICNG"; // TODO(cassiewang) Define a sample proto file that can be used by tests and for // documentation. // -// Next tag: 6 +// Next tag: 7 message SchemaTypeConfigProto { // REQUIRED: Named type that uniquely identifies the structured, logical // schema being defined. @@ -60,6 +60,12 @@ message SchemaTypeConfigProto { // it will default to value == 0. optional int32 version = 5; + // An experimental field to make the type as a subtype of parent_type, which + // enables parent_type to be interpreted as its subtypes in the context of the + // Search APIs, including schema type filters and projections specified in + // TypePropertyMask. + optional string parent_type = 6; + reserved 2, 3; } diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index c5bddab..0d6bfb4 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=520930446) +set(synced_AOSP_CL_number=524885330) |