aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2021-07-23 01:03:27 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2021-07-23 01:03:27 +0000
commit7f91a4d7bd4012a6e82aa2df72a2932441307a57 (patch)
treec0a00b9b4d52ff3dfeb50f5d894bad2d71389b00
parent732aeaf05fdff34813b4853c5af0bc82fcc6be30 (diff)
parentd462d0aedaec65748ad2a8cb5b3f5cb352ab2c20 (diff)
downloadicing-android12-release.tar.gz
Snap for 7574892 from d462d0aedaec65748ad2a8cb5b3f5cb352ab2c20 to sc-releaseandroid-vts-12.0_r9android-vts-12.0_r8android-vts-12.0_r7android-vts-12.0_r6android-vts-12.0_r5android-vts-12.0_r4android-vts-12.0_r3android-vts-12.0_r2android-vts-12.0_r12android-vts-12.0_r11android-vts-12.0_r10android-vts-12.0_r1android-security-12.0.0_r59android-security-12.0.0_r58android-security-12.0.0_r57android-security-12.0.0_r56android-security-12.0.0_r55android-security-12.0.0_r54android-security-12.0.0_r53android-security-12.0.0_r52android-security-12.0.0_r51android-security-12.0.0_r50android-security-12.0.0_r49android-security-12.0.0_r48android-security-12.0.0_r47android-security-12.0.0_r46android-security-12.0.0_r45android-security-12.0.0_r44android-security-12.0.0_r43android-security-12.0.0_r42android-security-12.0.0_r41android-security-12.0.0_r40android-security-12.0.0_r39android-security-12.0.0_r38android-security-12.0.0_r37android-security-12.0.0_r36android-security-12.0.0_r35android-security-12.0.0_r34android-security-11.0.0_r71android-platform-12.0.0_r9android-platform-12.0.0_r8android-platform-12.0.0_r7android-platform-12.0.0_r6android-platform-12.0.0_r5android-platform-12.0.0_r4android-platform-12.0.0_r31android-platform-12.0.0_r30android-platform-12.0.0_r3android-platform-12.0.0_r29android-platform-12.0.0_r28android-platform-12.0.0_r27android-platform-12.0.0_r26android-platform-12.0.0_r25android-platform-12.0.0_r24android-platform-12.0.0_r23android-platform-12.0.0_r22android-platform-12.0.0_r21android-platform-12.0.0_r20android-platform-12.0.0_r2android-platform-12.0.0_r19android-platform-12.0.0_r18android-platform-12.0.0_r17android-platform-12.0.0_r16android-platform-12.0.0_r15android-platform-12.0.0_r14android-platform-12.0.0_r13android-platform-12.0.0_r12android-platform-12.0.0_r11android-platform-12.0.0_r10android-platform-12.0.0_r1android-cts-12.0_r9android-cts-12.0_r8android-cts-12.0_r7android-cts-12.0_r6android-cts-12.0_r5android-cts-12.0_r4android-cts-12.0_r3android-cts-12.0_r2android-cts-12.0_r12android-cts-12.0_r11android-cts-12.0_r10android-cts-12.0_r1android-12.0.0_r9android-12.0.0_r8android-12.0.0_r34android-12.0.0_r33android-12.0.0_r31android-12.0.0_r30android-12.0.0_r3android-12.0.0_r25android-12.0.0_r2android-12.0.0_r11android-12.0.0_r10android-12.0.0_r1android12-tests-releaseandroid12-security-releaseandroid12-s5-releaseandroid12-s4-releaseandroid12-s3-releaseandroid12-s2-releaseandroid12-s1-releaseandroid12-releaseandroid12-platform-release
Change-Id: Ie81ebda724bc6b83ee3b4b3ba522afc2ab8a4dde
-rw-r--r--icing/file/file-backed-vector.h43
-rw-r--r--icing/file/file-backed-vector_test.cc197
-rw-r--r--icing/store/document-store.cc13
3 files changed, 228 insertions, 25 deletions
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 2443cb2..0989935 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -56,6 +56,7 @@
#ifndef ICING_FILE_FILE_BACKED_VECTOR_H_
#define ICING_FILE_FILE_BACKED_VECTOR_H_
+#include <inttypes.h>
#include <stdint.h>
#include <sys/mman.h>
@@ -423,13 +424,6 @@ FileBackedVector<T>::InitializeExistingFile(
absl_ports::StrCat("Invalid header kMagic for ", file_path));
}
- // Mmap the content of the vector, excluding the header so its easier to
- // access elements from the mmapped region
- auto mmapped_file =
- std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
- ICING_RETURN_IF_ERROR(
- mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header)));
-
// Check header
if (header->header_checksum != header->CalculateHeaderChecksum()) {
return absl_ports::FailedPreconditionError(
@@ -442,6 +436,20 @@ FileBackedVector<T>::InitializeExistingFile(
header->element_size));
}
+ int64_t min_file_size = header->num_elements * sizeof(T) + sizeof(Header);
+ if (min_file_size > file_size) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Inconsistent file size, expected %" PRId64 ", actual %" PRId64,
+ min_file_size, file_size));
+ }
+
+ // Mmap the content of the vector, excluding the header so its easier to
+ // access elements from the mmapped region
+ auto mmapped_file =
+ std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
+ ICING_RETURN_IF_ERROR(
+ mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header)));
+
// Check vector contents
Crc32 vector_checksum;
std::string_view vector_contents(
@@ -591,9 +599,24 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary(
least_file_size_needed = math_util::RoundUpTo(
least_file_size_needed,
int64_t{FileBackedVector<T>::kGrowElements * sizeof(T)});
- if (!filesystem_->Grow(file_path_.c_str(), least_file_size_needed)) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Couldn't grow file ", file_path_));
+
+ // We use PWrite here rather than Grow because Grow doesn't actually allocate
+ // an underlying disk block. This can lead to problems with mmap because mmap
+ // has no effective way to signal that it was impossible to allocate the disk
+ // block and ends up crashing instead. PWrite will force the allocation of
+ // these blocks, which will ensure that any failure to grow will surface here.
+ int64_t page_size = getpagesize();
+ auto buf = std::make_unique<uint8_t[]>(page_size);
+ int64_t size_to_write = page_size - (current_file_size % page_size);
+ ScopedFd sfd(filesystem_->OpenForWrite(file_path_.c_str()));
+ while (current_file_size < least_file_size_needed) {
+ if (!filesystem_->PWrite(sfd.get(), current_file_size, buf.get(),
+ size_to_write)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't grow file ", file_path_));
+ }
+ current_file_size += size_to_write;
+ size_to_write = page_size - (current_file_size % page_size);
}
ICING_RETURN_IF_ERROR(mmapped_file_->Remap(
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index bc2fef6..b05ce2d 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -32,6 +32,7 @@
#include "icing/util/logging.h"
using ::testing::Eq;
+using ::testing::IsTrue;
using ::testing::Pointee;
namespace icing {
@@ -278,7 +279,6 @@ TEST_F(FileBackedVectorTest, Grow) {
filesystem_, file_path_,
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
-
EXPECT_THAT(vector->Set(kMaxNumElts + 11, 'a'),
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
EXPECT_THAT(vector->Set(-1, 'a'),
@@ -318,25 +318,32 @@ TEST_F(FileBackedVectorTest, GrowsInChunks) {
filesystem_, file_path_,
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- // Our initial file size should just be the size of the header
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(sizeof(FileBackedVector<char>::Header)));
+ // Our initial file size should just be the size of the header. Disk usage
+ // will indicate that one block has been allocated, which contains the header.
+ int header_size = sizeof(FileBackedVector<char>::Header);
+ int page_size = getpagesize();
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(header_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(page_size));
- // Once we add something though, we'll grow to kGrowElements big
+ // Once we add something though, we'll grow to be kGrowElements big. From this
+ // point on, file size and disk usage should be the same because Growing will
+ // explicitly allocate the number of blocks needed to accomodate the file.
Insert(vector.get(), 0, "a");
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * sizeof(int)));
+ int file_size = kGrowElements * sizeof(int);
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Should still be the same size, don't need to grow underlying file
Insert(vector.get(), 1, "b");
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * sizeof(int)));
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Now we grow by a kGrowElements chunk, so the underlying file is 2
// kGrowElements big
+ file_size *= 2;
Insert(vector.get(), 2, std::string(kGrowElements, 'c'));
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * 2 * sizeof(int)));
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Destroy/persist the contents.
vector.reset();
@@ -463,6 +470,174 @@ TEST_F(FileBackedVectorTest, TruncateAndReReadFile) {
}
}
+TEST_F(FileBackedVectorTest, InitFileTooSmallForHeaderFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Shrink the file to be smaller than the header.
+ filesystem_.Truncate(fd_, sizeof(FileBackedVector<char>::Header) - 1);
+
+ {
+ // 3. Attempt to create the file and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitWrongDataSizeFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ {
+ // 2. Attempt to create the file with a different element size and confirm
+ // that it fails.
+ EXPECT_THAT(FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitCorruptHeaderFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Modify the header, but don't update the checksum. This would be similar
+ // to corruption of the header.
+ FileBackedVector<char>::Header header;
+ ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0),
+ IsTrue());
+ header.num_elements = 1;
+ ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with a header that doesn't match its
+ // checksum and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitHeaderElementSizeTooBigFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Modify the header so that the number of elements exceeds the actual size
+ // of the underlying file.
+ FileBackedVector<char>::Header header;
+ ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0),
+ IsTrue());
+ int64_t file_size = filesystem_.GetFileSize(fd_);
+ int64_t allocated_elements_size = file_size - sizeof(header);
+ header.num_elements = (allocated_elements_size / sizeof(char)) + 1;
+ header.header_checksum = header.CalculateHeaderChecksum();
+ ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with num_elements that is larger than the
+ // underlying file and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitCorruptElementsFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Overwrite the values of the first two elements.
+ std::string corrupted_content = "BY";
+ ASSERT_THAT(
+ filesystem_.PWrite(fd_, /*offset=*/sizeof(FileBackedVector<char>::Header),
+ corrupted_content.c_str(), corrupted_content.length()),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with elements that don't match their
+ // checksum and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitNormalSucceeds) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ {
+ // 2. Attempt to create the file with a completely valid header and elements
+ // region. This should succeed.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ IsOk());
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 81edce1..226a96b 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -1577,6 +1577,7 @@ libtextclassifier3::Status DocumentStore::OptimizeInto(
int size = document_id_mapper_->num_elements();
int num_deleted = 0;
int num_expired = 0;
+ UsageStore::UsageScores default_usage;
for (DocumentId document_id = 0; document_id < size; document_id++) {
auto document_or = Get(document_id, /*clear_internal_fields=*/false);
if (absl_ports::IsNotFound(document_or.status())) {
@@ -1625,10 +1626,14 @@ libtextclassifier3::Status DocumentStore::OptimizeInto(
// Copy over usage scores.
ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
usage_store_->GetUsageScores(document_id));
-
- DocumentId new_document_id = new_document_id_or.ValueOrDie();
- ICING_RETURN_IF_ERROR(
- new_doc_store->SetUsageScores(new_document_id, usage_scores));
+ if (!(usage_scores == default_usage)) {
+ // If the usage scores for this document are the default (no usage), then
+ // don't bother setting it. No need to possibly allocate storage if
+ // there's nothing interesting to store.
+ DocumentId new_document_id = new_document_id_or.ValueOrDie();
+ ICING_RETURN_IF_ERROR(
+ new_doc_store->SetUsageScores(new_document_id, usage_scores));
+ }
}
if (stats != nullptr) {
stats->set_num_original_documents(size);