aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2021-11-15 15:27:22 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2021-11-15 15:27:22 +0000
commit7d0fe0ad2ae225183154fda971651924cc22aba0 (patch)
treec0a00b9b4d52ff3dfeb50f5d894bad2d71389b00
parent59f3609352991a202a2a0186c1ca78771c9eaa54 (diff)
parent98f9e8aacdf9898e4ff093385365a233d25bf24f (diff)
downloadicing-android12-mainline-sdkext-release.tar.gz
Snap for 7915578 from 98f9e8aacdf9898e4ff093385365a233d25bf24f to mainline-sdkext-releaseandroid-mainline-12.0.0_r81android-mainline-12.0.0_r109aml_sdk_311710000android12-mainline-sdkext-release
Change-Id: Ia937562b72052ca20f9499996a927a5a624fcabd
-rw-r--r--icing/file/file-backed-vector.h43
-rw-r--r--icing/file/file-backed-vector_test.cc197
-rw-r--r--icing/store/document-store.cc13
3 files changed, 228 insertions, 25 deletions
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 2443cb2..0989935 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -56,6 +56,7 @@
#ifndef ICING_FILE_FILE_BACKED_VECTOR_H_
#define ICING_FILE_FILE_BACKED_VECTOR_H_
+#include <inttypes.h>
#include <stdint.h>
#include <sys/mman.h>
@@ -423,13 +424,6 @@ FileBackedVector<T>::InitializeExistingFile(
absl_ports::StrCat("Invalid header kMagic for ", file_path));
}
- // Mmap the content of the vector, excluding the header so its easier to
- // access elements from the mmapped region
- auto mmapped_file =
- std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
- ICING_RETURN_IF_ERROR(
- mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header)));
-
// Check header
if (header->header_checksum != header->CalculateHeaderChecksum()) {
return absl_ports::FailedPreconditionError(
@@ -442,6 +436,20 @@ FileBackedVector<T>::InitializeExistingFile(
header->element_size));
}
+ int64_t min_file_size = header->num_elements * sizeof(T) + sizeof(Header);
+ if (min_file_size > file_size) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Inconsistent file size, expected %" PRId64 ", actual %" PRId64,
+ min_file_size, file_size));
+ }
+
+ // Mmap the content of the vector, excluding the header so its easier to
+ // access elements from the mmapped region
+ auto mmapped_file =
+ std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
+ ICING_RETURN_IF_ERROR(
+ mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header)));
+
// Check vector contents
Crc32 vector_checksum;
std::string_view vector_contents(
@@ -591,9 +599,24 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary(
least_file_size_needed = math_util::RoundUpTo(
least_file_size_needed,
int64_t{FileBackedVector<T>::kGrowElements * sizeof(T)});
- if (!filesystem_->Grow(file_path_.c_str(), least_file_size_needed)) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Couldn't grow file ", file_path_));
+
+ // We use PWrite here rather than Grow because Grow doesn't actually allocate
+ // an underlying disk block. This can lead to problems with mmap because mmap
+ // has no effective way to signal that it was impossible to allocate the disk
+ // block and ends up crashing instead. PWrite will force the allocation of
+ // these blocks, which will ensure that any failure to grow will surface here.
+ int64_t page_size = getpagesize();
+ auto buf = std::make_unique<uint8_t[]>(page_size);
+ int64_t size_to_write = page_size - (current_file_size % page_size);
+ ScopedFd sfd(filesystem_->OpenForWrite(file_path_.c_str()));
+ while (current_file_size < least_file_size_needed) {
+ if (!filesystem_->PWrite(sfd.get(), current_file_size, buf.get(),
+ size_to_write)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't grow file ", file_path_));
+ }
+ current_file_size += size_to_write;
+ size_to_write = page_size - (current_file_size % page_size);
}
ICING_RETURN_IF_ERROR(mmapped_file_->Remap(
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index bc2fef6..b05ce2d 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -32,6 +32,7 @@
#include "icing/util/logging.h"
using ::testing::Eq;
+using ::testing::IsTrue;
using ::testing::Pointee;
namespace icing {
@@ -278,7 +279,6 @@ TEST_F(FileBackedVectorTest, Grow) {
filesystem_, file_path_,
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
-
EXPECT_THAT(vector->Set(kMaxNumElts + 11, 'a'),
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
EXPECT_THAT(vector->Set(-1, 'a'),
@@ -318,25 +318,32 @@ TEST_F(FileBackedVectorTest, GrowsInChunks) {
filesystem_, file_path_,
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- // Our initial file size should just be the size of the header
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(sizeof(FileBackedVector<char>::Header)));
+ // Our initial file size should just be the size of the header. Disk usage
+ // will indicate that one block has been allocated, which contains the header.
+ int header_size = sizeof(FileBackedVector<char>::Header);
+ int page_size = getpagesize();
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(header_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(page_size));
- // Once we add something though, we'll grow to kGrowElements big
+ // Once we add something though, we'll grow to be kGrowElements big. From this
+ // point on, file size and disk usage should be the same because Growing will
+ // explicitly allocate the number of blocks needed to accomodate the file.
Insert(vector.get(), 0, "a");
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * sizeof(int)));
+ int file_size = kGrowElements * sizeof(int);
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Should still be the same size, don't need to grow underlying file
Insert(vector.get(), 1, "b");
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * sizeof(int)));
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Now we grow by a kGrowElements chunk, so the underlying file is 2
// kGrowElements big
+ file_size *= 2;
Insert(vector.get(), 2, std::string(kGrowElements, 'c'));
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * 2 * sizeof(int)));
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Destroy/persist the contents.
vector.reset();
@@ -463,6 +470,174 @@ TEST_F(FileBackedVectorTest, TruncateAndReReadFile) {
}
}
+TEST_F(FileBackedVectorTest, InitFileTooSmallForHeaderFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Shrink the file to be smaller than the header.
+ filesystem_.Truncate(fd_, sizeof(FileBackedVector<char>::Header) - 1);
+
+ {
+ // 3. Attempt to create the file and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitWrongDataSizeFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ {
+ // 2. Attempt to create the file with a different element size and confirm
+ // that it fails.
+ EXPECT_THAT(FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitCorruptHeaderFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Modify the header, but don't update the checksum. This would be similar
+ // to corruption of the header.
+ FileBackedVector<char>::Header header;
+ ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0),
+ IsTrue());
+ header.num_elements = 1;
+ ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with a header that doesn't match its
+ // checksum and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitHeaderElementSizeTooBigFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Modify the header so that the number of elements exceeds the actual size
+ // of the underlying file.
+ FileBackedVector<char>::Header header;
+ ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0),
+ IsTrue());
+ int64_t file_size = filesystem_.GetFileSize(fd_);
+ int64_t allocated_elements_size = file_size - sizeof(header);
+ header.num_elements = (allocated_elements_size / sizeof(char)) + 1;
+ header.header_checksum = header.CalculateHeaderChecksum();
+ ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with num_elements that is larger than the
+ // underlying file and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitCorruptElementsFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Overwrite the values of the first two elements.
+ std::string corrupted_content = "BY";
+ ASSERT_THAT(
+ filesystem_.PWrite(fd_, /*offset=*/sizeof(FileBackedVector<char>::Header),
+ corrupted_content.c_str(), corrupted_content.length()),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with elements that don't match their
+ // checksum and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitNormalSucceeds) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ {
+ // 2. Attempt to create the file with a completely valid header and elements
+ // region. This should succeed.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ IsOk());
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 81edce1..226a96b 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -1577,6 +1577,7 @@ libtextclassifier3::Status DocumentStore::OptimizeInto(
int size = document_id_mapper_->num_elements();
int num_deleted = 0;
int num_expired = 0;
+ UsageStore::UsageScores default_usage;
for (DocumentId document_id = 0; document_id < size; document_id++) {
auto document_or = Get(document_id, /*clear_internal_fields=*/false);
if (absl_ports::IsNotFound(document_or.status())) {
@@ -1625,10 +1626,14 @@ libtextclassifier3::Status DocumentStore::OptimizeInto(
// Copy over usage scores.
ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
usage_store_->GetUsageScores(document_id));
-
- DocumentId new_document_id = new_document_id_or.ValueOrDie();
- ICING_RETURN_IF_ERROR(
- new_doc_store->SetUsageScores(new_document_id, usage_scores));
+ if (!(usage_scores == default_usage)) {
+ // If the usage scores for this document are the default (no usage), then
+ // don't bother setting it. No need to possibly allocate storage if
+ // there's nothing interesting to store.
+ DocumentId new_document_id = new_document_id_or.ValueOrDie();
+ ICING_RETURN_IF_ERROR(
+ new_doc_store->SetUsageScores(new_document_id, usage_scores));
+ }
}
if (stats != nullptr) {
stats->set_num_original_documents(size);