author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-06-15 21:39:32 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-06-15 21:39:32 +0000 |
commit | 854cabe58fe83993ab608b428c6a97c5565dcb0c (patch) | |
tree | c0a00b9b4d52ff3dfeb50f5d894bad2d71389b00 | |
parent | 9c6c6103b62bb8941c2bd711f0e6cb47b6f10b2e (diff) | |
parent | 98f9e8aacdf9898e4ff093385365a233d25bf24f (diff) | |
Snap for 8730993 from 98f9e8aacdf9898e4ff093385365a233d25bf24f to mainline-tzdata3-release
Change-Id: I3aff676785fcc7c7da09269c5fb50e4461fbdea1
169 files changed, 3670 insertions, 10188 deletions
@@ -82,13 +82,14 @@ cc_library_shared { "libutf", ], shared_libs: [ - "libicu", + "libandroidicu", "liblog", - "libprotobuf-cpp-lite", + // TODO(b/147509515): We only need the full version for GzipStream. If we can remove + // that dependency, then we can just use libprotobuf-cpp-lite + "libprotobuf-cpp-full", "libz", ], version_script: "icing/jni.lds", - min_sdk_version: "Tiramisu", } // TODO(cassiewang): Add build rules and a TEST_MAPPING for cc_tests diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c8e439..01ee8eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,8 +14,6 @@ cmake_minimum_required(VERSION 3.10.2) -project(icing) - add_definitions("-DICING_REVERSE_JNI_SEGMENTATION=1") set(VERSION_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/icing/jni.lds") set(CMAKE_SHARED_LINKER_FLAGS @@ -76,7 +74,7 @@ foreach(FILE ${Icing_PROTO_FILES}) "${Icing_PROTO_GEN_DIR}/${FILE_NOEXT}.pb.h" COMMAND ${Protobuf_PROTOC_PATH} --proto_path "${CMAKE_CURRENT_SOURCE_DIR}/proto" - --cpp_out "lite:${Icing_PROTO_GEN_DIR}" + --cpp_out ${Icing_PROTO_GEN_DIR} ${FILE} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/proto/${FILE} @@ -129,4 +127,4 @@ target_include_directories(icing PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(icing PRIVATE ${Icing_PROTO_GEN_DIR}) target_include_directories(icing PRIVATE "${Protobuf_SOURCE_DIR}/src") target_include_directories(icing PRIVATE "${ICU_SOURCE_DIR}/include") -target_link_libraries(icing protobuf::libprotobuf-lite libandroidicu log z) +target_link_libraries(icing protobuf::libprotobuf libandroidicu log) @@ -1,3 +0,0 @@ -adorokhine@google.com -tjbarron@google.com -dsaadati@google.com diff --git a/TEST_MAPPING b/TEST_MAPPING index baef43b..37cb5fc 100644 --- a/TEST_MAPPING +++ b/TEST_MAPPING @@ -4,14 +4,9 @@ "name": "IcingSearchEngineTest" } ], - "hwasan-postsubmit": [ - { - "name": "IcingSearchEngineTest" - } - ], "imports": [ { - "path": "packages/modules/AppSearch" + "path": "frameworks/base/apex/appsearch/service/java/com/android/server/appsearch" } ] } diff --git a/build.gradle b/build.gradle index 5b5f3a6..882a929 100644 --- a/build.gradle +++ b/build.gradle @@ -15,6 +15,7 @@ */ import static androidx.build.SupportConfig.* +import static androidx.build.dependencies.DependenciesKt.* buildscript { dependencies { @@ -56,14 +57,14 @@ dependencies { implementation('com.google.protobuf:protobuf-javalite:3.10.0') - androidTestImplementation(libs.testCore) - androidTestImplementation(libs.testRules) - androidTestImplementation(libs.truth) + androidTestImplementation(ANDROIDX_TEST_CORE) + androidTestImplementation(ANDROIDX_TEST_RULES) + androidTestImplementation(TRUTH) } protobuf { protoc { - artifact = libs.protobufCompiler.get() + artifact = 'com.google.protobuf:protoc:3.10.0' } generateProtoTasks { @@ -92,7 +93,7 @@ android.libraryVariants.all { variant -> // only renames the java classes. Remove them here since they are unused. // Expand the jar and remove any .proto files. 
from(zipTree(configurations.detachedConfiguration( - dependencies.create(libs.protobufLite.get())).getSingleFile())) { + dependencies.create(PROTOBUF_LITE)).getSingleFile())) { exclude("**/*.proto") } diff --git a/icing/file/file-backed-bitmap.cc b/icing/file/file-backed-bitmap.cc index eec7668..f1e568c 100644 --- a/icing/file/file-backed-bitmap.cc +++ b/icing/file/file-backed-bitmap.cc @@ -50,7 +50,7 @@ FileBackedBitmap::Create(const Filesystem* filesystem, auto bitmap = std::unique_ptr<FileBackedBitmap>( new FileBackedBitmap(filesystem, file_path, mmap_strategy)); - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = bitmap->Initialize(); if (!status.ok()) { @@ -122,7 +122,7 @@ libtextclassifier3::Status FileBackedBitmap::FileBackedBitmap::Initialize() { << " of size: " << file_size; } - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = mmapper_->Remap(0, file_size); if (!status.ok()) { @@ -198,7 +198,7 @@ int FileBackedBitmap::NumBits() const { libtextclassifier3::Status FileBackedBitmap::Set(int bit_index, bool bit_value) { if (bit_index >= NumBits()) { - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = GrowTo(bit_index); if (!status.ok()) { @@ -261,7 +261,7 @@ libtextclassifier3::Status FileBackedBitmap::GrowTo(int new_num_bits) { file_path_.c_str(), new_file_size)); } - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size); if (!status.ok()) { @@ -281,7 +281,7 @@ libtextclassifier3::Status FileBackedBitmap::TruncateTo(int new_num_bits) { } const size_t new_file_size = FileSizeForBits(new_num_bits); - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size); if (!status.ok()) { diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h index 686b4fb..b2b37e8 100644 --- a/icing/file/file-backed-proto-log.h +++ b/icing/file/file-backed-proto-log.h @@ -14,14 +14,16 @@ // File-backed log of protos with append-only writes and position based reads. // -// The implementation in this file is deprecated and replaced by -// portable-file-backed-proto-log.h. +// There should only be one instance of a FileBackedProtoLog of the same file at +// a time; using multiple instances at the same time may lead to undefined +// behavior. // -// This deprecated implementation has been made read-only for the purposes of -// migration; writing and erasing this format of log is no longer supported and -// the methods to accomplish this have been removed. +// The entire checksum is computed on initialization to verify the contents are +// valid. On failure, the log will be truncated to the last verified state when +// PersistToDisk() was called. 
If the log cannot successfully restore the last +// state due to disk corruption or some other inconsistency, then the entire log +// will be lost. // -// The details of this format follow below: // Each proto written to the file will have a metadata written just before it. // The metadata consists of // { @@ -29,24 +31,45 @@ // 3 bytes of the proto size // n bytes of the proto itself // } +// +// Example usage: +// ICING_ASSERT_OK_AND_ASSIGN(auto create_result, +// FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path_, +// options)); +// auto proto_log = create_result.proto_log; +// +// Document document; +// document.set_namespace("com.google.android.example"); +// document.set_uri("www.google.com"); +// +// int64_t document_offset = proto_log->WriteProto(document)); +// Document same_document = proto_log->ReadProto(document_offset)); +// proto_log->PersistToDisk(); +// // TODO(b/136514769): Add versioning to the header and a UpgradeToVersion // migration method. + #ifndef ICING_FILE_FILE_BACKED_PROTO_LOG_H_ #define ICING_FILE_FILE_BACKED_PROTO_LOG_H_ +#include <cstddef> #include <cstdint> +#include <cstring> #include <memory> #include <string> #include <string_view> +#include <utility> +#include <vector> +#include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include <google/protobuf/io/gzip_stream.h> #include <google/protobuf/io/zero_copy_stream_impl_lite.h> #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" #include "icing/file/filesystem.h" #include "icing/file/memory-mapped-file.h" #include "icing/legacy/core/icing-string-util.h" -#include "icing/portable/gzip_stream.h" #include "icing/portable/platform.h" #include "icing/portable/zlib.h" #include "icing/util/crc32.h" @@ -89,6 +112,10 @@ class FileBackedProtoLog { // Header stored at the beginning of the file before the rest of the log // contents. Stores metadata on the log. + // + // TODO(b/139375388): Migrate the Header struct to a proto. This makes + // migrations easier since we don't need to worry about different size padding + // (which would affect the checksum) and different endians. struct Header { static constexpr int32_t kMagic = 0xf4c6f67a; @@ -168,6 +195,20 @@ class FileBackedProtoLog { FileBackedProtoLog(const FileBackedProtoLog&) = delete; FileBackedProtoLog& operator=(const FileBackedProtoLog&) = delete; + // This will update the checksum of the log as well. + ~FileBackedProtoLog(); + + // Writes the serialized proto to the underlying file. Writes are applied + // directly to the underlying file. Users do not need to sync the file after + // writing. + // + // Returns: + // Offset of the newly appended proto in file on success + // INVALID_ARGUMENT if proto is too large, as decided by + // Options.max_proto_size + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<int64_t> WriteProto(const ProtoT& proto); + // Reads out a proto located at file_offset from the file. // // Returns: @@ -177,6 +218,31 @@ class FileBackedProtoLog { // INTERNAL_ERROR on IO error libtextclassifier3::StatusOr<ProtoT> ReadProto(int64_t file_offset) const; + // Erases the data of a proto located at file_offset from the file. + // + // Returns: + // OK on success + // OUT_OF_RANGE_ERROR if file_offset exceeds file size + // INTERNAL_ERROR on IO error + libtextclassifier3::Status EraseProto(int64_t file_offset); + + // Calculates and returns the disk usage in bytes. Rounds up to the nearest + // block size. 
+ // + // Returns: + // Disk usage on success + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const; + + // Returns the file size of all the elements held in the log. File size is in + // bytes. This excludes the size of any internal metadata of the log, e.g. the + // log's header. + // + // Returns: + // File size on success + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const; + // An iterator helping to find offsets of all the protos in file. // Example usage: // @@ -215,6 +281,72 @@ class FileBackedProtoLog { // behaviors could happen. Iterator GetIterator(); + // Persists all changes since initialization or the last call to + // PersistToDisk(). Any changes that aren't persisted may be lost if the + // system fails to close safely. + // + // Example use case: + // + // Document document; + // document.set_namespace("com.google.android.example"); + // document.set_uri("www.google.com"); + // + // { + // ICING_ASSERT_OK_AND_ASSIGN(auto create_result, + // FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path, + // options)); + // auto proto_log = std::move(create_result.proto_log); + // + // int64_t document_offset = proto_log->WriteProto(document)); + // + // // We lose the document here since it wasn't persisted. + // // *SYSTEM CRASH* + // } + // + // { + // // Can still successfully create after a crash since the log can + // // rewind/truncate to recover into a previously good state + // ICING_ASSERT_OK_AND_ASSIGN(auto create_result, + // FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path, + // options)); + // auto proto_log = std::move(create_result.proto_log); + // + // // Lost the proto since we didn't PersistToDisk before the crash + // proto_log->ReadProto(document_offset)); // INVALID_ARGUMENT error + // + // int64_t document_offset = proto_log->WriteProto(document)); + // + // // Persisted this time, so we should be ok. + // ICING_ASSERT_OK(proto_log->PersistToDisk()); + // } + // + // { + // ICING_ASSERT_OK_AND_ASSIGN(auto create_result, + // FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path, + // options)); + // auto proto_log = std::move(create_result.proto_log); + // + // // SUCCESS + // Document same_document = proto_log->ReadProto(document_offset)); + // } + // + // NOTE: Since all protos are already written to the file directly, this + // just updates the checksum and rewind position. Without these updates, + // future initializations will truncate the file and discard unpersisted + // changes. + // + // Returns: + // OK on success + // INTERNAL_ERROR on IO error + libtextclassifier3::Status PersistToDisk(); + + // Calculates the checksum of the log contents. Excludes the header content. + // + // Returns: + // Crc of the log content + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<Crc32> ComputeChecksum(); + private: // Object can only be instantiated via the ::Create factory. FileBackedProtoLog(const Filesystem* filesystem, const std::string& file_path, @@ -292,6 +424,9 @@ class FileBackedProtoLog { static_assert(kMaxProtoSize <= 0x00FFFFFF, "kMaxProtoSize doesn't fit in 3 bytes"); + // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9 + static constexpr int kDeflateCompressionLevel = 3; + // Chunks of the file to mmap at a time, so we don't mmap the entire file. 
// Only used on 32-bit devices static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB @@ -303,6 +438,9 @@ class FileBackedProtoLog { }; template <typename ProtoT> +constexpr uint8_t FileBackedProtoLog<ProtoT>::kProtoMagic; + +template <typename ProtoT> FileBackedProtoLog<ProtoT>::FileBackedProtoLog(const Filesystem* filesystem, const std::string& file_path, std::unique_ptr<Header> header) @@ -313,6 +451,15 @@ FileBackedProtoLog<ProtoT>::FileBackedProtoLog(const Filesystem* filesystem, } template <typename ProtoT> +FileBackedProtoLog<ProtoT>::~FileBackedProtoLog() { + if (!PersistToDisk().ok()) { + ICING_LOG(WARNING) + << "Error persisting to disk during destruction of FileBackedProtoLog: " + << file_path_; + } +} + +template <typename ProtoT> libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::CreateResult> FileBackedProtoLog<ProtoT>::Create(const Filesystem* filesystem, const std::string& file_path, @@ -541,6 +688,79 @@ libtextclassifier3::StatusOr<Crc32> FileBackedProtoLog<ProtoT>::ComputeChecksum( } template <typename ProtoT> +libtextclassifier3::StatusOr<int64_t> FileBackedProtoLog<ProtoT>::WriteProto( + const ProtoT& proto) { + int64_t proto_size = proto.ByteSizeLong(); + int32_t metadata; + int metadata_size = sizeof(metadata); + int64_t current_position = filesystem_->GetCurrentPosition(fd_.get()); + + if (proto_size > header_->max_proto_size) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "proto_size, %lld, was too large to write. Max is %d", + static_cast<long long>(proto_size), header_->max_proto_size)); + } + + // At this point, we've guaranteed that proto_size is under kMaxProtoSize + // (see + // ::Create), so we can safely store it in an int. + int final_size = 0; + + std::string proto_str; + google::protobuf::io::StringOutputStream proto_stream(&proto_str); + + if (header_->compress) { + google::protobuf::io::GzipOutputStream::Options options; + options.format = google::protobuf::io::GzipOutputStream::ZLIB; + options.compression_level = kDeflateCompressionLevel; + + google::protobuf::io::GzipOutputStream compressing_stream(&proto_stream, + options); + + bool success = proto.SerializeToZeroCopyStream(&compressing_stream) && + compressing_stream.Close(); + + if (!success) { + return absl_ports::InternalError("Error compressing proto."); + } + + final_size = proto_str.size(); + + // In case the compressed proto is larger than the original proto, we also + // can't write it. + if (final_size > header_->max_proto_size) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Compressed proto size, %d, was greater than " + "max_proto_size, %d", + final_size, header_->max_proto_size)); + } + } else { + // Serialize the proto directly into the write buffer at an offset of the + // metadata. + proto.SerializeToZeroCopyStream(&proto_stream); + final_size = proto_str.size(); + } + + // 1st byte for magic, next 3 bytes for proto size. 
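The 4-byte metadata word assembled on the next line is the on-disk framing described at the top of the file: one byte of kProtoMagic followed by three bytes of (possibly compressed) proto size. A standalone sketch of the pack/unpack arithmetic; the magic value below is a stand-in for illustration, since the real constant is defined elsewhere in this header:

```cpp
#include <cassert>
#include <cstdint>

constexpr uint8_t kProtoMagic = 0x5C;          // stand-in value for illustration
constexpr int32_t kMaxProtoSize = 0x00FFFFFF;  // sizes must fit in 3 bytes

int32_t PackMetadata(int32_t proto_size) {
  assert(proto_size >= 0 && proto_size <= kMaxProtoSize);
  return (kProtoMagic << 24) | proto_size;  // [magic][size size size]
}

uint8_t UnpackMagic(int32_t metadata) {
  return static_cast<uint32_t>(metadata) >> 24;
}

int32_t UnpackProtoSize(int32_t metadata) { return metadata & 0x00FFFFFF; }

int main() {
  int32_t metadata = PackMetadata(/*proto_size=*/1234);
  assert(UnpackMagic(metadata) == kProtoMagic);
  assert(UnpackProtoSize(metadata) == 1234);
  return 0;
}
```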
+ metadata = (kProtoMagic << 24) | final_size; + + // Actually write metadata, has to be done after we know the possibly + // compressed proto size + if (!filesystem_->Write(fd_.get(), &metadata, metadata_size)) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to write proto metadata to: ", file_path_)); + } + + // Write the serialized proto + if (!filesystem_->Write(fd_.get(), proto_str.data(), proto_str.size())) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to write proto to: ", file_path_)); + } + + return current_position; +} + +template <typename ProtoT> libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto( int64_t file_offset) const { int64_t file_size = filesystem_->GetFileSize(fd_.get()); @@ -576,7 +796,7 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto( // Deserialize proto ProtoT proto; if (header_->compress) { - protobuf_ports::GzipInputStream decompress_stream(&proto_stream); + google::protobuf::io::GzipInputStream decompress_stream(&proto_stream); proto.ParseFromZeroCopyStream(&decompress_stream); } else { proto.ParseFromZeroCopyStream(&proto_stream); @@ -586,6 +806,83 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto( } template <typename ProtoT> +libtextclassifier3::Status FileBackedProtoLog<ProtoT>::EraseProto( + int64_t file_offset) { + int64_t file_size = filesystem_->GetFileSize(fd_.get()); + if (file_offset >= file_size) { + // file_size points to the next byte to write at, so subtract one to get + // the inclusive, actual size of file. + return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( + "Trying to erase data at a location, %lld, " + "out of range of the file size, %lld", + static_cast<long long>(file_offset), + static_cast<long long>(file_size - 1))); + } + + MemoryMappedFile mmapped_file( + *filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC); + + // Read out the metadata + ICING_ASSIGN_OR_RETURN( + int metadata, ReadProtoMetadata(&mmapped_file, file_offset, file_size)); + + ICING_RETURN_IF_ERROR(mmapped_file.Remap(file_offset + sizeof(metadata), + GetProtoSize(metadata))); + + // We need to update the crc checksum if the erased area is before the + // rewind position. + if (file_offset + sizeof(metadata) < header_->rewind_offset) { + // We need to calculate [original string xor 0s]. + // The xored string is the same as the original string because 0 xor 0 = + // 0, 1 xor 0 = 1. 
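The identity the comment leans on is that XOR-ing the old bytes against the all-zero bytes the erase writes reproduces the old bytes, so the checksum patch can be computed from the to-be-erased region alone. A tiny self-check of that claim:

```cpp
#include <cassert>
#include <string>

int main() {
  std::string old_bytes = "serialized proto";
  std::string new_bytes(old_bytes.size(), '\0');  // what the erase writes

  std::string xored;
  for (size_t i = 0; i < old_bytes.size(); ++i) {
    xored.push_back(static_cast<char>(old_bytes[i] ^ new_bytes[i]));
  }
  assert(xored == old_bytes);  // x ^ 0 == x, bit by bit
  return 0;
}
```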
+    const std::string_view xored_str(mmapped_file.region(),
+                                     mmapped_file.region_size());
+
+    Crc32 crc(header_->log_checksum);
+    ICING_ASSIGN_OR_RETURN(
+        uint32_t new_crc,
+        crc.UpdateWithXor(
+            xored_str,
+            /*full_data_size=*/header_->rewind_offset - sizeof(Header),
+            /*position=*/file_offset + sizeof(metadata) - sizeof(Header)));
+
+    header_->log_checksum = new_crc;
+    header_->header_checksum = header_->CalculateHeaderChecksum();
+
+    if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(),
+                             sizeof(Header))) {
+      return absl_ports::InternalError(
+          absl_ports::StrCat("Failed to update header to: ", file_path_));
+    }
+  }
+
+  memset(mmapped_file.mutable_region(), '\0', mmapped_file.region_size());
+  return libtextclassifier3::Status::OK;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int64_t> FileBackedProtoLog<ProtoT>::GetDiskUsage()
+    const {
+  int64_t size = filesystem_->GetDiskUsage(file_path_.c_str());
+  if (size == Filesystem::kBadFileSize) {
+    return absl_ports::InternalError("Failed to get disk usage of proto log");
+  }
+  return size;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int64_t>
+FileBackedProtoLog<ProtoT>::GetElementsFileSize() const {
+  int64_t total_file_size = filesystem_->GetFileSize(file_path_.c_str());
+  if (total_file_size == Filesystem::kBadFileSize) {
+    return absl_ports::InternalError(
+        "Failed to get file size of elements in the proto log");
+  }
+  return total_file_size - sizeof(Header);
+}
+
+template <typename ProtoT>
 FileBackedProtoLog<ProtoT>::Iterator::Iterator(const Filesystem& filesystem,
                                                const std::string& file_path,
                                                int64_t initial_offset)
@@ -667,6 +964,51 @@ libtextclassifier3::StatusOr<int> FileBackedProtoLog<ProtoT>::ReadProtoMetadata(
   return metadata;
 }

+template <typename ProtoT>
+libtextclassifier3::Status FileBackedProtoLog<ProtoT>::PersistToDisk() {
+  int64_t file_size = filesystem_->GetFileSize(file_path_.c_str());
+  if (file_size == header_->rewind_offset) {
+    // No new protos appended, don't need to update the checksum.
+    return libtextclassifier3::Status::OK;
+  }
+
+  int64_t new_content_size = file_size - header_->rewind_offset;
+  Crc32 crc;
+  if (new_content_size < 0) {
+    // File shrunk, recalculate the entire checksum.
+    ICING_ASSIGN_OR_RETURN(
+        crc, ComputeChecksum(filesystem_, file_path_, Crc32(), sizeof(Header),
+                             file_size));
+  } else {
+    // Append new changes to the existing checksum.
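This is the property that makes PersistToDisk cheap: a CRC32 can be extended with only the newly appended bytes instead of being recomputed over the whole log. The Crc32 wrapper is icing-internal, but plain zlib shows the same pattern:

```cpp
#include <cassert>
#include <cstring>

#include <zlib.h>

int main() {
  const char all[] = "old content|new content";
  const size_t old_len = 12;  // bytes already covered by the stored checksum

  // Checksum the old region once, then later fold in only the appended bytes.
  uLong incremental = crc32(0L, Z_NULL, 0);
  incremental = crc32(incremental, reinterpret_cast<const Bytef*>(all), old_len);
  incremental = crc32(incremental,
                      reinterpret_cast<const Bytef*>(all + old_len),
                      strlen(all) - old_len);

  // The result matches a one-shot checksum over the full range.
  uLong full = crc32(crc32(0L, Z_NULL, 0),
                     reinterpret_cast<const Bytef*>(all), strlen(all));
  assert(incremental == full);
  return 0;
}
```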
+ ICING_ASSIGN_OR_RETURN( + crc, + ComputeChecksum(filesystem_, file_path_, Crc32(header_->log_checksum), + header_->rewind_offset, file_size)); + } + + header_->log_checksum = crc.Get(); + header_->rewind_offset = file_size; + header_->header_checksum = header_->CalculateHeaderChecksum(); + + if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(), + sizeof(Header)) || + !filesystem_->DataSync(fd_.get())) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to update header to: ", file_path_)); + } + + return libtextclassifier3::Status::OK; +} + +template <typename ProtoT> +libtextclassifier3::StatusOr<Crc32> +FileBackedProtoLog<ProtoT>::ComputeChecksum() { + return FileBackedProtoLog<ProtoT>::ComputeChecksum( + filesystem_, file_path_, Crc32(), /*start=*/sizeof(Header), + /*end=*/filesystem_->GetFileSize(file_path_.c_str())); +} + } // namespace lib } // namespace icing diff --git a/icing/file/file-backed-proto-log_benchmark.cc b/icing/file/file-backed-proto-log_benchmark.cc new file mode 100644 index 0000000..c09fd5a --- /dev/null +++ b/icing/file/file-backed-proto-log_benchmark.cc @@ -0,0 +1,251 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstdint> +#include <random> + +#include "testing/base/public/benchmark.h" +#include "gmock/gmock.h" +#include "icing/document-builder.h" +#include "icing/file/file-backed-proto-log.h" +#include "icing/file/filesystem.h" +#include "icing/legacy/core/icing-string-util.h" +#include "icing/proto/document.pb.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/random-string.h" +#include "icing/testing/tmp-directory.h" + +// go/microbenchmarks +// +// To build and run on a local machine: +// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt +// icing/file:file-backed-proto-log_benchmark +// +// $ blaze-bin/icing/file/file-backed-proto-log_benchmark +// --benchmarks=all +// +// +// To build and run on an Android device (must be connected and rooted): +// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" +// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt +// icing/file:file-backed-proto-log_benchmark +// +// $ adb root +// +// $ adb push +// blaze-bin/icing/file/file-backed-proto-log_benchmark +// /data/local/tmp/ +// +// $ adb shell /data/local/tmp/file-backed-proto-log-benchmark +// --benchmarks=all + +namespace icing { +namespace lib { + +namespace { + +static void BM_Write(benchmark::State& state) { + const Filesystem filesystem; + int string_length = state.range(0); + const std::string file_path = IcingStringUtil::StringPrintf( + "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log"); + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. 
+ filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = + FileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + for (auto _ : state) { + testing::DoNotOptimize(proto_log->WriteProto(document)); + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * + string_length); + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_Write) + ->Arg(1) + ->Arg(32) + ->Arg(512) + ->Arg(1024) + ->Arg(4 * 1024) + ->Arg(8 * 1024) + ->Arg(16 * 1024) + ->Arg(32 * 1024) + ->Arg(256 * 1024) + ->Arg(2 * 1024 * 1024) + ->Arg(8 * 1024 * 1024) + ->Arg(15 * 1024 * 1024); // We do 15MiB here since our max proto size is + // 16MiB, and we need some extra space for the + // rest of the document properties + +static void BM_Read(benchmark::State& state) { + const Filesystem filesystem; + int string_length = state.range(0); + const std::string file_path = IcingStringUtil::StringPrintf( + "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log"); + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. + filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = + FileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset, + proto_log->WriteProto(document)); + + for (auto _ : state) { + testing::DoNotOptimize(proto_log->ReadProto(write_offset)); + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * + string_length); + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_Read) + ->Arg(1) + ->Arg(32) + ->Arg(512) + ->Arg(1024) + ->Arg(4 * 1024) + ->Arg(8 * 1024) + ->Arg(16 * 1024) + ->Arg(32 * 1024) + ->Arg(256 * 1024) + ->Arg(2 * 1024 * 1024) + ->Arg(8 * 1024 * 1024) + ->Arg(15 * 1024 * 1024); // We do 15MiB here since our max proto size is + // 16MiB, and we need some extra space for the + // rest of the document properties + +static void BM_Erase(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = IcingStringUtil::StringPrintf( + "%s%s", GetTestTempDir().c_str(), "/proto.log"); + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. 
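These benchmarks construct the log with compress = true, so every WriteProto/ReadProto iteration runs through protobuf's gzip streams, which is also the dependency behind the libprotobuf-cpp-full switch in the Android.bp hunk at the top. A minimal round-trip sketch of that path, assuming the generated DocumentProto from icing/proto/document.pb.h and the ZLIB-format, level-3 settings WriteProto uses:

```cpp
#include <string>

#include <google/protobuf/io/gzip_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include "icing/proto/document.pb.h"  // generated DocumentProto, as used above

// Mirrors the WriteProto compression path: serialize into `out` as a ZLIB
// stream at compression level 3 (kDeflateCompressionLevel).
bool CompressProto(const icing::lib::DocumentProto& proto, std::string* out) {
  google::protobuf::io::StringOutputStream string_stream(out);
  google::protobuf::io::GzipOutputStream::Options options;
  options.format = google::protobuf::io::GzipOutputStream::ZLIB;
  options.compression_level = 3;
  google::protobuf::io::GzipOutputStream gzip_stream(&string_stream, options);
  return proto.SerializeToZeroCopyStream(&gzip_stream) && gzip_stream.Close();
}

// Mirrors the ReadProto decompression path; GzipInputStream auto-detects the
// ZLIB format written above.
bool DecompressProto(const std::string& in, icing::lib::DocumentProto* proto) {
  google::protobuf::io::ArrayInputStream array_stream(
      in.data(), static_cast<int>(in.size()));
  google::protobuf::io::GzipInputStream gzip_stream(&array_stream);
  return proto->ParseFromZeroCopyStream(&gzip_stream);
}
```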
+ filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = + FileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + std::default_random_engine random; + const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + for (auto _ : state) { + state.PauseTiming(); + ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset, + proto_log->WriteProto(document)); + state.ResumeTiming(); + + testing::DoNotOptimize(proto_log->EraseProto(write_offset)); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_Erase); + +static void BM_ComputeChecksum(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = GetTestTempDir() + "/proto.log"; + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. + filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = + FileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + // Make each document 1KiB + int string_length = 1024; + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + int num_docs = state.range(0); + for (int i = 0; i < num_docs; ++i) { + ICING_ASSERT_OK(proto_log->WriteProto(document)); + } + + for (auto _ : state) { + testing::DoNotOptimize(proto_log->ComputeChecksum()); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20); + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/file/file-backed-proto-log_test.cc b/icing/file/file-backed-proto-log_test.cc index eccb0c7..d429277 100644 --- a/icing/file/file-backed-proto-log_test.cc +++ b/icing/file/file-backed-proto-log_test.cc @@ -19,7 +19,10 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "icing/document-builder.h" #include "icing/file/filesystem.h" +#include "icing/file/mock-filesystem.h" +#include "icing/portable/equals-proto.h" #include "icing/proto/document.pb.h" #include "icing/testing/common-matchers.h" #include "icing/testing/tmp-directory.h" @@ -29,7 +32,14 @@ namespace lib { namespace { +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::A; +using ::testing::Eq; +using ::testing::Gt; +using ::testing::Not; using ::testing::NotNull; +using ::testing::Pair; +using ::testing::Return; class FileBackedProtoLogTest : public ::testing::Test { protected: @@ -77,6 +87,193 @@ TEST_F(FileBackedProtoLogTest, Initialize) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } +TEST_F(FileBackedProtoLogTest, WriteProtoTooLarge) { + int max_proto_size = 1; + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, 
file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + // Proto is too large for the max_proto_size_in + ASSERT_THAT(proto_log->WriteProto(document), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_F(FileBackedProtoLogTest, ReadProtoWrongKProtoMagic) { + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write a proto + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + ICING_ASSERT_OK_AND_ASSIGN(int64_t file_offset, + proto_log->WriteProto(document)); + + // The 4 bytes of metadata that just doesn't have the same kProtoMagic + // specified in file-backed-proto-log.h + uint32_t wrong_magic = 0x7E000000; + + // Sanity check that we opened the file correctly + int fd = filesystem_.OpenForWrite(file_path_.c_str()); + ASSERT_GT(fd, 0); + + // Write the wrong kProtoMagic in, kProtoMagics are stored at the beginning of + // a proto entry. + filesystem_.PWrite(fd, file_offset, &wrong_magic, sizeof(wrong_magic)); + + ASSERT_THAT(proto_log->ReadProto(file_offset), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); +} + +TEST_F(FileBackedProtoLogTest, ReadWriteUncompressedProto) { + int last_offset; + { + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/false, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write the first proto + DocumentProto document1 = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + + ICING_ASSERT_OK_AND_ASSIGN(int written_position, + proto_log->WriteProto(document1)); + + int document1_offset = written_position; + + // Check that what we read is what we wrote + ASSERT_THAT(proto_log->ReadProto(written_position), + IsOkAndHolds(EqualsProto(document1))); + + // Write a second proto that's close to the max size. Leave some room for + // the rest of the proto properties. + std::string long_str(max_proto_size_ - 1024, 'a'); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .AddStringProperty("long_str", long_str) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(written_position, + proto_log->WriteProto(document2)); + + int document2_offset = written_position; + last_offset = written_position; + ASSERT_GT(document2_offset, document1_offset); + + // Check the second proto + ASSERT_THAT(proto_log->ReadProto(written_position), + IsOkAndHolds(EqualsProto(document2))); + + ICING_ASSERT_OK(proto_log->PersistToDisk()); + } + + { + // Make a new proto_log with the same file_path, and make sure we + // can still write to the same underlying file. 
+ ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/false, max_proto_size_))); + auto recreated_proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write a third proto + DocumentProto document3 = + DocumentBuilder().SetKey("namespace3", "uri3").Build(); + + ASSERT_THAT(recreated_proto_log->WriteProto(document3), + IsOkAndHolds(Gt(last_offset))); + } +} + +TEST_F(FileBackedProtoLogTest, ReadWriteCompressedProto) { + int last_offset; + + { + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/true, max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write the first proto + DocumentProto document1 = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + + ICING_ASSERT_OK_AND_ASSIGN(int written_position, + proto_log->WriteProto(document1)); + + int document1_offset = written_position; + + // Check that what we read is what we wrote + ASSERT_THAT(proto_log->ReadProto(written_position), + IsOkAndHolds(EqualsProto(document1))); + + // Write a second proto that's close to the max size. Leave some room for + // the rest of the proto properties. + std::string long_str(max_proto_size_ - 1024, 'a'); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .AddStringProperty("long_str", long_str) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN(written_position, + proto_log->WriteProto(document2)); + + int document2_offset = written_position; + last_offset = written_position; + ASSERT_GT(document2_offset, document1_offset); + + // Check the second proto + ASSERT_THAT(proto_log->ReadProto(written_position), + IsOkAndHolds(EqualsProto(document2))); + + ICING_ASSERT_OK(proto_log->PersistToDisk()); + } + + { + // Make a new proto_log with the same file_path, and make sure we + // can still write to the same underlying file. + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options( + /*compress_in=*/true, max_proto_size_))); + auto recreated_proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write a third proto + DocumentProto document3 = + DocumentBuilder().SetKey("namespace3", "uri3").Build(); + + ASSERT_THAT(recreated_proto_log->WriteProto(document3), + IsOkAndHolds(Gt(last_offset))); + } +} + TEST_F(FileBackedProtoLogTest, CorruptHeader) { { ICING_ASSERT_OK_AND_ASSIGN( @@ -106,6 +303,382 @@ TEST_F(FileBackedProtoLogTest, CorruptHeader) { } } +TEST_F(FileBackedProtoLogTest, CorruptContent) { + { + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + EXPECT_FALSE(create_result.has_data_loss()); + + DocumentProto document = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + + // Write and persist an document. 
+ ICING_ASSERT_OK_AND_ASSIGN(int document_offset, + proto_log->WriteProto(document)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // "Corrupt" the content written in the log. + document.set_uri("invalid"); + std::string serialized_document = document.SerializeAsString(); + filesystem_.PWrite(file_path_.c_str(), document_offset, + serialized_document.data(), serialized_document.size()); + } + + { + // We can recover, but we have data loss. + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_TRUE(create_result.has_data_loss()); + ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE)); + + // Lost everything in the log since the rewind position doesn't help if + // there's been data corruption within the persisted region + ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()), + sizeof(FileBackedProtoLog<DocumentProto>::Header)); + } +} + +TEST_F(FileBackedProtoLogTest, PersistToDisk) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace1", "uri1").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace2", "uri2").Build(); + int document1_offset, document2_offset; + int log_size; + + { + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Write and persist the first proto + ICING_ASSERT_OK_AND_ASSIGN(document1_offset, + proto_log->WriteProto(document1)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // Write, but don't explicitly persist the second proto + ICING_ASSERT_OK_AND_ASSIGN(document2_offset, + proto_log->WriteProto(document2)); + + // Check that what we read is what we wrote + ASSERT_THAT(proto_log->ReadProto(document1_offset), + IsOkAndHolds(EqualsProto(document1))); + ASSERT_THAT(proto_log->ReadProto(document2_offset), + IsOkAndHolds(EqualsProto(document2))); + + log_size = filesystem_.GetFileSize(file_path_.c_str()); + ASSERT_GT(log_size, 0); + } + + { + // The header rewind position and checksum aren't updated in this "system + // crash" scenario. 
+ + std::string bad_proto = + "some incomplete proto that we didn't finish writing before the system " + "crashed"; + filesystem_.PWrite(file_path_.c_str(), log_size, bad_proto.data(), + bad_proto.size()); + + // Double check that we actually wrote something to the underlying file + ASSERT_GT(filesystem_.GetFileSize(file_path_.c_str()), log_size); + } + + { + // We can recover, but we have data loss + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_TRUE(create_result.has_data_loss()); + ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL)); + + // Check that everything was persisted across instances + ASSERT_THAT(proto_log->ReadProto(document1_offset), + IsOkAndHolds(EqualsProto(document1))); + ASSERT_THAT(proto_log->ReadProto(document2_offset), + IsOkAndHolds(EqualsProto(document2))); + + // We correctly rewound to the last good state. + ASSERT_EQ(log_size, filesystem_.GetFileSize(file_path_.c_str())); + } +} + +TEST_F(FileBackedProtoLogTest, Iterator) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "uri1").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace", "uri2").Build(); + + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + { + // Empty iterator + auto iterator = proto_log->GetIterator(); + ASSERT_THAT(iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + } + + { + // Iterates through some documents + ICING_ASSERT_OK(proto_log->WriteProto(document1)); + ICING_ASSERT_OK(proto_log->WriteProto(document2)); + auto iterator = proto_log->GetIterator(); + // 1st proto + ICING_ASSERT_OK(iterator.Advance()); + ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()), + IsOkAndHolds(EqualsProto(document1))); + // 2nd proto + ICING_ASSERT_OK(iterator.Advance()); + ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()), + IsOkAndHolds(EqualsProto(document2))); + // Tries to advance + ASSERT_THAT(iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + } + + { + // Iterator with bad filesystem + MockFilesystem mock_filesystem; + ON_CALL(mock_filesystem, GetFileSize(A<const char *>())) + .WillByDefault(Return(Filesystem::kBadFileSize)); + FileBackedProtoLog<DocumentProto>::Iterator bad_iterator( + mock_filesystem, file_path_, /*initial_offset=*/0); + ASSERT_THAT(bad_iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + } +} + +TEST_F(FileBackedProtoLogTest, ComputeChecksum) { + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + Crc32 checksum; + + { + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + ICING_EXPECT_OK(proto_log->WriteProto(document)); + + ICING_ASSERT_OK_AND_ASSIGN(checksum, 
proto_log->ComputeChecksum()); + + // Calling it twice with no changes should get us the same checksum + EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum))); + } + + { + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Checksum should be consistent across instances + EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum))); + + // PersistToDisk shouldn't affect the checksum value + ICING_EXPECT_OK(proto_log->PersistToDisk()); + EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum))); + + // Check that modifying the log leads to a different checksum + ICING_EXPECT_OK(proto_log->WriteProto(document)); + EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Not(Eq(checksum)))); + } +} + +TEST_F(FileBackedProtoLogTest, EraseProtoShouldSetZero) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "uri1").Build(); + + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Writes and erases proto + ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset, + proto_log->WriteProto(document1)); + ICING_ASSERT_OK(proto_log->EraseProto(document1_offset)); + + // Checks if the erased area is set to 0. + int64_t file_size = filesystem_.GetFileSize(file_path_.c_str()); + MemoryMappedFile mmapped_file(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_ONLY); + + // document1_offset + sizeof(int) is the start byte of the proto where + // sizeof(int) is the size of the proto metadata. + mmapped_file.Remap(document1_offset + sizeof(int), file_size - 1); + for (size_t i = 0; i < mmapped_file.region_size(); ++i) { + ASSERT_THAT(mmapped_file.region()[i], Eq(0)); + } +} + +TEST_F(FileBackedProtoLogTest, EraseProtoShouldReturnNotFound) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "uri1").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace", "uri2").Build(); + + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Writes 2 protos + ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset, + proto_log->WriteProto(document1)); + ICING_ASSERT_OK_AND_ASSIGN(int64_t document2_offset, + proto_log->WriteProto(document2)); + + // Erases the first proto + ICING_ASSERT_OK(proto_log->EraseProto(document1_offset)); + + // The first proto has been erased. + ASSERT_THAT(proto_log->ReadProto(document1_offset), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + // The second proto should be returned. 
+ ASSERT_THAT(proto_log->ReadProto(document2_offset), + IsOkAndHolds(EqualsProto(document2))); +} + +TEST_F(FileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "uri1").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace", "uri2").Build(); + DocumentProto document3 = + DocumentBuilder().SetKey("namespace", "uri3").Build(); + DocumentProto document4 = + DocumentBuilder().SetKey("namespace", "uri4").Build(); + + int64_t document2_offset; + int64_t document3_offset; + + { + // Erase data after the rewind position. This won't update the checksum + // immediately. + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Writes 3 protos + ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset, + proto_log->WriteProto(document1)); + ICING_ASSERT_OK_AND_ASSIGN(document2_offset, + proto_log->WriteProto(document2)); + ICING_ASSERT_OK_AND_ASSIGN(document3_offset, + proto_log->WriteProto(document3)); + + // Erases the 1st proto, checksum won't be updated immediately because the + // rewind position is 0. + ICING_ASSERT_OK(proto_log->EraseProto(document1_offset)); + + EXPECT_THAT(proto_log->ComputeChecksum(), + IsOkAndHolds(Eq(Crc32(2293202502)))); + } // New checksum is updated in destructor. + + { + // Erase data before the rewind position. This will update the checksum + // immediately. + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Erases the 2nd proto that is now before the rewind position. Checksum is + // updated. + ICING_ASSERT_OK(proto_log->EraseProto(document2_offset)); + + EXPECT_THAT(proto_log->ComputeChecksum(), + IsOkAndHolds(Eq(Crc32(639634028)))); + } + + { + // Append data and erase data before the rewind position. This will update + // the checksum twice: in EraseProto() and destructor. + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.has_data_loss()); + + // Append a new document which is after the rewind position. + ICING_ASSERT_OK(proto_log->WriteProto(document4)); + + // Erases the 3rd proto that is now before the rewind position. Checksum is + // updated. + ICING_ASSERT_OK(proto_log->EraseProto(document3_offset)); + + EXPECT_THAT(proto_log->ComputeChecksum(), + IsOkAndHolds(Eq(Crc32(1990198693)))); + } // Checksum is updated with the newly appended document. + + { + // A successful creation means that the checksum matches. 
+ ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + EXPECT_FALSE(create_result.has_data_loss()); + } +} + } // namespace } // namespace lib } // namespace icing diff --git a/icing/file/file-backed-proto.h b/icing/file/file-backed-proto.h index d7d9bad..15a1953 100644 --- a/icing/file/file-backed-proto.h +++ b/icing/file/file-backed-proto.h @@ -63,17 +63,6 @@ class FileBackedProto { // file_path : Must be a path within in a directory that already exists. FileBackedProto(const Filesystem& filesystem, std::string_view file_path); - // Reset the internal file_path for the file backed proto. - // Example use: - // auto file_backed_proto1 = *FileBackedProto<Proto>::Create(...); - // auto file_backed_proto2 = *FileBackedProto<Proto>::Create(...); - // filesystem.SwapFiles(file1, file2); - // file_backed_proto1.SetSwappedFilepath(file2); - // file_backed_proto2.SetSwappedFilepath(file1); - void SetSwappedFilepath(std::string_view swapped_to_file_path) { - file_path_ = swapped_to_file_path; - } - // Returns a reference to the proto read from the file. It // internally caches the read proto so that future calls are fast. // @@ -110,7 +99,7 @@ class FileBackedProto { mutable absl_ports::shared_mutex mutex_; const Filesystem* const filesystem_; - std::string file_path_; + const std::string file_path_; mutable std::unique_ptr<ProtoT> cached_proto_ ICING_GUARDED_BY(mutex_); }; diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h index 7e42e32..0989935 100644 --- a/icing/file/file-backed-vector.h +++ b/icing/file/file-backed-vector.h @@ -56,9 +56,10 @@ #ifndef ICING_FILE_FILE_BACKED_VECTOR_H_ #define ICING_FILE_FILE_BACKED_VECTOR_H_ +#include <inttypes.h> +#include <stdint.h> #include <sys/mman.h> -#include <cinttypes> #include <cstdint> #include <memory> #include <string> @@ -586,11 +587,8 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary( } int64_t current_file_size = filesystem_->GetFileSize(file_path_.c_str()); - if (current_file_size == Filesystem::kBadFileSize) { - return absl_ports::InternalError("Unable to retrieve file size."); - } - int64_t least_file_size_needed = sizeof(Header) + num_elements * sizeof(T); + if (least_file_size_needed <= current_file_size) { // Our underlying file can hold the target num_elements cause we've grown // before diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc index ed94fa5..b05ce2d 100644 --- a/icing/file/file-backed-vector_test.cc +++ b/icing/file/file-backed-vector_test.cc @@ -14,30 +14,26 @@ #include "icing/file/file-backed-vector.h" -#include <unistd.h> +#include <errno.h> #include <algorithm> -#include <cerrno> #include <cstdint> #include <memory> #include <string_view> #include <vector> -#include "knowledge/cerebra/sense/text_classifier/lib3/utils/base/status.h" -#include "testing/base/public/gmock.h" -#include "testing/base/public/gunit.h" -#include "third_party/icing/file/filesystem.h" -#include "third_party/icing/file/memory-mapped-file.h" -#include "third_party/icing/file/mock-filesystem.h" -#include "third_party/icing/testing/common-matchers.h" -#include "third_party/icing/testing/tmp-directory.h" -#include "third_party/icing/util/crc32.h" -#include "third_party/icing/util/logging.h" +#include "gmock/gmock.h" +#include 
"gtest/gtest.h" +#include "icing/file/filesystem.h" +#include "icing/file/memory-mapped-file.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" +#include "icing/util/crc32.h" +#include "icing/util/logging.h" using ::testing::Eq; using ::testing::IsTrue; using ::testing::Pointee; -using ::testing::Return; namespace icing { namespace lib { @@ -78,8 +74,6 @@ class FileBackedVectorTest : public testing::Test { return std::string_view(vector->array() + idx, expected_len); } - const Filesystem& filesystem() const { return filesystem_; } - Filesystem filesystem_; std::string file_path_; int fd_; @@ -644,60 +638,6 @@ TEST_F(FileBackedVectorTest, InitNormalSucceeds) { } } -TEST_F(FileBackedVectorTest, RemapFailureStillValidInstance) { - auto mock_filesystem = std::make_unique<MockFilesystem>(); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<FileBackedVector<int>> vector, - FileBackedVector<int>::Create( - *mock_filesystem, file_path_, - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); - - // 1. Write data to just before the first block resize. Running the test - // locally has determined that we'll first resize at 65531st entry. - constexpr int kResizingIndex = 16378; - for (int i = 0; i < kResizingIndex; ++i) { - ICING_ASSERT_OK(vector->Set(i, 7)); - } - - // 2. The next Set call should cause a resize and a remap. Make that remap - // fail. - int num_calls = 0; - auto open_lambda = [this, &num_calls](const char* file_name){ - if (++num_calls == 2) { - return -1; - } - return this->filesystem().OpenForWrite(file_name); - }; - ON_CALL(*mock_filesystem, OpenForWrite(_)).WillByDefault(open_lambda); - EXPECT_THAT(vector->Set(kResizingIndex, 7), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); - - // 3. We should still be able to call set correctly for earlier regions. - ICING_EXPECT_OK(vector->Set(kResizingIndex / 2, 9)); - EXPECT_THAT(vector->Get(kResizingIndex / 2), IsOkAndHolds(Pointee(Eq(9)))); -} - -TEST_F(FileBackedVectorTest, BadFileSizeDuringGrowReturnsError) { - auto mock_filesystem = std::make_unique<MockFilesystem>(); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<FileBackedVector<int>> vector, - FileBackedVector<int>::Create( - *mock_filesystem, file_path_, - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); - - // At first, the vector is empty and has no mapping established. The first Set - // call will cause a Grow. - // During Grow, we will attempt to check the underlying file size to see if - // growing is actually necessary. Return an error on the call to GetFileSize. - ON_CALL(*mock_filesystem, GetFileSize(A<const char*>())) - .WillByDefault(Return(Filesystem::kBadFileSize)); - - // We should fail gracefully and return an INTERNAL error to indicate that - // there was an issue retrieving the file size. 
- EXPECT_THAT(vector->Set(0, 7), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); -} - } // namespace } // namespace lib diff --git a/icing/file/filesystem.cc b/icing/file/filesystem.cc index 82b8d98..0655cb9 100644 --- a/icing/file/filesystem.cc +++ b/icing/file/filesystem.cc @@ -16,6 +16,7 @@ #include <dirent.h> #include <dlfcn.h> +#include <errno.h> #include <fcntl.h> #include <fnmatch.h> #include <pthread.h> @@ -25,7 +26,6 @@ #include <unistd.h> #include <algorithm> -#include <cerrno> #include <cstdint> #include <unordered_set> diff --git a/icing/file/filesystem.h b/icing/file/filesystem.h index dd2c5d1..6bed8e6 100644 --- a/icing/file/filesystem.h +++ b/icing/file/filesystem.h @@ -17,9 +17,11 @@ #ifndef ICING_FILE_FILESYSTEM_H_ #define ICING_FILE_FILESYSTEM_H_ +#include <stdint.h> +#include <stdio.h> +#include <string.h> + #include <cstdint> -#include <cstdio> -#include <cstring> #include <memory> #include <string> #include <unordered_set> @@ -233,11 +235,6 @@ class Filesystem { // Increments to_increment by size if size is valid, or sets to_increment // to kBadFileSize if either size or to_increment is kBadFileSize. static void IncrementByOrSetInvalid(int64_t size, int64_t* to_increment); - - // Return -1 if file_size is invalid. Otherwise, return file_size. - static int64_t SanitizeFileSize(int64_t file_size) { - return (file_size != kBadFileSize) ? file_size : -1; - } }; // LINT.ThenChange(//depot/google3/icing/file/mock-filesystem.h) diff --git a/icing/file/memory-mapped-file.cc b/icing/file/memory-mapped-file.cc index 9ff3adb..bda01f2 100644 --- a/icing/file/memory-mapped-file.cc +++ b/icing/file/memory-mapped-file.cc @@ -70,10 +70,10 @@ void MemoryMappedFile::MemoryMappedFile::Unmap() { libtextclassifier3::Status MemoryMappedFile::Remap(size_t file_offset, size_t mmap_size) { - if (mmap_size == 0) { - // First unmap any previously mmapped region. - Unmap(); + // First unmap any previously mmapped region. + Unmap(); + if (mmap_size == 0) { // Nothing more to do. return libtextclassifier3::Status::OK; } @@ -118,19 +118,15 @@ libtextclassifier3::Status MemoryMappedFile::Remap(size_t file_offset, "Unable to open file meant to be mmapped: ", file_path_)); } - void* mmap_result = mmap(nullptr, adjusted_mmap_size, protection_flags, - mmap_flags, fd.get(), aligned_offset); + mmap_result_ = mmap(nullptr, adjusted_mmap_size, protection_flags, mmap_flags, + fd.get(), aligned_offset); - if (mmap_result == MAP_FAILED) { + if (mmap_result_ == MAP_FAILED) { + mmap_result_ = nullptr; return absl_ports::InternalError(absl_ports::StrCat( "Failed to mmap region due to error: ", strerror(errno))); } - // Now we know that we have successfully created a new mapping. We can free - // the old one and switch to the new one. 
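The removed comment describes the ordering this change drops: map the new region first, so that a failed mmap leaves the previous mapping intact, and only then release the old one. A minimal POSIX sketch of that map-then-swap ordering, with hypothetical parameter names:

    #include <sys/mman.h>
    #include <sys/types.h>
    #include <cerrno>
    #include <cstddef>

    // Illustrative only: remap to a new geometry without losing the current
    // mapping on failure. new_off must be page-aligned, as with any mmap().
    int RemapKeepingOldOnFailure(int fd, void** addr, size_t* len,
                                 size_t new_len, off_t new_off) {
      void* fresh = mmap(nullptr, new_len, PROT_READ, MAP_SHARED, fd, new_off);
      if (fresh == MAP_FAILED) {
        return errno;  // The old mapping at *addr is still intact and usable.
      }
      // Only after the new mapping succeeds is the old one released.
      if (*addr != nullptr) {
        munmap(*addr, *len);
      }
      *addr = fresh;
      *len = new_len;
      return 0;
    }
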
- Unmap(); - - mmap_result_ = mmap_result; file_offset_ = file_offset; region_ = reinterpret_cast<char*>(mmap_result_) + alignment_adjustment; region_size_ = mmap_size; diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h index 409ab96..825b763 100644 --- a/icing/file/portable-file-backed-proto-log.h +++ b/icing/file/portable-file-backed-proto-log.h @@ -64,6 +64,7 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include <google/protobuf/io/gzip_stream.h> #include <google/protobuf/io/zero_copy_stream_impl_lite.h> #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" @@ -71,7 +72,6 @@ #include "icing/file/memory-mapped-file.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/portable/endian.h" -#include "icing/portable/gzip_stream.h" #include "icing/portable/platform.h" #include "icing/portable/zlib.h" #include "icing/util/bit-util.h" @@ -124,8 +124,6 @@ class PortableFileBackedProtoLog { public: static constexpr int32_t kMagic = 0xf4c6f67a; - // We should go directly from 0 to 2 the next time we have to change the - // format. static constexpr int32_t kFileFormatVersion = 0; uint32_t CalculateHeaderChecksum() const { @@ -143,57 +141,49 @@ class PortableFileBackedProtoLog { return crc.Get(); } - int32_t GetMagic() const { return GNetworkToHostL(magic_nbytes_); } + int32_t GetMagic() const { return gntohl(magic_nbytes_); } - void SetMagic(int32_t magic_in) { - magic_nbytes_ = GHostToNetworkL(magic_in); - } + void SetMagic(int32_t magic_in) { magic_nbytes_ = ghtonl(magic_in); } int32_t GetFileFormatVersion() const { - return GNetworkToHostL(file_format_version_nbytes_); + return gntohl(file_format_version_nbytes_); } void SetFileFormatVersion(int32_t file_format_version_in) { - file_format_version_nbytes_ = GHostToNetworkL(file_format_version_in); + file_format_version_nbytes_ = ghtonl(file_format_version_in); } - int32_t GetMaxProtoSize() const { - return GNetworkToHostL(max_proto_size_nbytes_); - } + int32_t GetMaxProtoSize() const { return gntohl(max_proto_size_nbytes_); } void SetMaxProtoSize(int32_t max_proto_size_in) { - max_proto_size_nbytes_ = GHostToNetworkL(max_proto_size_in); + max_proto_size_nbytes_ = ghtonl(max_proto_size_in); } - int32_t GetLogChecksum() const { - return GNetworkToHostL(log_checksum_nbytes_); - } + int32_t GetLogChecksum() const { return gntohl(log_checksum_nbytes_); } void SetLogChecksum(int32_t log_checksum_in) { - log_checksum_nbytes_ = GHostToNetworkL(log_checksum_in); + log_checksum_nbytes_ = ghtonl(log_checksum_in); } - int64_t GetRewindOffset() const { - return GNetworkToHostLL(rewind_offset_nbytes_); - } + int64_t GetRewindOffset() const { return gntohll(rewind_offset_nbytes_); } void SetRewindOffset(int64_t rewind_offset_in) { - rewind_offset_nbytes_ = GHostToNetworkLL(rewind_offset_in); + rewind_offset_nbytes_ = ghtonll(rewind_offset_in); } int32_t GetHeaderChecksum() const { - return GNetworkToHostL(header_checksum_nbytes_); + return gntohl(header_checksum_nbytes_); } void SetHeaderChecksum(int32_t header_checksum_in) { - header_checksum_nbytes_ = GHostToNetworkL(header_checksum_in); + header_checksum_nbytes_ = ghtonl(header_checksum_in); } bool GetCompressFlag() const { return GetFlag(kCompressBit); } void SetCompressFlag(bool compress) { SetFlag(kCompressBit, compress); } - bool GetDirtyFlag() const { return GetFlag(kDirtyBit); } + bool GetDirtyFlag() { return GetFlag(kDirtyBit); } 
void SetDirtyFlag(bool dirty) { SetFlag(kDirtyBit, dirty); } @@ -219,7 +209,7 @@ class PortableFileBackedProtoLog { // Holds the magic as a quick sanity check against file corruption. // // Field is in network-byte order. - int32_t magic_nbytes_ = GHostToNetworkL(kMagic); + int32_t magic_nbytes_ = ghtonl(kMagic); // Must be at the beginning after kMagic. Contains the crc checksum of // the following fields. @@ -233,7 +223,7 @@ class PortableFileBackedProtoLog { // valid instead of throwing away the entire log. // // Field is in network-byte order. - int64_t rewind_offset_nbytes_ = GHostToNetworkLL(kHeaderReservedBytes); + int64_t rewind_offset_nbytes_ = ghtonll(kHeaderReservedBytes); // Version number tracking how we serialize the file to disk. If we change // how/what we write to disk, this version should be updated and this class @@ -284,7 +274,7 @@ class PortableFileBackedProtoLog { // before updating our checksum. bool recalculated_checksum = false; - bool has_data_loss() const { + bool has_data_loss() { return data_loss == DataLoss::PARTIAL || data_loss == DataLoss::COMPLETE; } }; @@ -378,7 +368,8 @@ class PortableFileBackedProtoLog { // } class Iterator { public: - Iterator(const Filesystem& filesystem, int fd, int64_t initial_offset); + Iterator(const Filesystem& filesystem, const std::string& file_path, + int64_t initial_offset); // Advances to the position of next proto whether it has been erased or not. // @@ -394,12 +385,11 @@ class PortableFileBackedProtoLog { private: static constexpr int64_t kInvalidOffset = -1; // Used to read proto metadata + MemoryMappedFile mmapped_file_; // Offset of first proto - const Filesystem* const filesystem_; int64_t initial_offset_; int64_t current_offset_; int64_t file_size_; - int fd_; }; // Returns an iterator of current proto log. The caller needs to keep the @@ -515,7 +505,7 @@ class PortableFileBackedProtoLog { const Filesystem* filesystem, const std::string& file_path, Crc32 initial_crc, int64_t start, int64_t end); - // Reads out the metadata of a proto located at file_offset from the fd. + // Reads out the metadata of a proto located at file_offset from the file. // Metadata will be returned in host byte order endianness. // // Returns: @@ -523,8 +513,7 @@ class PortableFileBackedProtoLog { // OUT_OF_RANGE_ERROR if file_offset exceeds file_size // INTERNAL_ERROR if the metadata is invalid or any IO errors happen static libtextclassifier3::StatusOr<int32_t> ReadProtoMetadata( - const Filesystem* const filesystem, int fd, int64_t file_offset, - int64_t file_size); + MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size); // Writes metadata of a proto to the fd. Takes in a host byte order endianness // metadata and converts it into a portable metadata before writing. 
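All of the header fields above are stored in network byte order (via icing's ghtonl/gntohl helpers) so a log written on a little-endian device stays readable on a big-endian one. A minimal sketch of the same round trip using the standard socket helpers instead; note that 64-bit fields such as the rewind offset need a ghtonll-style helper, since there is no standard htonll:

    #include <arpa/inet.h>  // htonl() / ntohl()
    #include <cstdint>

    // Sketch: keep the on-disk representation big-endian, convert in accessors.
    struct PortableField {
      uint32_t value_nbytes = 0;  // bytes exactly as they appear on disk
      void Set(uint32_t host_value) { value_nbytes = htonl(host_value); }
      uint32_t Get() const { return ntohl(value_nbytes); }
    };
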
@@ -579,6 +568,9 @@ class PortableFileBackedProtoLog { }; template <typename ProtoT> +constexpr uint8_t PortableFileBackedProtoLog<ProtoT>::kProtoMagic; + +template <typename ProtoT> PortableFileBackedProtoLog<ProtoT>::PortableFileBackedProtoLog( const Filesystem* filesystem, const std::string& file_path, std::unique_ptr<Header> header) @@ -733,7 +725,7 @@ PortableFileBackedProtoLog<ProtoT>::InitializeExistingFile( return absl_ports::InternalError(IcingStringUtil::StringPrintf( "Failed to truncate '%s' to size %lld", file_path.data(), static_cast<long long>(header->GetRewindOffset()))); - } + }; data_loss = DataLoss::PARTIAL; } @@ -889,11 +881,12 @@ PortableFileBackedProtoLog<ProtoT>::WriteProto(const ProtoT& proto) { google::protobuf::io::StringOutputStream proto_stream(&proto_str); if (header_->GetCompressFlag()) { - protobuf_ports::GzipOutputStream::Options options; - options.format = protobuf_ports::GzipOutputStream::ZLIB; + google::protobuf::io::GzipOutputStream::Options options; + options.format = google::protobuf::io::GzipOutputStream::ZLIB; options.compression_level = kDeflateCompressionLevel; - protobuf_ports::GzipOutputStream compressing_stream(&proto_stream, options); + google::protobuf::io::GzipOutputStream compressing_stream(&proto_stream, + options); bool success = proto.SerializeToZeroCopyStream(&compressing_stream) && compressing_stream.Close(); @@ -940,42 +933,40 @@ template <typename ProtoT> libtextclassifier3::StatusOr<ProtoT> PortableFileBackedProtoLog<ProtoT>::ReadProto(int64_t file_offset) const { int64_t file_size = filesystem_->GetFileSize(fd_.get()); - // Read out the metadata - if (file_size == Filesystem::kBadFileSize) { - return absl_ports::OutOfRangeError("Unable to correctly read size."); + MemoryMappedFile mmapped_file(*filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_ONLY); + if (file_offset >= file_size) { + // file_size points to the next byte to write at, so subtract one to get + // the inclusive, actual size of file. + return absl_ports::OutOfRangeError( + IcingStringUtil::StringPrintf("Trying to read from a location, %lld, " + "out of range of the file size, %lld", + static_cast<long long>(file_offset), + static_cast<long long>(file_size - 1))); } + + // Read out the metadata ICING_ASSIGN_OR_RETURN( int32_t metadata, - ReadProtoMetadata(filesystem_, fd_.get(), file_offset, file_size)); + ReadProtoMetadata(&mmapped_file, file_offset, file_size)); // Copy out however many bytes it says the proto is int stored_size = GetProtoSize(metadata); - file_offset += sizeof(metadata); - // Read the compressed proto out. 
- if (file_offset + stored_size > file_size) { - return absl_ports::OutOfRangeError( - IcingStringUtil::StringPrintf("Trying to read from a location, %lld, " - "out of range of the file size, %lld", - static_cast<long long>(file_offset), - static_cast<long long>(file_size - 1))); - } - auto buf = std::make_unique<char[]>(stored_size); - if (!filesystem_->PRead(fd_.get(), buf.get(), stored_size, file_offset)) { - return absl_ports::InternalError(""); - } + ICING_RETURN_IF_ERROR( + mmapped_file.Remap(file_offset + sizeof(metadata), stored_size)); - if (IsEmptyBuffer(buf.get(), stored_size)) { + if (IsEmptyBuffer(mmapped_file.region(), mmapped_file.region_size())) { return absl_ports::NotFoundError("The proto data has been erased."); } - google::protobuf::io::ArrayInputStream proto_stream(buf.get(), - stored_size); + google::protobuf::io::ArrayInputStream proto_stream( + mmapped_file.mutable_region(), stored_size); // Deserialize proto ProtoT proto; if (header_->GetCompressFlag()) { - protobuf_ports::GzipInputStream decompress_stream(&proto_stream); + google::protobuf::io::GzipInputStream decompress_stream(&proto_stream); proto.ParseFromZeroCopyStream(&decompress_stream); } else { proto.ParseFromZeroCopyStream(&proto_stream); @@ -988,29 +979,33 @@ template <typename ProtoT> libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::EraseProto( int64_t file_offset) { int64_t file_size = filesystem_->GetFileSize(fd_.get()); - if (file_size == Filesystem::kBadFileSize) { - return absl_ports::OutOfRangeError("Unable to correctly read size."); + if (file_offset >= file_size) { + // file_size points to the next byte to write at, so subtract one to get + // the inclusive, actual size of file. + return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( + "Trying to erase data at a location, %lld, " + "out of range of the file size, %lld", + static_cast<long long>(file_offset), + static_cast<long long>(file_size - 1))); } + MemoryMappedFile mmapped_file( + *filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC); + + // Read out the metadata ICING_ASSIGN_OR_RETURN( int32_t metadata, - ReadProtoMetadata(filesystem_, fd_.get(), file_offset, file_size)); - // Copy out however many bytes it says the proto is - int stored_size = GetProtoSize(metadata); - file_offset += sizeof(metadata); - if (file_offset + stored_size > file_size) { - return absl_ports::OutOfRangeError( - IcingStringUtil::StringPrintf("Trying to read from a location, %lld, " - "out of range of the file size, %lld", - static_cast<long long>(file_offset), - static_cast<long long>(file_size - 1))); - } - auto buf = std::make_unique<char[]>(stored_size); + ReadProtoMetadata(&mmapped_file, file_offset, file_size)); + + ICING_RETURN_IF_ERROR(mmapped_file.Remap(file_offset + sizeof(metadata), + GetProtoSize(metadata))); // We need to update the crc checksum if the erased area is before the // rewind position. int32_t new_crc; - if (file_offset < header_->GetRewindOffset()) { + int64_t erased_proto_offset = file_offset + sizeof(metadata); + if (erased_proto_offset < header_->GetRewindOffset()) { // Set to "dirty" before we start writing anything. header_->SetDirtyFlag(true); header_->SetHeaderChecksum(header_->CalculateHeaderChecksum()); @@ -1023,30 +1018,24 @@ libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::EraseProto( // We need to calculate [original string xor 0s]. // The xored string is the same as the original string because 0 xor 0 = // 0, 1 xor 0 = 1. - // Read the compressed proto out. 
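EraseProto patches the cached log checksum rather than rescanning the whole file. The position bookkeeping (UpdateWithXor's full_data_size/position arguments) is icing-specific, but the underlying fact is that CRC-32 is linear over XOR for equal-length inputs: crc(a ^ b) == crc(a) ^ crc(b) ^ crc(zeros). Since zeroing a region XORs it with its own old bytes, the diff to hash is just those bytes. A standalone check of the identity against zlib, which icing already links:

    #include <zlib.h>
    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
      std::vector<unsigned char> a = {1, 2, 3, 4, 5};
      std::vector<unsigned char> b = {9, 8, 7, 6, 5};
      std::vector<unsigned char> x(a.size()), zeros(a.size(), 0);
      for (std::size_t i = 0; i < a.size(); ++i) x[i] = a[i] ^ b[i];
      auto crc = [](const std::vector<unsigned char>& v) {
        return crc32(crc32(0L, Z_NULL, 0), v.data(),
                     static_cast<uInt>(v.size()));
      };
      // Linearity of CRC-32 over XOR, for equal-length buffers.
      assert(crc(x) == (crc(a) ^ crc(b) ^ crc(zeros)));
      return 0;
    }
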
- if (!filesystem_->PRead(fd_.get(), buf.get(), stored_size, file_offset)) { - return absl_ports::InternalError(""); - } - const std::string_view xored_str(buf.get(), stored_size); + const std::string_view xored_str(mmapped_file.region(), + mmapped_file.region_size()); Crc32 crc(header_->GetLogChecksum()); ICING_ASSIGN_OR_RETURN( - new_crc, - crc.UpdateWithXor(xored_str, - /*full_data_size=*/header_->GetRewindOffset() - - kHeaderReservedBytes, - /*position=*/file_offset - kHeaderReservedBytes)); + new_crc, crc.UpdateWithXor( + xored_str, + /*full_data_size=*/header_->GetRewindOffset() - + kHeaderReservedBytes, + /*position=*/erased_proto_offset - kHeaderReservedBytes)); } // Clear the region. - memset(buf.get(), '\0', stored_size); - if (!filesystem_->PWrite(fd_.get(), file_offset, buf.get(), stored_size)) { - return absl_ports::InternalError(""); - } + memset(mmapped_file.mutable_region(), '\0', mmapped_file.region_size()); // If we cleared something in our checksummed area, we should update our // checksum and reset our dirty bit. - if (file_offset < header_->GetRewindOffset()) { + if (erased_proto_offset < header_->GetRewindOffset()) { header_->SetDirtyFlag(false); header_->SetLogChecksum(new_crc); header_->SetHeaderChecksum(header_->CalculateHeaderChecksum()); @@ -1084,12 +1073,13 @@ PortableFileBackedProtoLog<ProtoT>::GetElementsFileSize() const { template <typename ProtoT> PortableFileBackedProtoLog<ProtoT>::Iterator::Iterator( - const Filesystem& filesystem, int fd, int64_t initial_offset) - : filesystem_(&filesystem), + const Filesystem& filesystem, const std::string& file_path, + int64_t initial_offset) + : mmapped_file_(filesystem, file_path, + MemoryMappedFile::Strategy::READ_ONLY), initial_offset_(initial_offset), current_offset_(kInvalidOffset), - fd_(fd) { - file_size_ = filesystem_->GetFileSize(fd_); + file_size_(filesystem.GetFileSize(file_path.c_str())) { if (file_size_ == Filesystem::kBadFileSize) { // Fails all Advance() calls file_size_ = 0; @@ -1106,7 +1096,7 @@ PortableFileBackedProtoLog<ProtoT>::Iterator::Advance() { // Jumps to the next proto position ICING_ASSIGN_OR_RETURN( int32_t metadata, - ReadProtoMetadata(filesystem_, fd_, current_offset_, file_size_)); + ReadProtoMetadata(&mmapped_file_, current_offset_, file_size_)); current_offset_ += sizeof(metadata) + GetProtoSize(metadata); } @@ -1128,15 +1118,14 @@ int64_t PortableFileBackedProtoLog<ProtoT>::Iterator::GetOffset() { template <typename ProtoT> typename PortableFileBackedProtoLog<ProtoT>::Iterator PortableFileBackedProtoLog<ProtoT>::GetIterator() { - return Iterator(*filesystem_, fd_.get(), + return Iterator(*filesystem_, file_path_, /*initial_offset=*/kHeaderReservedBytes); } template <typename ProtoT> libtextclassifier3::StatusOr<int32_t> PortableFileBackedProtoLog<ProtoT>::ReadProtoMetadata( - const Filesystem* const filesystem, int fd, int64_t file_offset, - int64_t file_size) { + MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size) { // Checks file_offset if (file_offset >= file_size) { return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( @@ -1154,12 +1143,12 @@ PortableFileBackedProtoLog<ProtoT>::ReadProtoMetadata( static_cast<long long>(file_size))); } - if (!filesystem->PRead(fd, &portable_metadata, metadata_size, file_offset)) { - return absl_ports::InternalError(""); - } + // Reads metadata + ICING_RETURN_IF_ERROR(mmapped_file->Remap(file_offset, metadata_size)); + memcpy(&portable_metadata, mmapped_file->region(), metadata_size); // Need to switch it back to host 
order endianness after reading from disk. - int32_t host_order_metadata = GNetworkToHostL(portable_metadata); + int32_t host_order_metadata = gntohl(portable_metadata); // Checks magic number uint8_t stored_k_proto_magic = GetProtoMagic(host_order_metadata); @@ -1177,7 +1166,7 @@ libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::WriteProtoMetadata( const Filesystem* filesystem, int fd, int32_t host_order_metadata) { // Convert it into portable endian format before writing to disk - int32_t portable_metadata = GHostToNetworkL(host_order_metadata); + int32_t portable_metadata = ghtonl(host_order_metadata); int portable_metadata_size = sizeof(portable_metadata); // Write metadata @@ -1197,7 +1186,21 @@ libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::PersistToDisk() { return libtextclassifier3::Status::OK; } - ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum()); + int64_t new_content_size = file_size - header_->GetRewindOffset(); + Crc32 crc; + if (new_content_size < 0) { + // File shrunk, recalculate the entire checksum. + ICING_ASSIGN_OR_RETURN( + crc, + ComputeChecksum(filesystem_, file_path_, Crc32(), + /*start=*/kHeaderReservedBytes, /*end=*/file_size)); + } else { + // Append new changes to the existing checksum. + ICING_ASSIGN_OR_RETURN( + crc, ComputeChecksum(filesystem_, file_path_, + Crc32(header_->GetLogChecksum()), + header_->GetRewindOffset(), file_size)); + } header_->SetLogChecksum(crc.Get()); header_->SetRewindOffset(file_size); @@ -1216,26 +1219,9 @@ libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::PersistToDisk() { template <typename ProtoT> libtextclassifier3::StatusOr<Crc32> PortableFileBackedProtoLog<ProtoT>::ComputeChecksum() { - int64_t file_size = filesystem_->GetFileSize(file_path_.c_str()); - int64_t new_content_size = file_size - header_->GetRewindOffset(); - Crc32 crc; - if (new_content_size == 0) { - // No new protos appended, return cached checksum - return Crc32(header_->GetLogChecksum()); - } else if (new_content_size < 0) { - // File shrunk, recalculate the entire checksum. - ICING_ASSIGN_OR_RETURN( - crc, - ComputeChecksum(filesystem_, file_path_, Crc32(), - /*start=*/kHeaderReservedBytes, /*end=*/file_size)); - } else { - // Append new changes to the existing checksum. 
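The "append new changes" branch works because CRC-32 is a running state: seeding crc32() with the cached value and feeding only the bytes written since the last rewind offset yields the same result as hashing the whole range at once. A standalone zlib check, with the offsets hypothetical:

    #include <zlib.h>
    #include <cassert>

    int main() {
      const char text[] = "old-content|new-tail";
      const Bytef* data = reinterpret_cast<const Bytef*>(text);
      const uInt old_len = 12;              // bytes covered at the last persist
      const uInt total = sizeof(text) - 1;  // bytes in the file now

      uLong whole = crc32(crc32(0L, Z_NULL, 0), data, total);
      uLong cached = crc32(crc32(0L, Z_NULL, 0), data, old_len);
      uLong extended = crc32(cached, data + old_len, total - old_len);

      assert(whole == extended);  // extending the cached CRC == one-shot CRC
      return 0;
    }
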
- ICING_ASSIGN_OR_RETURN( - crc, ComputeChecksum( - filesystem_, file_path_, Crc32(header_->GetLogChecksum()), - /*start=*/header_->GetRewindOffset(), /*end=*/file_size)); - } - return crc; + return PortableFileBackedProtoLog<ProtoT>::ComputeChecksum( + filesystem_, file_path_, Crc32(), /*start=*/kHeaderReservedBytes, + /*end=*/filesystem_->GetFileSize(file_path_.c_str())); } } // namespace lib diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc index 80a8011..04ccab0 100644 --- a/icing/file/portable-file-backed-proto-log_benchmark.cc +++ b/icing/file/portable-file-backed-proto-log_benchmark.cc @@ -55,7 +55,7 @@ namespace lib { namespace { -void BM_Write(benchmark::State& state) { +static void BM_Write(benchmark::State& state) { const Filesystem filesystem; int string_length = state.range(0); const std::string file_path = IcingStringUtil::StringPrintf( @@ -108,7 +108,7 @@ BENCHMARK(BM_Write) // 16MiB, and we need some extra space for the // rest of the document properties -void BM_Read(benchmark::State& state) { +static void BM_Read(benchmark::State& state) { const Filesystem filesystem; int string_length = state.range(0); const std::string file_path = IcingStringUtil::StringPrintf( @@ -164,7 +164,7 @@ BENCHMARK(BM_Read) // 16MiB, and we need some extra space for the // rest of the document properties // -void BM_Erase(benchmark::State& state) { +static void BM_Erase(benchmark::State& state) { const Filesystem filesystem; const std::string file_path = IcingStringUtil::StringPrintf( "%s%s", GetTestTempDir().c_str(), "/proto.log"); @@ -204,7 +204,7 @@ void BM_Erase(benchmark::State& state) { } BENCHMARK(BM_Erase); -void BM_ComputeChecksum(benchmark::State& state) { +static void BM_ComputeChecksum(benchmark::State& state) { const Filesystem filesystem; const std::string file_path = GetTestTempDir() + "/proto.log"; int max_proto_size = (1 << 24) - 1; // 16 MiB @@ -246,98 +246,6 @@ void BM_ComputeChecksum(benchmark::State& state) { } BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20); -void BM_ComputeChecksumWithCachedChecksum(benchmark::State& state) { - const Filesystem filesystem; - const std::string file_path = GetTestTempDir() + "/proto.log"; - int max_proto_size = (1 << 24) - 1; // 16 MiB - bool compress = true; - - // Make sure it doesn't already exist. - filesystem.DeleteFile(file_path.c_str()); - - auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( - &filesystem, file_path, - PortableFileBackedProtoLog<DocumentProto>::Options( - compress, max_proto_size)) - .ValueOrDie() - .proto_log; - - DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); - - // Make the document 1KiB - int string_length = 1024; - std::default_random_engine random; - const std::string rand_str = - RandomString(kAlNumAlphabet, string_length, &random); - - auto document_properties = document.add_properties(); - document_properties->set_name("string property"); - document_properties->add_string_values(rand_str); - - // Write some content and persist. This should update our cached checksum to - // include the document. - ICING_ASSERT_OK(proto_log->WriteProto(document)); - ICING_ASSERT_OK(proto_log->PersistToDisk()); - - // This ComputeChecksum call shouldn't need to do any computation since we can - // reuse our cached checksum. 
- for (auto _ : state) { - testing::DoNotOptimize(proto_log->ComputeChecksum()); - } - - // Cleanup after ourselves - filesystem.DeleteFile(file_path.c_str()); -} -BENCHMARK(BM_ComputeChecksumWithCachedChecksum); - -void BM_ComputeChecksumOnlyForTail(benchmark::State& state) { - const Filesystem filesystem; - const std::string file_path = GetTestTempDir() + "/proto.log"; - int max_proto_size = (1 << 24) - 1; // 16 MiB - bool compress = true; - - // Make sure it doesn't already exist. - filesystem.DeleteFile(file_path.c_str()); - - auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( - &filesystem, file_path, - PortableFileBackedProtoLog<DocumentProto>::Options( - compress, max_proto_size)) - .ValueOrDie() - .proto_log; - - DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); - - // Make the document 1KiB - int string_length = 1024; - std::default_random_engine random; - const std::string rand_str = - RandomString(kAlNumAlphabet, string_length, &random); - - auto document_properties = document.add_properties(); - document_properties->set_name("string property"); - document_properties->add_string_values(rand_str); - - // Write some content and persist. This should update our cached checksum to - // include the document. - ICING_ASSERT_OK(proto_log->WriteProto(document)); - ICING_ASSERT_OK(proto_log->PersistToDisk()); - - // Write another proto into the tail, but it's not included in our cached - // checksum since we didn't call persist. - ICING_ASSERT_OK(proto_log->WriteProto(document)); - - // ComputeChecksum should be calculating the checksum of the tail and adding - // it to the cached checksum we have. - for (auto _ : state) { - testing::DoNotOptimize(proto_log->ComputeChecksum()); - } - - // Cleanup after ourselves - filesystem.DeleteFile(file_path.c_str()); -} -BENCHMARK(BM_ComputeChecksumOnlyForTail); - } // namespace } // namespace lib } // namespace icing diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc index 795271a..b5fee4b 100644 --- a/icing/file/portable-file-backed-proto-log_test.cc +++ b/icing/file/portable-file-backed-proto-log_test.cc @@ -851,12 +851,11 @@ TEST_F(PortableFileBackedProtoLogTest, Iterator) { { // Iterator with bad filesystem - ScopedFd sfd(filesystem_.OpenForRead(file_path_.c_str())); MockFilesystem mock_filesystem; - ON_CALL(mock_filesystem, GetFileSize(A<int>())) + ON_CALL(mock_filesystem, GetFileSize(A<const char*>())) .WillByDefault(Return(Filesystem::kBadFileSize)); PortableFileBackedProtoLog<DocumentProto>::Iterator bad_iterator( - mock_filesystem, sfd.get(), /*initial_offset=*/0); + mock_filesystem, file_path_, /*initial_offset=*/0); ASSERT_THAT(bad_iterator.Advance(), StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); } diff --git a/icing/testing/icu-data-file-helper.cc b/icing/helpers/icu/icu-data-file-helper.cc index aaeb738..6607c40 100644 --- a/icing/testing/icu-data-file-helper.cc +++ b/icing/helpers/icu/icu-data-file-helper.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "icing/testing/icu-data-file-helper.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include <sys/mman.h> diff --git a/icing/testing/icu-data-file-helper.h b/icing/helpers/icu/icu-data-file-helper.h index d0276e7..90f5bc7 100644 --- a/icing/testing/icu-data-file-helper.h +++ b/icing/helpers/icu/icu-data-file-helper.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef ICING_TESTING_ICU_DATA_FILE_HELPER -#define ICING_TESTING_ICU_DATA_FILE_HELPER +#ifndef ICING_HELPERS_ICU_ICU_DATA_FILE_HELPER +#define ICING_HELPERS_ICU_ICU_DATA_FILE_HELPER #include "icing/text_classifier/lib3/utils/base/status.h" @@ -40,4 +40,4 @@ libtextclassifier3::Status SetUpICUDataFile( } // namespace lib } // namespace icing -#endif // ICING_TESTING_ICU_DATA_FILE_HELPER +#endif // ICING_HELPERS_ICU_ICU_DATA_FILE_HELPER diff --git a/icing/icing-search-engine-with-icu-file_test.cc b/icing/icing-search-engine-with-icu-file_test.cc index 1012b47..48e81e5 100644 --- a/icing/icing-search-engine-with-icu-file_test.cc +++ b/icing/icing-search-engine-with-icu-file_test.cc @@ -37,13 +37,13 @@ namespace { using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::Eq; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = - PropertyConfigProto::Cardinality::REQUIRED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; -constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; std::string GetTestBaseDir() { return GetTestTempDir() + "/icing_with_icu_files"; diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 952ba21..20a6bb9 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -18,7 +18,6 @@ #include <memory> #include <string> #include <string_view> -#include <unordered_map> #include <utility> #include <vector> @@ -36,7 +35,6 @@ #include "icing/index/index.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/legacy/index/icing-filesystem.h" -#include "icing/portable/endian.h" #include "icing/proto/document.pb.h" #include "icing/proto/initialize.pb.h" #include "icing/proto/internal/optimize.pb.h" @@ -48,7 +46,6 @@ #include "icing/proto/search.pb.h" #include "icing/proto/status.pb.h" #include "icing/query/query-processor.h" -#include "icing/query/suggestion-processor.h" #include "icing/result/projection-tree.h" #include "icing/result/projector.h" #include "icing/result/result-retriever.h" @@ -60,7 +57,6 @@ #include "icing/scoring/scoring-processor.h" #include "icing/store/document-id.h" #include "icing/store/document-store.h" -#include "icing/store/namespace-checker-impl.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer-factory.h" @@ -81,30 +77,19 @@ constexpr std::string_view kDocumentSubfolderName = "document_dir"; constexpr std::string_view kIndexSubfolderName = "index_dir"; constexpr std::string_view kSchemaSubfolderName = "schema_dir"; constexpr std::string_view kSetSchemaMarkerFilename = "set_schema_marker"; -constexpr std::string_view 
kInitMarkerFilename = "init_marker"; constexpr std::string_view kOptimizeStatusFilename = "optimize_status"; -// The maximum number of unsuccessful initialization attempts from the current -// state that we will tolerate before deleting all data and starting from a -// fresh state. -constexpr int kMaxUnsuccessfulInitAttempts = 5; - -// A pair that holds namespace and type. -struct NamespaceTypePair { - std::string namespace_; - std::string type; - - bool operator==(const NamespaceTypePair& other) const { - return namespace_ == other.namespace_ && type == other.type; - } -}; - -struct NamespaceTypePairHasher { - std::size_t operator()(const NamespaceTypePair& pair) const { - return std::hash<std::string>()(pair.namespace_) ^ - std::hash<std::string>()(pair.type); +libtextclassifier3::Status ValidateOptions( + const IcingSearchEngineOptions& options) { + // These options are only used in IndexProcessor, which won't be created + // until the first Put call. So they must be checked here, so that any + // errors can be surfaced in Initialize. + if (options.max_tokens_per_doc() <= 0) { + return absl_ports::InvalidArgumentError( + "Options::max_tokens_per_doc must be greater than zero."); } -}; + return libtextclassifier3::Status::OK; +} libtextclassifier3::Status ValidateResultSpec( const ResultSpecProto& result_spec) { @@ -142,29 +127,14 @@ libtextclassifier3::Status ValidateSearchSpec( return libtextclassifier3::Status::OK; } -libtextclassifier3::Status ValidateSuggestionSpec( - const SuggestionSpecProto& suggestion_spec, - const PerformanceConfiguration& configuration) { - if (suggestion_spec.prefix().empty()) { - return absl_ports::InvalidArgumentError( - absl_ports::StrCat("SuggestionSpecProto.prefix is empty!")); - } - if (suggestion_spec.scoring_spec().scoring_match_type() == - TermMatchType::UNKNOWN) { - return absl_ports::InvalidArgumentError( - absl_ports::StrCat("SuggestionSpecProto.term_match_type is unknown!")); - } - if (suggestion_spec.num_to_return() <= 0) { - return absl_ports::InvalidArgumentError(absl_ports::StrCat( - "SuggestionSpecProto.num_to_return must be positive.")); - } - if (suggestion_spec.prefix().size() > configuration.max_query_length) { - return absl_ports::InvalidArgumentError( - absl_ports::StrCat("SuggestionSpecProto.prefix is longer than the " - "maximum allowed prefix length: ", - std::to_string(configuration.max_query_length))); - } - return libtextclassifier3::Status::OK; +IndexProcessor::Options CreateIndexProcessorOptions( + const IcingSearchEngineOptions& options) { + IndexProcessor::Options index_processor_options; + index_processor_options.max_tokens_per_document = + options.max_tokens_per_doc(); + index_processor_options.token_limit_behavior = + IndexProcessor::Options::TokenLimitBehavior::kSuppressError; + return index_processor_options; } // Document store files are in a standalone subfolder for easier file @@ -194,15 +164,10 @@ std::string MakeIndexDirectoryPath(const std::string& base_dir) { std::string MakeSchemaDirectoryPath(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kSchemaSubfolderName); } - std::string MakeSetSchemaMarkerFilePath(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kSetSchemaMarkerFilename); } -std::string MakeInitMarkerFilePath(const std::string& base_dir) { - return absl_ports::StrCat(base_dir, "/", kInitMarkerFilename); -} - void TransformStatus(const libtextclassifier3::Status& internal_status, StatusProto* status_proto) { StatusProto::Code code; @@ -273,28 +238,6 @@ void 
TransformStatus(const libtextclassifier3::Status& internal_status, status_proto->set_message(internal_status.error_message()); } -libtextclassifier3::Status RetrieveAndAddDocumentInfo( - const DocumentStore* document_store, DeleteByQueryResultProto& result_proto, - std::unordered_map<NamespaceTypePair, - DeleteByQueryResultProto::DocumentGroupInfo*, - NamespaceTypePairHasher>& info_map, - DocumentId document_id) { - ICING_ASSIGN_OR_RETURN(DocumentProto document, - document_store->Get(document_id)); - NamespaceTypePair key = {document.namespace_(), document.schema()}; - auto iter = info_map.find(key); - if (iter == info_map.end()) { - auto entry = result_proto.add_deleted_documents(); - entry->set_namespace_(std::move(document.namespace_())); - entry->set_schema(std::move(document.schema())); - entry->add_uris(std::move(document.uri())); - info_map[key] = entry; - } else { - iter->second->add_uris(std::move(document.uri())); - } - return libtextclassifier3::Status::OK; -} - } // namespace IcingSearchEngine::IcingSearchEngine(const IcingSearchEngineOptions& options, @@ -333,66 +276,6 @@ InitializeResultProto IcingSearchEngine::Initialize() { return InternalInitialize(); } -void IcingSearchEngine::ResetMembers() { - schema_store_.reset(); - document_store_.reset(); - language_segmenter_.reset(); - normalizer_.reset(); - index_.reset(); -} - -libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile( - InitializeStatsProto* initialize_stats) { - // Check to see if the marker file exists and if we've already passed our max - // number of init attempts. - std::string marker_filepath = MakeInitMarkerFilePath(options_.base_dir()); - bool file_exists = filesystem_->FileExists(marker_filepath.c_str()); - int network_init_attempts = 0; - int host_init_attempts = 0; - - // Read the number of previous failed init attempts from the file. If it - // fails, then just assume the value is zero (the most likely reason for - // failure would be non-existence because the last init was successful - // anyways). - ScopedFd marker_file_fd(filesystem_->OpenForWrite(marker_filepath.c_str())); - libtextclassifier3::Status status; - if (file_exists && - filesystem_->PRead(marker_file_fd.get(), &network_init_attempts, - sizeof(network_init_attempts), /*offset=*/0)) { - host_init_attempts = GNetworkToHostL(network_init_attempts); - if (host_init_attempts > kMaxUnsuccessfulInitAttempts) { - // We're tried and failed to init too many times. We need to throw - // everything out and start from scratch. - ResetMembers(); - if (!filesystem_->DeleteDirectoryRecursively( - options_.base_dir().c_str())) { - return absl_ports::InternalError("Failed to delete icing base dir!"); - } - status = absl_ports::DataLossError( - "Encountered failed initialization limit. Cleared all data."); - host_init_attempts = 0; - } - } - - // Use network_init_attempts here because we might have set host_init_attempts - // to 0 if it exceeded the max threshold. - initialize_stats->set_num_previous_init_failures( - GNetworkToHostL(network_init_attempts)); - - ++host_init_attempts; - network_init_attempts = GHostToNetworkL(host_init_attempts); - // Write the updated number of attempts before we get started. 
- if (!filesystem_->PWrite(marker_file_fd.get(), /*offset=*/0, - &network_init_attempts, - sizeof(network_init_attempts)) || - !filesystem_->DataSync(marker_file_fd.get())) { - return absl_ports::InternalError( - "Failed to write and sync init marker file"); - } - - return status; -} - InitializeResultProto IcingSearchEngine::InternalInitialize() { ICING_VLOG(1) << "Initializing IcingSearchEngine in dir: " << options_.base_dir(); @@ -413,17 +296,9 @@ InitializeResultProto IcingSearchEngine::InternalInitialize() { return result_proto; } - // Now go ahead and try to initialize. libtextclassifier3::Status status = InitializeMembers(initialize_stats); if (status.ok() || absl_ports::IsDataLoss(status)) { - // We successfully initialized. We should delete the init marker file to - // indicate a successful init. - std::string marker_filepath = MakeInitMarkerFilePath(options_.base_dir()); - if (!filesystem_->DeleteFile(marker_filepath.c_str())) { - status = absl_ports::InternalError("Failed to delete init marker file!"); - } else { - initialized_ = true; - } + initialized_ = true; } TransformStatus(status, result_status); initialize_stats->set_latency_ms(initialize_timer->GetElapsedMilliseconds()); @@ -433,20 +308,7 @@ InitializeResultProto IcingSearchEngine::InternalInitialize() { libtextclassifier3::Status IcingSearchEngine::InitializeMembers( InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(initialize_stats); - - // Make sure the base directory exists - if (!filesystem_->CreateDirectoryRecursively(options_.base_dir().c_str())) { - return absl_ports::InternalError(absl_ports::StrCat( - "Could not create directory: ", options_.base_dir())); - } - - // Check to see if the marker file exists and if we've already passed our max - // number of init attempts. - libtextclassifier3::Status status = CheckInitMarkerFile(initialize_stats); - if (!status.ok() && !absl_ports::IsDataLoss(status)) { - return status; - } - + ICING_RETURN_IF_ERROR(InitializeOptions()); ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats)); // TODO(b/156383798) : Resolve how to specify the locale. @@ -460,7 +322,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( std::string marker_filepath = MakeSetSchemaMarkerFilePath(options_.base_dir()); - libtextclassifier3::Status index_init_status; + libtextclassifier3::Status status; if (absl_ports::IsNotFound(schema_store_->GetSchema().status())) { // The schema was either lost or never set before. Wipe out the doc store // and index directories and initialize them from scratch. @@ -474,15 +336,14 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( } ICING_RETURN_IF_ERROR(InitializeDocumentStore( /*force_recovery_and_revalidate_documents=*/false, initialize_stats)); - index_init_status = InitializeIndex(initialize_stats); - if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) { - return index_init_status; - } + status = InitializeIndex(initialize_stats); } else if (filesystem_->FileExists(marker_filepath.c_str())) { // If the marker file is still around then something wonky happened when we // last tried to set the schema. ICING_RETURN_IF_ERROR(InitializeDocumentStore( /*force_recovery_and_revalidate_documents=*/true, initialize_stats)); + initialize_stats->set_document_store_recovery_cause( + InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC); // We're going to need to build the index from scratch. So just delete its // files now. 
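The set-schema marker here, like the init marker removed above, follows one crash-recovery idiom: persist a sentinel before a risky multi-step operation, delete it on success, and treat its presence at startup as proof the operation was interrupted. A POSIX-flavored sketch, with hypothetical helper names and most error handling elided:

    #include <fcntl.h>
    #include <unistd.h>

    // Hypothetical sentinel helpers; real code must check and propagate errors.
    bool MarkerExists(const char* path) { return access(path, F_OK) == 0; }

    bool CreateMarker(const char* path) {
      int fd = open(path, O_CREAT | O_WRONLY, 0600);
      if (fd < 0) return false;
      bool ok = fsync(fd) == 0;  // make the sentinel itself survive a crash
      close(fd);
      return ok;
    }

    // Startup: if MarkerExists(), the previous run died mid-operation, so
    // rebuild derived state (here, the index) before accepting new work.
    // Success path: unlink(path) so the next startup takes the fast path.
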
@@ -499,12 +360,12 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer(); IndexRestorationResult restore_result = RestoreIndexIfNeeded(); - index_init_status = std::move(restore_result.status); + status = std::move(restore_result.status); // DATA_LOSS means that we have successfully initialized and re-added // content to the index. Some indexed content was lost, but otherwise the // index is in a valid state and can be queried. - if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) { - return index_init_status; + if (!status.ok() && !absl_ports::IsDataLoss(status)) { + return status; } // Delete the marker file to indicate that everything is now in sync with @@ -518,22 +379,30 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( } else { ICING_RETURN_IF_ERROR(InitializeDocumentStore( /*force_recovery_and_revalidate_documents=*/false, initialize_stats)); - index_init_status = InitializeIndex(initialize_stats); - if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) { - return index_init_status; + status = InitializeIndex(initialize_stats); + if (!status.ok() && !absl_ports::IsDataLoss(status)) { + return status; } } - if (status.ok()) { - status = index_init_status; - } - result_state_manager_ = std::make_unique<ResultStateManager>( performance_configuration_.max_num_total_hits, *document_store_); return status; } +libtextclassifier3::Status IcingSearchEngine::InitializeOptions() { + ICING_RETURN_IF_ERROR(ValidateOptions(options_)); + + // Make sure the base directory exists + if (!filesystem_->CreateDirectoryRecursively(options_.base_dir().c_str())) { + return absl_ports::InternalError(absl_ports::StrCat( + "Could not create directory: ", options_.base_dir())); + } + + return libtextclassifier3::Status::OK; +} + libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore( InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(initialize_stats); @@ -633,18 +502,15 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( StatusProto* result_status = result_proto.mutable_status(); absl_ports::unique_lock l(&mutex_); - std::unique_ptr<Timer> timer = clock_->GetNewTimer(); if (!initialized_) { result_status->set_code(StatusProto::FAILED_PRECONDITION); result_status->set_message("IcingSearchEngine has not been initialized!"); - result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } auto lost_previous_schema_or = LostPreviousSchema(); if (!lost_previous_schema_or.ok()) { TransformStatus(lost_previous_schema_or.status(), result_status); - result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } bool lost_previous_schema = lost_previous_schema_or.ValueOrDie(); @@ -662,11 +528,10 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( std::move(new_schema), ignore_errors_and_delete_documents); if (!set_schema_result_or.ok()) { TransformStatus(set_schema_result_or.status(), result_status); - result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } - SchemaStore::SetSchemaResult set_schema_result = - std::move(set_schema_result_or).ValueOrDie(); + const SchemaStore::SetSchemaResult set_schema_result = + set_schema_result_or.ValueOrDie(); for (const std::string& deleted_type : set_schema_result.schema_types_deleted_by_name) { @@ -678,25 +543,6 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( result_proto.add_incompatible_schema_types(incompatible_type); } - for 
(const std::string& new_type : - set_schema_result.schema_types_new_by_name) { - result_proto.add_new_schema_types(std::move(new_type)); - } - - for (const std::string& compatible_type : - set_schema_result.schema_types_changed_fully_compatible_by_name) { - result_proto.add_fully_compatible_changed_schema_types( - std::move(compatible_type)); - } - - bool index_incompatible = - !set_schema_result.schema_types_index_incompatible_by_name.empty(); - for (const std::string& index_incompatible_type : - set_schema_result.schema_types_index_incompatible_by_name) { - result_proto.add_index_incompatible_changed_schema_types( - std::move(index_incompatible_type)); - } - libtextclassifier3::Status status; if (set_schema_result.success) { if (lost_previous_schema) { @@ -705,7 +551,6 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( status = document_store_->UpdateSchemaStore(schema_store_.get()); if (!status.ok()) { TransformStatus(status, result_status); - result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } } else if (!set_schema_result.old_schema_type_ids_changed.empty() || @@ -715,17 +560,15 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( set_schema_result); if (!status.ok()) { TransformStatus(status, result_status); - result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } } - if (lost_previous_schema || index_incompatible) { + if (lost_previous_schema || set_schema_result.index_incompatible) { // Clears all index files status = index_->Reset(); if (!status.ok()) { TransformStatus(status, result_status); - result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } @@ -736,7 +579,6 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( if (!restore_result.status.ok() && !absl_ports::IsDataLoss(restore_result.status)) { TransformStatus(status, result_status); - result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } } @@ -747,7 +589,6 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( result_status->set_message("Schema is incompatible."); } - result_proto.set_latency_ms(timer->GetElapsedMilliseconds()); return result_proto; } @@ -841,8 +682,9 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) { } DocumentId document_id = document_id_or.ValueOrDie(); - auto index_processor_or = - IndexProcessor::Create(normalizer_.get(), index_.get(), clock_.get()); + auto index_processor_or = IndexProcessor::Create( + normalizer_.get(), index_.get(), CreateIndexProcessorOptions(options_), + clock_.get()); if (!index_processor_or.ok()) { TransformStatus(index_processor_or.status(), result_status); put_document_stats->set_latency_ms(put_timer->GetElapsedMilliseconds()); @@ -853,17 +695,6 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) { auto status = index_processor->IndexDocument(tokenized_document, document_id, put_document_stats); - if (!status.ok()) { - // If we encountered a failure while indexing this document, then mark it as - // deleted. - libtextclassifier3::Status delete_status = - document_store_->Delete(document_id); - if (!delete_status.ok()) { - // This is pretty dire (and, hopefully, unlikely). We can't roll back the - // document that we just added. Wipeout the whole index. 
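The removed block is a two-level fallback: if indexing fails, un-put the document so the store and index stay consistent; if even that delete fails, the ResetInternal() call just below wipes everything rather than serve a corrupt index. A toy self-contained sketch of that escalation ladder, with every type a hypothetical stand-in:

    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    // Toy stand-ins for the document store and index; illustrative only.
    struct Store {
      std::vector<std::string> docs;
      int Add(std::string d) {
        docs.push_back(std::move(d));
        return static_cast<int>(docs.size()) - 1;
      }
      bool Delete(int id) { docs[id].clear(); return true; }
    };
    struct Index {
      std::unordered_map<int, std::string> terms;
      // Fails on an empty document, standing in for a real indexing error.
      bool Add(int id, const std::string& d) { terms[id] = d; return !d.empty(); }
    };

    void ResetEverything(Store& s, Index& i) { s.docs.clear(); i.terms.clear(); }

    // The escalation ladder from the removed code: index failure -> delete the
    // just-added document; delete failure -> wipe everything and start fresh.
    bool PutWithRollback(Store& store, Index& index, std::string doc) {
      int id = store.Add(doc);
      if (!index.Add(id, doc)) {
        if (!store.Delete(id)) ResetEverything(store, index);
        return false;
      }
      return true;
    }

    int main() {
      Store store; Index index;
      bool ok = PutWithRollback(store, index, "hello");  // succeeds
      bool bad = PutWithRollback(store, index, "");      // fails, rolled back
      return (ok && !bad) ? 0 : 1;
    }
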
- ResetInternal(); - } - } TransformStatus(status, result_status); put_document_stats->set_latency_ms(put_timer->GetElapsedMilliseconds()); @@ -972,7 +803,7 @@ DeleteResultProto IcingSearchEngine::Delete(const std::string_view name_space, delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SINGLE); std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = document_store_->Delete(name_space, uri); if (!status.ok()) { @@ -1006,7 +837,7 @@ DeleteByNamespaceResultProto IcingSearchEngine::DeleteByNamespace( delete_stats->set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE); std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. DocumentStore::DeleteByGroupResult doc_store_result = document_store_->DeleteByNamespace(name_space); @@ -1040,7 +871,7 @@ DeleteBySchemaTypeResultProto IcingSearchEngine::DeleteBySchemaType( delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE); std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. DocumentStore::DeleteByGroupResult doc_store_result = document_store_->DeleteBySchemaType(schema_type); @@ -1058,7 +889,7 @@ DeleteBySchemaTypeResultProto IcingSearchEngine::DeleteBySchemaType( } DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( - const SearchSpecProto& search_spec, bool return_deleted_document_info) { + const SearchSpecProto& search_spec) { ICING_VLOG(1) << "Deleting documents for query " << search_spec.query() << " from doc store"; @@ -1072,13 +903,9 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } - DeleteByQueryStatsProto* delete_stats = - result_proto.mutable_delete_by_query_stats(); - delete_stats->set_query_length(search_spec.query().length()); - delete_stats->set_num_namespaces_filtered( - search_spec.namespace_filters_size()); - delete_stats->set_num_schema_types_filtered( - search_spec.schema_type_filters_size()); + DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats(); + delete_stats->set_delete_type(DeleteStatsProto::DeleteType::QUERY); + std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); libtextclassifier3::Status status = @@ -1088,7 +915,6 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } - std::unique_ptr<Timer> component_timer = clock_->GetNewTimer(); // Gets unordered results from query processor auto query_processor_or = QueryProcessor::Create( index_.get(), language_segmenter_.get(), normalizer_.get(), @@ -1107,32 +933,14 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( } QueryProcessor::QueryResults query_results = std::move(query_results_or).ValueOrDie(); - delete_stats->set_parse_query_latency_ms( - component_timer->GetElapsedMilliseconds()); ICING_VLOG(2) << "Deleting the docs that matched the query."; int num_deleted = 0; - // A map used to group deleted documents. - // From the (namespace, type) pair to a list of uris. 
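The removed grouping code keys an unordered_map on a (namespace, type) struct, which needs operator== plus a hasher; the NamespaceTypePairHasher removed earlier XORs the two string hashes, which is cheap but collides whenever namespace == type. A minimal self-contained version of the pattern, using a multiply-mix instead of plain XOR:

    #include <cstddef>
    #include <functional>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct NsType {
      std::string ns, type;
      bool operator==(const NsType& o) const {
        return ns == o.ns && type == o.type;
      }
    };

    struct NsTypeHash {
      std::size_t operator()(const NsType& k) const {
        std::size_t h = std::hash<std::string>()(k.ns);
        // Mixing instead of plain XOR avoids hash(a, a) collapsing to zero.
        return h * 31 + std::hash<std::string>()(k.type);
      }
    };

    int main() {
      std::unordered_map<NsType, std::vector<std::string>, NsTypeHash> groups;
      groups[{"ns1", "Email"}].push_back("uri/1");
      groups[{"ns1", "Email"}].push_back("uri/2");
      return groups.begin()->second.size() == 2 ? 0 : 1;
    }
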
- std::unordered_map<NamespaceTypePair, - DeleteByQueryResultProto::DocumentGroupInfo*, - NamespaceTypePairHasher> - deleted_info_map; - component_timer = clock_->GetNewTimer(); while (query_results.root_iterator->Advance().ok()) { ICING_VLOG(3) << "Deleting doc " << query_results.root_iterator->doc_hit_info().document_id(); ++num_deleted; - if (return_deleted_document_info) { - status = RetrieveAndAddDocumentInfo( - document_store_.get(), result_proto, deleted_info_map, - query_results.root_iterator->doc_hit_info().document_id()); - if (!status.ok()) { - TransformStatus(status, result_status); - return result_proto; - } - } status = document_store_->Delete( query_results.root_iterator->doc_hit_info().document_id()); if (!status.ok()) { @@ -1140,13 +948,6 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } } - delete_stats->set_document_removal_latency_ms( - component_timer->GetElapsedMilliseconds()); - int term_count = 0; - for (const auto& section_and_terms : query_results.query_terms) { - term_count += section_and_terms.second.size(); - } - delete_stats->set_num_terms(term_count); if (num_deleted > 0) { result_proto.mutable_status()->set_code(StatusProto::OK); @@ -1201,8 +1002,12 @@ OptimizeResultProto IcingSearchEngine::Optimize() { std::unique_ptr<Timer> optimize_timer = clock_->GetNewTimer(); OptimizeStatsProto* optimize_stats = result_proto.mutable_optimize_stats(); int64_t before_size = filesystem_->GetDiskUsage(options_.base_dir().c_str()); - optimize_stats->set_storage_size_before( - Filesystem::SanitizeFileSize(before_size)); + if (before_size != Filesystem::kBadFileSize) { + optimize_stats->set_storage_size_before(before_size); + } else { + // Set -1 as a sentinel value when failures occur. + optimize_stats->set_storage_size_before(-1); + } // Flushes data to disk before doing optimization auto status = InternalPersistToDisk(PersistType::FULL); @@ -1279,8 +1084,12 @@ OptimizeResultProto IcingSearchEngine::Optimize() { optimize_status_file.Write(std::move(optimize_status)); int64_t after_size = filesystem_->GetDiskUsage(options_.base_dir().c_str()); - optimize_stats->set_storage_size_after( - Filesystem::SanitizeFileSize(after_size)); + if (after_size != Filesystem::kBadFileSize) { + optimize_stats->set_storage_size_after(after_size); + } else { + // Set -1 as a sentinel value when failures occur. + optimize_stats->set_storage_size_after(-1); + } optimize_stats->set_latency_ms(optimize_timer->GetElapsedMilliseconds()); TransformStatus(optimization_status, result_status); @@ -1362,8 +1171,11 @@ StorageInfoResultProto IcingSearchEngine::GetStorageInfo() { } int64_t index_size = filesystem_->GetDiskUsage(options_.base_dir().c_str()); - result.mutable_storage_info()->set_total_storage_size( - Filesystem::SanitizeFileSize(index_size)); + if (index_size != Filesystem::kBadFileSize) { + result.mutable_storage_info()->set_total_storage_size(index_size); + } else { + result.mutable_storage_info()->set_total_storage_size(-1); + } *result.mutable_storage_info()->mutable_document_storage_info() = document_store_->GetStorageInfo(); *result.mutable_storage_info()->mutable_schema_store_storage_info() = @@ -1453,8 +1265,8 @@ SearchResultProto IcingSearchEngine::Search( component_timer = clock_->GetNewTimer(); // Scores but does not rank the results. 
libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>> - scoring_processor_or = ScoringProcessor::Create( - scoring_spec, document_store_.get(), schema_store_.get()); + scoring_processor_or = + ScoringProcessor::Create(scoring_spec, document_store_.get()); if (!scoring_processor_or.ok()) { TransformStatus(scoring_processor_or.status(), result_status); return result_proto; @@ -1765,8 +1577,9 @@ IcingSearchEngine::RestoreIndexIfNeeded() { return {libtextclassifier3::Status::OK, false}; } - auto index_processor_or = - IndexProcessor::Create(normalizer_.get(), index_.get(), clock_.get()); + auto index_processor_or = IndexProcessor::Create( + normalizer_.get(), index_.get(), CreateIndexProcessorOptions(options_), + clock_.get()); if (!index_processor_or.ok()) { return {index_processor_or.status(), true}; } @@ -1844,18 +1657,22 @@ libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() { } ResetResultProto IcingSearchEngine::Reset() { - absl_ports::unique_lock l(&mutex_); - return ResetInternal(); -} - -ResetResultProto IcingSearchEngine::ResetInternal() { ICING_VLOG(1) << "Resetting IcingSearchEngine"; ResetResultProto result_proto; StatusProto* result_status = result_proto.mutable_status(); + absl_ports::unique_lock l(&mutex_); + initialized_ = false; - ResetMembers(); + + // Resets members variables + schema_store_.reset(); + document_store_.reset(); + language_segmenter_.reset(); + normalizer_.reset(); + index_.reset(); + if (!filesystem_->DeleteDirectoryRecursively(options_.base_dir().c_str())) { result_status->set_code(StatusProto::INTERNAL); return result_proto; @@ -1881,65 +1698,5 @@ ResetResultProto IcingSearchEngine::ResetInternal() { return result_proto; } -SuggestionResponse IcingSearchEngine::SearchSuggestions( - const SuggestionSpecProto& suggestion_spec) { - // TODO(b/146008613) Explore ideas to make this function read-only. - absl_ports::unique_lock l(&mutex_); - SuggestionResponse response; - StatusProto* response_status = response.mutable_status(); - if (!initialized_) { - response_status->set_code(StatusProto::FAILED_PRECONDITION); - response_status->set_message("IcingSearchEngine has not been initialized!"); - return response; - } - - libtextclassifier3::Status status = - ValidateSuggestionSpec(suggestion_spec, performance_configuration_); - if (!status.ok()) { - TransformStatus(status, response_status); - return response; - } - - // Create the suggestion processor. - auto suggestion_processor_or = SuggestionProcessor::Create( - index_.get(), language_segmenter_.get(), normalizer_.get()); - if (!suggestion_processor_or.ok()) { - TransformStatus(suggestion_processor_or.status(), response_status); - return response; - } - std::unique_ptr<SuggestionProcessor> suggestion_processor = - std::move(suggestion_processor_or).ValueOrDie(); - - std::unordered_set<NamespaceId> namespace_ids; - namespace_ids.reserve(suggestion_spec.namespace_filters_size()); - for (std::string_view name_space : suggestion_spec.namespace_filters()) { - auto namespace_id_or = document_store_->GetNamespaceId(name_space); - if (!namespace_id_or.ok()) { - continue; - } - namespace_ids.insert(namespace_id_or.ValueOrDie()); - } - - // Run suggestion based on given SuggestionSpec. 
- NamespaceCheckerImpl namespace_checker_impl(document_store_.get(), - std::move(namespace_ids)); - libtextclassifier3::StatusOr<std::vector<TermMetadata>> terms_or = - suggestion_processor->QuerySuggestions(suggestion_spec, - &namespace_checker_impl); - if (!terms_or.ok()) { - TransformStatus(terms_or.status(), response_status); - return response; - } - - // Convert vector<TermMetaData> into final SuggestionResponse proto. - for (TermMetadata& term : terms_or.ValueOrDie()) { - SuggestionResponse::Suggestion suggestion; - suggestion.set_query(std::move(term.content)); - response.mutable_suggestions()->Add(std::move(suggestion)); - } - response_status->set_code(StatusProto::OK); - return response; -} - } // namespace lib } // namespace icing diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h index ff9c7fb..855401f 100644 --- a/icing/icing-search-engine.h +++ b/icing/icing-search-engine.h @@ -280,9 +280,8 @@ class IcingSearchEngine { // NOT_FOUND if the query doesn't match any documents // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet // INTERNAL_ERROR on IO error - DeleteByQueryResultProto DeleteByQuery( - const SearchSpecProto& search_spec, - bool return_deleted_document_info = false) ICING_LOCKS_EXCLUDED(mutex_); + DeleteByQueryResultProto DeleteByQuery(const SearchSpecProto& search_spec) + ICING_LOCKS_EXCLUDED(mutex_); // Retrieves, scores, ranks, and returns the results according to the specs. // Results can be empty. If there're multiple pages of results, @@ -303,17 +302,6 @@ class IcingSearchEngine { const ResultSpecProto& result_spec) ICING_LOCKS_EXCLUDED(mutex_); - // Retrieves, scores, ranks and returns the suggested query string according - // to the specs. Results can be empty. - // - // Returns a SuggestionResponse with status: - // OK with results on success - // INVALID_ARGUMENT if any of specs is invalid - // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet - // INTERNAL_ERROR on any other errors - SuggestionResponse SearchSuggestions( - const SuggestionSpecProto& suggestion_spec) ICING_LOCKS_EXCLUDED(mutex_); - // Fetches the next page of results of a previously executed query. Results // can be empty if next-page token is invalid. Invalid next page tokens are // tokens that are either zero or were previously passed to @@ -464,25 +452,6 @@ class IcingSearchEngine { // Pointer to JNI class references const std::unique_ptr<const JniCache> jni_cache_; - // Resets all members that are created during Initialize. - void ResetMembers() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - - // Resets all members that are created during Initialize, deletes all - // underlying files and initializes a fresh index. - ResetResultProto ResetInternal() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - - // Checks for the existence of the init marker file. If the failed init count - // exceeds kMaxUnsuccessfulInitAttempts, all data is deleted and the index is - // initialized from scratch. The updated count (original failed init count + 1 - // ) is written to the marker file. - // - // RETURNS - // OK on success - // INTERNAL if an IO error occurs while trying to update the marker file. - libtextclassifier3::Status CheckInitMarkerFile( - InitializeStatsProto* initialize_stats) - ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - // Helper method to do the actual work to persist data to disk. We need this // separate method so that other public methods don't need to call // PersistToDisk(). 
Public methods calling each other may cause deadlock @@ -508,6 +477,15 @@ class IcingSearchEngine { InitializeStatsProto* initialize_stats) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + // Do any validation/setup required for the given IcingSearchEngineOptions + // + // Returns: + // OK on success + // INVALID_ARGUMENT if options has invalid values + // INTERNAL on I/O error + libtextclassifier3::Status InitializeOptions() + ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + // Do any initialization/recovery necessary to create a SchemaStore instance. // // Returns: diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc index 5e610d5..ba9aed1 100644 --- a/icing/icing-search-engine_benchmark.cc +++ b/icing/icing-search-engine_benchmark.cc @@ -43,6 +43,7 @@ #include "icing/testing/common-matchers.h" #include "icing/testing/document-generator.h" #include "icing/testing/random-string.h" +#include "icing/testing/recorder-test-utils.h" #include "icing/testing/schema-generator.h" #include "icing/testing/tmp-directory.h" @@ -177,12 +178,12 @@ class DestructibleDirectory { }; std::vector<DocumentProto> GenerateRandomDocuments( - EvenDistributionTypeSelector* type_selector, int num_docs, - const std::vector<std::string>& language) { + EvenDistributionTypeSelector* type_selector, int num_docs) { std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces); EvenDistributionNamespaceSelector namespace_selector(namespaces); std::default_random_engine random; + std::vector<std::string> language = CreateLanguages(kLanguageSize, &random); UniformDistributionLanguageTokenGenerator<std::default_random_engine> token_generator(language, &random); @@ -226,9 +227,8 @@ void BM_IndexLatency(benchmark::State& state) { ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); int num_docs = state.range(0); - std::vector<std::string> language = CreateLanguages(kLanguageSize, &random); const std::vector<DocumentProto> random_docs = - GenerateRandomDocuments(&type_selector, num_docs, language); + GenerateRandomDocuments(&type_selector, num_docs); Timer timer; for (const DocumentProto& doc : random_docs) { ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk()); @@ -271,56 +271,6 @@ BENCHMARK(BM_IndexLatency) ->ArgPair(1 << 15, 10) ->ArgPair(1 << 17, 10); -void BM_QueryLatency(benchmark::State& state) { - // Initialize the filesystem - std::string test_dir = GetTestTempDir() + "/icing/benchmark"; - Filesystem filesystem; - DestructibleDirectory ddir(filesystem, test_dir); - - // Create the schema. - std::default_random_engine random; - int num_types = kAvgNumNamespaces * kAvgNumTypes; - ExactStringPropertyGenerator property_generator; - SchemaGenerator<ExactStringPropertyGenerator> schema_generator( - /*num_properties=*/state.range(1), &property_generator); - SchemaProto schema = schema_generator.GenerateSchema(num_types); - EvenDistributionTypeSelector type_selector(schema); - - // Create the index. 
- IcingSearchEngineOptions options; - options.set_base_dir(test_dir); - options.set_index_merge_size(kIcingFullIndexSize); - std::unique_ptr<IcingSearchEngine> icing = - std::make_unique<IcingSearchEngine>(options); - - ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); - - int num_docs = state.range(0); - std::vector<std::string> language = CreateLanguages(kLanguageSize, &random); - const std::vector<DocumentProto> random_docs = - GenerateRandomDocuments(&type_selector, num_docs, language); - for (const DocumentProto& doc : random_docs) { - ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk()); - } - - SearchSpecProto search_spec = CreateSearchSpec( - language.at(0), std::vector<std::string>(), TermMatchType::PREFIX); - ResultSpecProto result_spec = CreateResultSpec(1000000, 1000000, 1000000); - ScoringSpecProto scoring_spec = - CreateScoringSpec(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); - for (auto _ : state) { - SearchResultProto results = icing->Search( - search_spec, ScoringSpecProto::default_instance(), result_spec); - } -} -BENCHMARK(BM_QueryLatency) - // Arguments: num_indexed_documents, num_sections - ->ArgPair(32, 2) - ->ArgPair(128, 2) - ->ArgPair(1 << 10, 2) - ->ArgPair(1 << 13, 2); - void BM_IndexThroughput(benchmark::State& state) { // Initialize the filesystem std::string test_dir = GetTestTempDir() + "/icing/benchmark"; @@ -347,9 +297,8 @@ void BM_IndexThroughput(benchmark::State& state) { ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); int num_docs = state.range(0); - std::vector<std::string> language = CreateLanguages(kLanguageSize, &random); const std::vector<DocumentProto> random_docs = - GenerateRandomDocuments(&type_selector, num_docs, language); + GenerateRandomDocuments(&type_selector, num_docs); for (auto s : state) { for (const DocumentProto& doc : random_docs) { ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk()); diff --git a/icing/icing-search-engine_fuzz_test.cc b/icing/icing-search-engine_fuzz_test.cc index bf486da..2d07e37 100644 --- a/icing/icing-search-engine_fuzz_test.cc +++ b/icing/icing-search-engine_fuzz_test.cc @@ -18,12 +18,12 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/document-builder.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/icing-search-engine.h" #include "icing/proto/document.pb.h" #include "icing/proto/initialize.pb.h" #include "icing/proto/scoring.pb.h" #include "icing/schema-builder.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" @@ -31,13 +31,13 @@ namespace icing { namespace lib { namespace { -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = - PropertyConfigProto::Cardinality::REQUIRED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; -constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; IcingSearchEngineOptions Setup() { IcingSearchEngineOptions icing_options; diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc 
index 13e77b8..4c15827 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -27,8 +27,8 @@ #include "icing/document-builder.h" #include "icing/file/filesystem.h" #include "icing/file/mock-filesystem.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/legacy/index/icing-mock-filesystem.h" -#include "icing/portable/endian.h" #include "icing/portable/equals-proto.h" #include "icing/portable/platform.h" #include "icing/proto/document.pb.h" @@ -45,7 +45,6 @@ #include "icing/store/document-log-creator.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/random-string.h" #include "icing/testing/snippet-helpers.h" @@ -90,24 +89,21 @@ constexpr std::string_view kIpsumText = "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh " "placerat semper."; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = - PropertyConfigProto::Cardinality::REQUIRED; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = - PropertyConfigProto::Cardinality::REPEATED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_NONE = - StringIndexingConfig::TokenizerType::NONE; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE = + StringIndexingConfig_TokenizerType_Code_NONE; -#ifndef ICING_JNI_TEST -constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; -#endif // !ICING_JNI_TEST - -constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; -constexpr TermMatchType::Code MATCH_NONE = TermMatchType::UNKNOWN; +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; +constexpr TermMatchType_Code MATCH_NONE = TermMatchType_Code_UNKNOWN; PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader( Filesystem filesystem, const std::string& file_path) { @@ -362,6 +358,36 @@ TEST_F(IcingSearchEngineTest, GoodIndexMergeSizeReturnsOk) { EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); } +TEST_F(IcingSearchEngineTest, + NegativeMaxTokensPerDocSizeReturnsInvalidArgument) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_max_tokens_per_doc(-1); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineTest, ZeroMaxTokensPerDocSizeReturnsInvalidArgument) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_max_tokens_per_doc(0); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), + 
ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineTest, GoodMaxTokensPerDocSizeReturnsOk) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + // INT_MAX is valid - it just means that we shouldn't limit the number of + // tokens per document. It would be pretty inconceivable that anyone would + // produce such a document - the text being indexed alone would take up at + // least ~4.3 GiB! - and the document would be rejected before indexing + // for exceeding max_document_size, but there's no reason to explicitly + // bar it. + options.set_max_tokens_per_doc(std::numeric_limits<int32_t>::max()); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); +} + TEST_F(IcingSearchEngineTest, NegativeMaxTokenLenReturnsInvalidArgument) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_max_token_length(-1); @@ -478,217 +504,6 @@ TEST_F(IcingSearchEngineTest, FailToCreateDocStore) { HasSubstr("Could not create directory")); } -TEST_F(IcingSearchEngineTest, InitMarkerFilePreviousFailuresAtThreshold) { - Filesystem filesystem; - DocumentProto email1 = - CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); - email1.set_creation_timestamp_ms(10000); - DocumentProto email2 = - CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); - email2.set_creation_timestamp_ms(10000); - - { - // Create an index with a few documents. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoIsOk()); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(0)); - ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); - } - - // Write an init marker file with 5 previously failed attempts. - std::string marker_filepath = GetTestBaseDir() + "/init_marker"; - - { - ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str())); - int network_init_attempts = GHostToNetworkL(5); - // Write the updated number of attempts before we get started. - ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0, - &network_init_attempts, - sizeof(network_init_attempts))); - ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get())); - } - - { - // Create the index again and verify that initialization succeeds and no - // data is thrown out. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoIsOk()); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(5)); - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) - .document(), - EqualsProto(email1)); - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) - .document(), - EqualsProto(email2)); - } - - // The successful init should have thrown out the marker file. - ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); -} - -TEST_F(IcingSearchEngineTest, InitMarkerFilePreviousFailuresBeyondThreshold) { - Filesystem filesystem; - DocumentProto email1 = - CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); - DocumentProto email2 = - CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); - - { - // Create an index with a few documents. 
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoIsOk()); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(0)); - ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); - } - - // Write an init marker file with 6 previously failed attempts. - std::string marker_filepath = GetTestBaseDir() + "/init_marker"; - - { - ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str())); - int network_init_attempts = GHostToNetworkL(6); - // Write the updated number of attempts before we get started. - ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0, - &network_init_attempts, - sizeof(network_init_attempts))); - ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get())); - } - - { - // Create the index again and verify that initialization succeeds and all - // data is thrown out. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), - ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(6)); - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); - } - - // The successful init should have thrown out the marker file. - ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); -} - -TEST_F(IcingSearchEngineTest, SuccessiveInitFailuresIncrementsInitMarker) { - Filesystem filesystem; - DocumentProto email1 = - CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); - DocumentProto email2 = - CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); - - { - // 1. Create an index with a few documents. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoIsOk()); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(0)); - ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); - } - - { - // 2. Create an index that will encounter an IO failure when trying to - // create the document log. - IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); - - auto mock_filesystem = std::make_unique<MockFilesystem>(); - std::string document_log_filepath = - icing_options.base_dir() + "/document_dir/document_log_v1"; - auto get_filesize_lambda = [this, - &document_log_filepath](const char* filename) { - if (strncmp(document_log_filepath.c_str(), filename, - document_log_filepath.length()) == 0) { - return Filesystem::kBadFileSize; - } - return this->filesystem()->GetFileSize(filename); - }; - ON_CALL(*mock_filesystem, GetFileSize(A<const char*>())) - .WillByDefault(get_filesize_lambda); - - TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem), - std::make_unique<IcingFilesystem>(), - std::make_unique<FakeClock>(), - GetTestJniCache()); - - // Fail to initialize six times in a row. 
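// The two marker-file tests in this hunk pin the recovery policy down from
// both sides: five recorded failures still initialize cleanly with data
// intact, while a sixth crosses the threshold and wipes everything with
// WARNING_DATA_LOSS. A sketch of that policy; kMaxUnsuccessfulInitAttempts
// is named in the header comment earlier in this change, but its value of 5
// is inferred from these tests rather than quoted from the implementation.
#include <cstdint>

constexpr int32_t kMaxUnsuccessfulInitAttempts = 5;  // inferred from tests

bool ShouldWipeDataOnInit(int32_t previous_failed_attempts) {
  // Wipe only when the count *exceeds* the threshold: 5 previous failures
  // keep the data, a 6th does not.
  return previous_failed_attempts > kMaxUnsuccessfulInitAttempts;
}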
- InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(0)); - - init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(1)); - - init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(2)); - - init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(3)); - - init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(4)); - - init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(5)); - } - - { - // 3. Create the index again and verify that initialization succeeds and all - // data is thrown out. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), - ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(6)); - - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); - } - - // The successful init should have thrown out the marker file. - std::string marker_filepath = GetTestBaseDir() + "/init_marker"; - ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); -} - TEST_F(IcingSearchEngineTest, CircularReferenceCreateSectionManagerReturnsInvalidArgument) { // Create a type config with a circular reference. @@ -765,7 +580,8 @@ TEST_F(IcingSearchEngineTest, FailToWriteSchema) { auto mock_filesystem = std::make_unique<MockFilesystem>(); // This fails FileBackedProto::Write() - ON_CALL(*mock_filesystem, OpenForWrite(HasSubstr("schema.pb"))) + ON_CALL(*mock_filesystem, + OpenForWrite(Eq(icing_options.base_dir() + "/schema_dir/schema.pb"))) .WillByDefault(Return(-1)); TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem), @@ -922,13 +738,7 @@ TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) { property->set_data_type(PropertyConfigProto::DataType::STRING); property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - SetSchemaResultProto set_schema_result = icing.SetSchema(schema); - // Ignore latency numbers. They're covered elsewhere. 
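// "Ignore latency numbers. They're covered elsewhere." is a recurring pattern
// in these tests: clock-driven fields are cleared from the actual proto
// before an exact EqualsProto comparison, so golden protos never need to
// predict timings. A reduced sketch of the same idea, with a plain struct
// standing in for SetSchemaResultProto:
#include <cstdint>

struct Result {
  int32_t status_code = 0;
  int64_t latency_ms = 0;  // nondeterministic: depends on the clock
};

bool EqualsIgnoringLatency(Result actual, const Result& expected) {
  actual.latency_ms = 0;  // mirrors set_schema_result.clear_latency_ms()
  return actual.status_code == expected.status_code &&
         actual.latency_ms == expected.latency_ms;
}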
- set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_new_schema_types()->Add("Email"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); } @@ -946,20 +756,12 @@ TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) { property->set_property_name("title"); property->set_data_type(PropertyConfigProto::DataType::STRING); property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property = type->add_properties(); property->set_property_name("body"); property->set_data_type(PropertyConfigProto::DataType::STRING); property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); // 3. SetSchema should succeed and the version number should be updated. - SetSchemaResultProto set_schema_result = icing.SetSchema(schema, true); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_fully_compatible_changed_schema_types() - ->Add("Email"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk()); EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2)); } @@ -1145,12 +947,7 @@ TEST_F(IcingSearchEngineTest, } TEST_F(IcingSearchEngineTest, SetSchema) { - auto fake_clock = std::make_unique<FakeClock>(); - fake_clock->SetTimerElapsedMilliseconds(1000); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique<Filesystem>(), - std::make_unique<IcingFilesystem>(), - std::move(fake_clock), GetTestJniCache()); + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); auto message_document = CreateMessageDocument("namespace", "uri"); @@ -1179,31 +976,26 @@ TEST_F(IcingSearchEngineTest, SetSchema) { empty_type->set_schema_type(""); // Make sure we can't set invalid schemas - SetSchemaResultProto set_schema_result = icing.SetSchema(invalid_schema); - EXPECT_THAT(set_schema_result.status(), + EXPECT_THAT(icing.SetSchema(invalid_schema).status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); - EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); // Can add an document of a set schema - set_schema_result = icing.SetSchema(schema_with_message); - EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK)); - EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); + EXPECT_THAT(icing.SetSchema(schema_with_message).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); // Schema with Email doesn't have Message, so would result incompatible // data - set_schema_result = icing.SetSchema(schema_with_email); - EXPECT_THAT(set_schema_result.status(), + EXPECT_THAT(icing.SetSchema(schema_with_email).status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); // Can expand the set of schema types and add an document of a new // schema type - set_schema_result = icing.SetSchema(schema_with_email_and_message); - EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK)); - 
EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); - + EXPECT_THAT(icing.SetSchema(SchemaProto(schema_with_email_and_message)) + .status() + .code(), + Eq(StatusProto::OK)); EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); + // Can't add an document whose schema isn't set auto photo_document = DocumentBuilder() .SetKey("namespace", "uri") @@ -1217,7 +1009,7 @@ TEST_F(IcingSearchEngineTest, SetSchema) { } TEST_F(IcingSearchEngineTest, - SetSchemaNewIndexedPropertyTriggersIndexRestorationAndReturnsOk) { + SetSchemaTriggersIndexRestorationAndReturnsOk) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); @@ -1226,15 +1018,8 @@ TEST_F(IcingSearchEngineTest, ->mutable_properties(0) ->clear_string_indexing_config(); - SetSchemaResultProto set_schema_result = - icing.SetSchema(schema_with_no_indexed_property); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_new_schema_types()->Add("Message"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - + EXPECT_THAT(icing.SetSchema(schema_with_no_indexed_property).status(), + ProtoIsOk()); // Nothing will be index and Search() won't return anything. EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), ProtoIsOk()); @@ -1255,14 +1040,8 @@ TEST_F(IcingSearchEngineTest, SchemaProto schema_with_indexed_property = CreateMessageSchema(); // Index restoration should be triggered here because new schema requires more // properties to be indexed. - set_schema_result = icing.SetSchema(schema_with_indexed_property); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - expected_set_schema_result = SetSchemaResultProto(); - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Message"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + EXPECT_THAT(icing.SetSchema(schema_with_indexed_property).status(), + ProtoIsOk()); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); @@ -1306,12 +1085,8 @@ TEST_F(IcingSearchEngineTest, .Build(); SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_set_schema_result; expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_new_schema_types()->Add("Email"); - expected_set_schema_result.mutable_new_schema_types()->Add("Person"); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); DocumentProto document = @@ -1378,12 +1153,8 @@ TEST_F(IcingSearchEngineTest, .Build(); set_schema_result = icing.SetSchema(no_nested_schema); - // Ignore latency numbers. They're covered elsewhere. 
- set_schema_result.clear_latency_ms(); expected_set_schema_result = SetSchemaResultProto(); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Email"); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); // document shouldn't match a query for 'Bill' in either 'sender.name' or @@ -1426,10 +1197,7 @@ TEST_F(IcingSearchEngineTest, SetSchemaResultProto set_schema_result = icing.SetSchema(email_with_body_schema); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_new_schema_types()->Add("Email"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1475,12 +1243,8 @@ TEST_F(IcingSearchEngineTest, set_schema_result = icing.SetSchema( email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); expected_set_schema_result = SetSchemaResultProto(); expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Email"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1518,10 +1282,7 @@ TEST_F( SetSchemaResultProto set_schema_result = icing.SetSchema(email_with_body_schema); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_new_schema_types()->Add("Email"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1575,12 +1336,8 @@ TEST_F( set_schema_result = icing.SetSchema( email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); expected_set_schema_result = SetSchemaResultProto(); expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Email"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1628,11 +1385,7 @@ TEST_F(IcingSearchEngineTest, ForceSetSchemaIncompatibleNestedDocsAreDeleted) { .Build(); SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_new_schema_types()->Add("Email"); - expected_set_schema_result.mutable_new_schema_types()->Add("Person"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1685,15 +1438,9 @@ TEST_F(IcingSearchEngineTest, ForceSetSchemaIncompatibleNestedDocsAreDeleted) { set_schema_result = icing.SetSchema( nested_schema, /*ignore_errors_and_delete_documents=*/true); - // Ignore latency numbers. They're covered elsewhere. 
- set_schema_result.clear_latency_ms(); expected_set_schema_result = SetSchemaResultProto(); expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person"); expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Email"); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Person"); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); @@ -1752,10 +1499,6 @@ TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) { property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); // Can't set the schema since it's incompatible - SetSchemaResultProto set_schema_result = - icing.SetSchema(schema_with_required_subject); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_set_schema_result_proto; expected_set_schema_result_proto.mutable_status()->set_code( StatusProto::FAILED_PRECONDITION); @@ -1763,17 +1506,15 @@ TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) { "Schema is incompatible."); expected_set_schema_result_proto.add_incompatible_schema_types("email"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto)); + EXPECT_THAT(icing.SetSchema(schema_with_required_subject), + EqualsProto(expected_set_schema_result_proto)); // Force set it - set_schema_result = - icing.SetSchema(schema_with_required_subject, - /*ignore_errors_and_delete_documents=*/true); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK); expected_set_schema_result_proto.mutable_status()->clear_message(); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto)); + EXPECT_THAT(icing.SetSchema(schema_with_required_subject, + /*ignore_errors_and_delete_documents=*/true), + EqualsProto(expected_set_schema_result_proto)); GetResultProto expected_get_result_proto; expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); @@ -1830,25 +1571,19 @@ TEST_F(IcingSearchEngineTest, SetSchemaDeletesDocumentsAndReturnsOk) { type->set_schema_type("email"); // Can't set the schema since it's incompatible - SetSchemaResultProto set_schema_result = icing.SetSchema(new_schema); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); SetSchemaResultProto expected_result; expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION); expected_result.mutable_status()->set_message("Schema is incompatible."); expected_result.add_deleted_schema_types("message"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_result)); + EXPECT_THAT(icing.SetSchema(new_schema), EqualsProto(expected_result)); // Force set it - set_schema_result = - icing.SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/true); - // Ignore latency numbers. They're covered elsewhere. 
- set_schema_result.clear_latency_ms(); expected_result.mutable_status()->set_code(StatusProto::OK); expected_result.mutable_status()->clear_message(); - EXPECT_THAT(set_schema_result, EqualsProto(expected_result)); + EXPECT_THAT(icing.SetSchema(new_schema, + /*ignore_errors_and_delete_documents=*/true), + EqualsProto(expected_result)); // "email" document is still there GetResultProto expected_get_result_proto; @@ -2167,7 +1902,7 @@ TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) { search_spec.set_query("message"); ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec.mutable_snippet_spec()->set_max_window_bytes(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); result_spec.mutable_snippet_spec()->set_num_to_snippet(1); @@ -2585,7 +2320,7 @@ TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) { ResultSpecProto result_spec; result_spec.set_num_per_page(2); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec.mutable_snippet_spec()->set_max_window_bytes(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); result_spec.mutable_snippet_spec()->set_num_to_snippet(3); @@ -2992,17 +2727,13 @@ TEST_F(IcingSearchEngineTest, OptimizationFailureUninitializesIcing) { }; ON_CALL(*mock_filesystem, CreateDirectoryRecursively) .WillByDefault(create_dir_lambda); - auto swap_lambda = [&just_swapped_files](const char* first_dir, const char* second_dir) { just_swapped_files = true; return false; }; - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"), - HasSubstr("document_dir"))) - .WillByDefault(swap_lambda); - TestIcingSearchEngine icing(options, std::move(mock_filesystem), + ON_CALL(*mock_filesystem, SwapFiles).WillByDefault(swap_lambda); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::move(mock_filesystem), std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), GetTestJniCache()); @@ -3455,16 +3186,11 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) { search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec); EXPECT_THAT(result_proto.status(), ProtoIsOk()); - DeleteByQueryStatsProto exp_stats; + DeleteStatsProto exp_stats; + exp_stats.set_delete_type(DeleteStatsProto::DeleteType::QUERY); exp_stats.set_latency_ms(7); exp_stats.set_num_documents_deleted(1); - exp_stats.set_query_length(search_spec.query().length()); - exp_stats.set_num_terms(1); - exp_stats.set_num_namespaces_filtered(0); - exp_stats.set_num_schema_types_filtered(0); - exp_stats.set_parse_query_latency_ms(7); - exp_stats.set_document_removal_latency_ms(7); - EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats)); + EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); expected_get_result_proto.mutable_status()->set_message( @@ -3496,105 +3222,6 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) { expected_search_result_proto)); } -TEST_F(IcingSearchEngineTest, DeleteByQueryReturnInfo) { - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Message") - .AddStringProperty("body", "message body1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace2", "uri2") - 
.SetSchema("Message") - .AddStringProperty("body", "message body2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace2", "uri3") - .SetSchema("Message") - .AddStringProperty("body", "message body3") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - auto fake_clock = std::make_unique<FakeClock>(); - fake_clock->SetTimerElapsedMilliseconds(7); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique<Filesystem>(), - std::make_unique<IcingFilesystem>(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - *expected_get_result_proto.mutable_document() = document3; - EXPECT_THAT( - icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Delete all docs to test the information is correctly grouped. - SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - DeleteByQueryResultProto result_proto = - icing.DeleteByQuery(search_spec, true); - EXPECT_THAT(result_proto.status(), ProtoIsOk()); - DeleteByQueryStatsProto exp_stats; - exp_stats.set_latency_ms(7); - exp_stats.set_num_documents_deleted(3); - exp_stats.set_query_length(search_spec.query().length()); - exp_stats.set_num_terms(1); - exp_stats.set_num_namespaces_filtered(0); - exp_stats.set_num_schema_types_filtered(0); - exp_stats.set_parse_query_latency_ms(7); - exp_stats.set_document_removal_latency_ms(7); - EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats)); - - // Check that DeleteByQuery can return information for deleted documents. - DeleteByQueryResultProto::DocumentGroupInfo info1, info2; - info1.set_namespace_("namespace1"); - info1.set_schema("Message"); - info1.add_uris("uri1"); - info2.set_namespace_("namespace2"); - info2.set_schema("Message"); - info2.add_uris("uri3"); - info2.add_uris("uri2"); - EXPECT_THAT(result_proto.deleted_documents(), - UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2))); - - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()) - .status() - .code(), - Eq(StatusProto::NOT_FOUND)); - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()) - .status() - .code(), - Eq(StatusProto::NOT_FOUND)); - EXPECT_THAT( - icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()) - .status() - .code(), - Eq(StatusProto::NOT_FOUND)); -} - TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) { DocumentProto document1 = DocumentBuilder() @@ -3755,8 +3382,7 @@ TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) { // fails. 
This will fail IcingSearchEngine::OptimizeDocumentStore() and makes // it return ABORTED_ERROR. auto mock_filesystem = std::make_unique<MockFilesystem>(); - ON_CALL(*mock_filesystem, - DeleteDirectoryRecursively(HasSubstr("_optimize_tmp"))) + ON_CALL(*mock_filesystem, DeleteDirectoryRecursively) .WillByDefault(Return(false)); TestIcingSearchEngine icing(GetDefaultIcingOptions(), @@ -3803,8 +3429,7 @@ TEST_F(IcingSearchEngineTest, // Creates a mock filesystem in which SwapFiles() always fails and deletes the // directories. This will fail IcingSearchEngine::OptimizeDocumentStore(). auto mock_filesystem = std::make_unique<MockFilesystem>(); - ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"), - HasSubstr("document_dir"))) + ON_CALL(*mock_filesystem, SwapFiles) .WillByDefault([this](const char* one, const char* two) { filesystem()->DeleteDirectoryRecursively(one); filesystem()->DeleteDirectoryRecursively(two); @@ -3875,8 +3500,7 @@ TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) { // Creates a mock filesystem in which SwapFiles() always fails and empties the // directories. This will fail IcingSearchEngine::OptimizeDocumentStore(). auto mock_filesystem = std::make_unique<MockFilesystem>(); - ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"), - HasSubstr("document_dir"))) + ON_CALL(*mock_filesystem, SwapFiles) .WillByDefault([this](const char* one, const char* two) { filesystem()->DeleteDirectoryRecursively(one); filesystem()->CreateDirectoryRecursively(one); @@ -5807,230 +5431,74 @@ TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) { EqualsSearchResultIgnoreStatsAndScores(empty_result)); } -TEST_F(IcingSearchEngineTest, ImplicitPersistToDiskFullSavesEverything) { - DocumentProto document = CreateMessageDocument("namespace", "uri"); +TEST_F(IcingSearchEngineTest, PersistToDisk) { + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = + CreateMessageDocument("namespace", "uri"); + { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - } // Destructing calls a PersistToDisk(FULL) - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - - // There should be no recovery since everything should be saved properly. - InitializeResultProto init_result = icing.Initialize(); - EXPECT_THAT(init_result.status(), ProtoIsOk()); - EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - - // Schema is still intact. 
- GetSchemaResultProto expected_get_schema_result_proto; - expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); + EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); - EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto)); + // Persisting shouldn't affect anything + EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); - // Documents are still intact. - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document; + EXPECT_THAT( + icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + } // Destructing persists as well + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT( icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); - - // Index is still intact. - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); // Content in the Message document. - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document; - - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); } -TEST_F(IcingSearchEngineTest, ExplicitPersistToDiskFullSavesEverything) { - DocumentProto document = CreateMessageDocument("namespace", "uri"); - - // Add schema and documents to our first icing1 instance. +TEST_F(IcingSearchEngineTest, NoPersistToDiskLiteDoesntPersistPut) { IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); - EXPECT_THAT(icing1.PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); - - // Initialize a second icing2 instance which should have it's own memory - // space. If data from icing1 isn't being persisted to the files, then icing2 - // won't be able to see those changes. - IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); - - // There should be no recovery since everything should be saved properly. - InitializeResultProto init_result = icing2.Initialize(); - EXPECT_THAT(init_result.status(), ProtoIsOk()); - EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - - // Schema is still intact. 
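// Taken together, this test and the LITE/no-persist variants below spell out
// the durability contract asserted in this hunk: PersistToDisk(FULL), which
// the destructor also runs, keeps schema, documents, and index intact across
// a restart; PersistToDisk(LITE) keeps the ground-truth document log while
// the index is rebuilt during recovery; skipping persistence risks losing
// unflushed documents. A compact summary of what the assertions check, with
// names invented here for illustration:
enum class Persist { kNone, kLite, kFull };

struct SurvivesRestart {
  bool schema;
  bool documents;
  bool index_intact;  // intact without an index-restoration recovery step
};

SurvivesRestart AfterRestart(Persist persist) {
  switch (persist) {
    case Persist::kFull:
      return {true, true, true};
    case Persist::kLite:
      return {true, true, false};  // document log survives; index rebuilt
    case Persist::kNone:
      return {true, false, false};  // unflushed documents are lost
  }
  return {false, false, false};
}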
- GetSchemaResultProto expected_get_schema_result_proto; - expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); - - EXPECT_THAT(icing2.GetSchema(), - EqualsProto(expected_get_schema_result_proto)); - - // Documents are still intact. - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document; - - EXPECT_THAT( - icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Index is still intact. - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); // Content in the Message document. - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document; - - SearchResultProto actual_results = - icing2.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, NoPersistToDiskLosesAllDocumentsAndIndex) { - IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - DocumentProto document = CreateMessageDocument("namespace", "uri"); - EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); + DocumentProto document1 = CreateMessageDocument("namespace", "uri"); + EXPECT_THAT(icing1.Put(document1).status(), ProtoIsOk()); EXPECT_THAT( icing1.Get("namespace", "uri", GetResultSpecProto::default_instance()) .document(), - EqualsProto(document)); - - // It's intentional that no PersistToDisk call is made before initializing a - // second instance of icing. + EqualsProto(document1)); IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing2.Initialize(); - EXPECT_THAT(init_result.status(), ProtoIsOk()); - EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::PARTIAL_LOSS)); - EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::DATA_LOSS)); - EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - + EXPECT_THAT(icing2.Initialize().status(), ProtoIsOk()); // The document shouldn't be found because we forgot to call // PersistToDisk(LITE)! EXPECT_THAT( icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()) .status(), ProtoStatusIs(StatusProto::NOT_FOUND)); - - // Searching also shouldn't get us anything because the index wasn't - // recovered. - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); // Content in the Message document. 
- - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - - SearchResultProto actual_results = - icing2.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); } -TEST_F(IcingSearchEngineTest, PersistToDiskLiteSavesGroundTruth) { - DocumentProto document = CreateMessageDocument("namespace", "uri"); - +TEST_F(IcingSearchEngineTest, PersistToDiskLitePersistsPut) { IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); + DocumentProto document1 = CreateMessageDocument("namespace", "uri"); + EXPECT_THAT(icing1.Put(document1).status(), ProtoIsOk()); EXPECT_THAT(icing1.PersistToDisk(PersistType::LITE).status(), ProtoIsOk()); EXPECT_THAT( icing1.Get("namespace", "uri", GetResultSpecProto::default_instance()) .document(), - EqualsProto(document)); + EqualsProto(document1)); IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing2.Initialize(); - EXPECT_THAT(init_result.status(), ProtoIsOk()); - EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - - // A checksum mismatch gets reported as an IO error. The document store and - // index didn't have their derived files included in the checksum previously, - // so reinitializing will trigger a checksum mismatch. - EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::IO_ERROR)); - EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::IO_ERROR)); - - // Schema is still intact. - GetSchemaResultProto expected_get_schema_result_proto; - expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); - - EXPECT_THAT(icing2.GetSchema(), - EqualsProto(expected_get_schema_result_proto)); - + EXPECT_THAT(icing2.Initialize().status(), ProtoIsOk()); // The document should be found because we called PersistToDisk(LITE)! EXPECT_THAT( icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()) .document(), - EqualsProto(document)); - - // Recovered index is still intact. - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); // Content in the Message document. 
- - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document; - - SearchResultProto actual_results = - icing2.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); + EqualsProto(document1)); } TEST_F(IcingSearchEngineTest, ResetOk) { @@ -6123,7 +5591,7 @@ TEST_F(IcingSearchEngineTest, SnippetNormalization) { search_spec.set_query("mdi Zürich"); ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec.mutable_snippet_spec()->set_max_window_bytes(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(2); result_spec.mutable_snippet_spec()->set_num_to_snippet(2); @@ -6186,7 +5654,7 @@ TEST_F(IcingSearchEngineTest, SnippetNormalizationPrefix) { search_spec.set_query("md Zür"); ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec.mutable_snippet_spec()->set_max_window_bytes(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(2); result_spec.mutable_snippet_spec()->set_num_to_snippet(2); @@ -6241,7 +5709,7 @@ TEST_F(IcingSearchEngineTest, SnippetSectionRestrict) { search_spec.set_query("body:Zür"); ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec.mutable_snippet_spec()->set_max_window_bytes(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(10); result_spec.mutable_snippet_spec()->set_num_to_snippet(10); @@ -7514,6 +6982,10 @@ TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexingStats) { // No merge should happen. EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(), Eq(0)); + // Number of tokens should not exceed. + EXPECT_FALSE(put_result_proto.put_document_stats() + .tokenization_stats() + .exceeded_max_token_num()); // The input document has 2 tokens. EXPECT_THAT(put_result_proto.put_document_stats() .tokenization_stats() @@ -7521,6 +6993,33 @@ TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexingStats) { Eq(2)); } +TEST_F(IcingSearchEngineTest, PutDocumentShouldLogWhetherNumTokensExceeds) { + // Create a document with 2 tokens. + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .Build(); + + // Create an icing instance with max_tokens_per_doc = 1. + IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + icing_options.set_max_tokens_per_doc(1); + IcingSearchEngine icing(icing_options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + PutResultProto put_result_proto = icing.Put(document); + EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); + // Number of tokens(2) exceeds the max allowed value(1). 
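// The assertion pair below encodes the truncation behavior: with
// max_tokens_per_doc = 1 and the two-token body "message body", indexing
// stops at the limit and the overflow is flagged rather than treated as an
// error. A self-contained sketch of that behavior; the helper is an
// illustration, not Icing's actual tokenizer:
#include <cstddef>
#include <string>
#include <vector>

struct TokenizationResult {
  std::vector<std::string> indexed_tokens;
  bool exceeded_max_token_num = false;
};

TokenizationResult IndexWithLimit(const std::vector<std::string>& tokens,
                                  std::size_t max_tokens) {
  TokenizationResult result;
  for (const std::string& token : tokens) {
    if (result.indexed_tokens.size() >= max_tokens) {
      result.exceeded_max_token_num = true;  // flag it, don't error out
      break;
    }
    result.indexed_tokens.push_back(token);
  }
  return result;
}
// With tokens {"message", "body"} and max_tokens = 1, this indexes one token
// and sets exceeded_max_token_num, matching the two expectations below.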
+ EXPECT_TRUE(put_result_proto.put_document_stats() + .tokenization_stats() + .exceeded_max_token_num()); + EXPECT_THAT(put_result_proto.put_document_stats() + .tokenization_stats() + .num_tokens_indexed(), + Eq(1)); +} + TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexMergeLatency) { DocumentProto document1 = DocumentBuilder() .SetKey("icing", "fake_type/1") @@ -7769,7 +7268,7 @@ TEST_F(IcingSearchEngineTest, QueryStatsProtoTest) { ResultSpecProto result_spec; result_spec.set_num_per_page(2); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec.mutable_snippet_spec()->set_max_window_bytes(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); result_spec.mutable_snippet_spec()->set_num_to_snippet(3); @@ -7980,7 +7479,7 @@ TEST_F(IcingSearchEngineTest, SnippetErrorTest) { ResultSpecProto result_spec; result_spec.mutable_snippet_spec()->set_num_to_snippet(2); result_spec.mutable_snippet_spec()->set_num_matches_per_property(3); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(4); + result_spec.mutable_snippet_spec()->set_max_window_bytes(4); SearchResultProto search_results = icing.Search(search_spec, scoring_spec, result_spec); @@ -8088,599 +7587,6 @@ TEST_F(IcingSearchEngineTest, CJKSnippetTest) { EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2)); } -TEST_F(IcingSearchEngineTest, InvalidToEmptyQueryTest) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // String: "Luca Brasi sleeps with the 🐟🐟🐟." - // ^ ^ ^ ^ ^ ^ ^ ^ ^ - // UTF8 idx: 0 5 11 18 23 27 3135 39 - // UTF16 idx: 0 5 11 18 23 27 2931 33 - // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟" - // and "🐟". - constexpr std::string_view kSicilianMessage = - "Luca Brasi sleeps with the 🐟🐟🐟."; - DocumentProto document = DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Message") - .AddStringProperty("body", kSicilianMessage) - .Build(); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "Some other content.") - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - // Search and request snippet matching but no windowing. 
- SearchSpecProto search_spec; - search_spec.set_query("?"); - search_spec.set_term_match_type(MATCH_PREFIX); - ScoringSpecProto scoring_spec; - ResultSpecProto result_spec; - - // Search and make sure that we got a single successful result - SearchResultProto search_results = - icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); - - search_spec.set_query("。"); - search_results = icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); - - search_spec.set_query("-"); - search_results = icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); - - search_spec.set_query(":"); - search_results = icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); - - search_spec.set_query("OR"); - search_results = icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); - - search_spec.set_query(" "); - search_results = icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); -} - -TEST_F(IcingSearchEngineTest, EmojiSnippetTest) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // String: "Luca Brasi sleeps with the 🐟🐟🐟." - // ^ ^ ^ ^ ^ ^ ^ ^ ^ - // UTF8 idx: 0 5 11 18 23 27 3135 39 - // UTF16 idx: 0 5 11 18 23 27 2931 33 - // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟" - // and "🐟". - constexpr std::string_view kSicilianMessage = - "Luca Brasi sleeps with the 🐟🐟🐟."; - DocumentProto document = DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Message") - .AddStringProperty("body", kSicilianMessage) - .Build(); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "Some other content.") - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - // Search and request snippet matching but no windowing. 
- SearchSpecProto search_spec; - search_spec.set_query("🐟"); - search_spec.set_term_match_type(MATCH_PREFIX); - - ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_num_to_snippet(1); - result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); - - // Search and make sure that we got a single successful result - SearchResultProto search_results = icing.Search( - search_spec, ScoringSpecProto::default_instance(), result_spec); - ASSERT_THAT(search_results.status(), ProtoIsOk()); - ASSERT_THAT(search_results.results(), SizeIs(1)); - const SearchResultProto::ResultProto* result = &search_results.results(0); - EXPECT_THAT(result->document().uri(), Eq("uri1")); - - // Ensure that one and only one property was matched and it was "body" - ASSERT_THAT(result->snippet().entries(), SizeIs(1)); - const SnippetProto::EntryProto* entry = &result->snippet().entries(0); - EXPECT_THAT(entry->property_name(), Eq("body")); - - // Get the content for "subject" and see what the match is. - std::string_view content = GetString(&result->document(), "body"); - ASSERT_THAT(content, Eq(kSicilianMessage)); - - // Ensure that there is one and only one match within "subject" - ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); - const SnippetMatchProto& match_proto = entry->snippet_matches(0); - - EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(27)); - EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(4)); - std::string_view match = - content.substr(match_proto.exact_match_byte_position(), - match_proto.exact_match_byte_length()); - ASSERT_THAT(match, Eq("🐟")); - - // Ensure that the utf-16 values are also as expected - EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(27)); - EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2)); -} - -TEST_F(IcingSearchEngineTest, PutDocumentIndexFailureDeletion) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Testing has shown that adding ~600,000 terms generated this way will - // fill up the hit buffer. - std::vector<std::string> terms = GenerateUniqueTerms(600000); - std::string content = absl_ports::StrJoin(terms, " "); - DocumentProto document = DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Message") - .AddStringProperty("body", "foo " + content) - .Build(); - // We failed to add the document to the index fully. This means that we should - // reject the document from Icing entirely. - ASSERT_THAT(icing.Put(document).status(), - ProtoStatusIs(StatusProto::OUT_OF_SPACE)); - - // Make sure that the document isn't searchable. - SearchSpecProto search_spec; - search_spec.set_query("foo"); - search_spec.set_term_match_type(MATCH_PREFIX); - - SearchResultProto search_results = - icing.Search(search_spec, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); - ASSERT_THAT(search_results.status(), ProtoIsOk()); - ASSERT_THAT(search_results.results(), IsEmpty()); - - // Make sure that the document isn't retrievable. 
- GetResultProto get_result = - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()); - ASSERT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - // Creates and inserts 6 documents, and index 6 termSix, 5 termFive, 4 - // termFour, 3 termThree, 2 termTwo and one termOne. - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty( - "subject", "termOne termTwo termThree termFour termFive termSix") - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", - "termTwo termThree termFour termFive termSix") - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace", "uri3") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termThree termFour termFive termSix") - .Build(); - DocumentProto document4 = - DocumentBuilder() - .SetKey("namespace", "uri4") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termFour termFive termSix") - .Build(); - DocumentProto document5 = - DocumentBuilder() - .SetKey("namespace", "uri5") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termFive termSix") - .Build(); - DocumentProto document6 = DocumentBuilder() - .SetKey("namespace", "uri6") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termSix") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk()); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("t"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - - // Query all suggestions, and they will be ranked. - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions().at(0).query(), "termsix"); - ASSERT_THAT(response.suggestions().at(1).query(), "termfive"); - ASSERT_THAT(response.suggestions().at(2).query(), "termfour"); - ASSERT_THAT(response.suggestions().at(3).query(), "termthree"); - ASSERT_THAT(response.suggestions().at(4).query(), "termtwo"); - ASSERT_THAT(response.suggestions().at(5).query(), "termone"); - - // Query first three suggestions, and they will be ranked. 
- suggestion_spec.set_num_to_return(3); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions().at(0).query(), "termsix"); - ASSERT_THAT(response.suggestions().at(1).query(), "termfive"); - ASSERT_THAT(response.suggestions().at(2).query(), "termfour"); -} - -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_ShouldReturnInOneNamespace) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "foo fool") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFoo; - suggestionFoo.set_query("foo"); - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - - // namespace1 has 2 results. - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.add_namespace_filters("namespace1"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFoo), - EqualsProto(suggestionFool))); -} - -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_ShouldReturnInMultipleNamespace) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fo") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "foo") - .Build(); - DocumentProto document3 = DocumentBuilder() - .SetKey("namespace3", "uri3") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFoo; - suggestionFoo.set_query("foo"); - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - - // namespace2 and namespace3 has 2 results. 
- SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.add_namespace_filters("namespace2"); - suggestion_spec.add_namespace_filters("namespace3"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFoo), - EqualsProto(suggestionFool))); -} - -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_OtherNamespaceDontContributeToHitCount) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - // Index 4 documents, - // namespace1 has 2 hit2 for term one - // namespace2 has 2 hit2 for term two and 1 hit for term one. - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termone") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace1", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termone") - .Build(); - DocumentProto document3 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termone termtwo") - .Build(); - DocumentProto document4 = DocumentBuilder() - .SetKey("namespace2", "uri3") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termtwo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionTermOne; - suggestionTermOne.set_query("termone"); - SuggestionResponse::Suggestion suggestionTermTwo; - suggestionTermTwo.set_query("termtwo"); - - // only search suggestion for namespace2. The correctly order should be - // {"termtwo", "termone"}. If we're not filtering out namespace1 when - // calculating our score, then it will be {"termone", "termtwo"}. 
- SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("t"); - suggestion_spec.add_namespace_filters("namespace2"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - ElementsAre(EqualsProto(suggestionTermTwo), - EqualsProto(suggestionTermOne))); -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_DeletionTest) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - - // namespace1 has this suggestion - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.add_namespace_filters("namespace1"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); - - // namespace2 has this suggestion - suggestion_spec.clear_namespace_filters(); - suggestion_spec.add_namespace_filters("namespace2"); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); - - // delete document from namespace 1 - EXPECT_THAT(icing.Delete("namespace1", "uri1").status(), ProtoIsOk()); - - // Now namespace1 will return empty - suggestion_spec.clear_namespace_filters(); - suggestion_spec.add_namespace_filters("namespace1"); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), IsEmpty()); - - // namespace2 still has this suggestion, so we can prove the reason of - // namespace 1 cannot find it is we filter it out, not it doesn't exist. 
- suggestion_spec.add_namespace_filters("namespace2"); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_ExpiredTest) { - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(100) - .SetTtlMs(500) - .AddStringProperty("subject", "fool") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(100) - .SetTtlMs(1000) - .AddStringProperty("subject", "fool") - .Build(); - { - auto fake_clock = std::make_unique<FakeClock>(); - fake_clock->SetSystemTimeMilliseconds(400); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique<Filesystem>(), - std::make_unique<IcingFilesystem>(), - std::move(fake_clock), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - - // namespace1 has this suggestion - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.add_namespace_filters("namespace1"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); - - // namespace2 has this suggestion - suggestion_spec.clear_namespace_filters(); - suggestion_spec.add_namespace_filters("namespace2"); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); - } - // We reinitialize here so we can feed in a fake clock this time - { - // Time needs to be past document1 creation time (100) + ttl (500) for it - // to count as "expired". document2 is not expired since its ttl is 1000. 
- auto fake_clock = std::make_unique<FakeClock>(); - fake_clock->SetSystemTimeMilliseconds(800); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique<Filesystem>(), - std::make_unique<IcingFilesystem>(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.add_namespace_filters("namespace1"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - - // Now namespace1 will return empty - suggestion_spec.clear_namespace_filters(); - suggestion_spec.add_namespace_filters("namespace1"); - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), IsEmpty()); - - // namespace2 still has this suggestion - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - - suggestion_spec.add_namespace_filters("namespace2"); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); - } -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_emptyPrefix) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix(""); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - - ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NonPositiveNumToReturn) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("prefix"); - suggestion_spec.set_num_to_return(0); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - - ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - -#ifndef ICING_JNI_TEST // We skip this test case when we're running in a jni_test since the data files // will be stored in the android-instrumented storage location, rather than the // normal cc_library runfiles directory. To get that storage location, it's @@ -8690,6 +7596,12 @@ TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NonPositiveNumToReturn) { // this native side yet, we're just going to disable this. The functionality is // already well-tested across 4 different emulated OS's so we're not losing much // test coverage here. +#ifndef ICING_JNI_TEST +// Disable backwards compat test. This test is enabled in google3, but disabled +// in jetpack/framework because we didn't want to keep the binary testdata files +// in our repo. 
+#define DISABLE_BACKWARDS_COMPAT_TEST +#ifndef DISABLE_BACKWARDS_COMPAT_TEST TEST_F(IcingSearchEngineTest, MigrateToPortableFileBackedProtoLog) { // Copy the testdata files into our IcingSearchEngine directory std::string dir_without_portable_log; @@ -8729,7 +7641,7 @@ TEST_F(IcingSearchEngineTest, MigrateToPortableFileBackedProtoLog) { EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT)); + Eq(InitializeStatsProto::NONE)); EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), @@ -8843,6 +7755,7 @@ TEST_F(IcingSearchEngineTest, MigrateToPortableFileBackedProtoLog) { EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(expected_document3)); } +#endif // DISABLE_BACKWARDS_COMPAT_TEST #endif // !ICING_JNI_TEST } // namespace diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc index 207c033..6d8632f 100644 --- a/icing/index/index-processor.cc +++ b/icing/index/index-processor.cc @@ -43,13 +43,14 @@ namespace lib { libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>> IndexProcessor::Create(const Normalizer* normalizer, Index* index, + const IndexProcessor::Options& options, const Clock* clock) { ICING_RETURN_ERROR_IF_NULL(normalizer); ICING_RETURN_ERROR_IF_NULL(index); ICING_RETURN_ERROR_IF_NULL(clock); return std::unique_ptr<IndexProcessor>( - new IndexProcessor(normalizer, index, clock)); + new IndexProcessor(normalizer, index, options, clock)); } libtextclassifier3::Status IndexProcessor::IndexDocument( @@ -65,48 +66,53 @@ libtextclassifier3::Status IndexProcessor::IndexDocument( } index_->set_last_added_document_id(document_id); uint32_t num_tokens = 0; - libtextclassifier3::Status status; + libtextclassifier3::Status overall_status; for (const TokenizedSection& section : tokenized_document.sections()) { // TODO(b/152934343): pass real namespace ids in Index::Editor editor = index_->Edit(document_id, section.metadata.id, section.metadata.term_match_type, /*namespace_id=*/0); for (std::string_view token : section.token_sequence) { - ++num_tokens; - - switch (section.metadata.tokenizer) { - case StringIndexingConfig::TokenizerType::VERBATIM: - // data() is safe to use here because a token created from the - // VERBATIM tokenizer is the entire string value. The character at - // data() + token.length() is guaranteed to be a null char. - status = editor.BufferTerm(token.data()); - break; - case StringIndexingConfig::TokenizerType::NONE: - ICING_LOG(WARNING) - << "Unexpected TokenizerType::NONE found when indexing document."; - [[fallthrough]]; - case StringIndexingConfig::TokenizerType::PLAIN: - std::string normalized_term = normalizer_.NormalizeTerm(token); - status = editor.BufferTerm(normalized_term.c_str()); + if (++num_tokens > options_.max_tokens_per_document) { + // Index all tokens buffered so far. 
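+          // Everything buffered so far is committed below; tokens past the
+          // limit are never indexed, regardless of which TokenLimitBehavior
+          // is configured.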
+          editor.IndexAllBufferedTerms();
+          if (put_document_stats != nullptr) {
+            put_document_stats->mutable_tokenization_stats()
+                ->set_exceeded_max_token_num(true);
+            put_document_stats->mutable_tokenization_stats()
+                ->set_num_tokens_indexed(options_.max_tokens_per_document);
+          }
+          switch (options_.token_limit_behavior) {
+            case Options::TokenLimitBehavior::kReturnError:
+              return absl_ports::ResourceExhaustedError(
+                  "Max number of tokens reached!");
+            case Options::TokenLimitBehavior::kSuppressError:
+              return overall_status;
+          }
       }
-
-      if (!status.ok()) {
-        // We've encountered a failure. Bail out. We'll mark this doc as deleted
-        // and signal a failure to the client.
-        ICING_LOG(WARNING) << "Failed to buffer term in lite lexicon due to: "
-                           << status.error_message();
-        break;
+      std::string term = normalizer_.NormalizeTerm(token);
+      // Add this term to the hit buffer. Even if adding this hit fails, we
+      // keep trying to add more hits because it's possible that future hits
+      // could still be added successfully. For instance, if the lexicon is
+      // full, we might fail to add a hit for a new term, but should still be
+      // able to add hits for terms that are already in the index.
+      auto status = editor.BufferTerm(term.c_str());
+      if (overall_status.ok() && !status.ok()) {
+        // If we've succeeded in adding everything so far, set overall_status
+        // to represent this new failure. If we've already failed, no need to
+        // update the status - we're already going to return a resource
+        // exhausted error.
+        overall_status = status;
+      }
     }
-    if (!status.ok()) {
-      break;
-    }
     // Add all the seen terms to the index with their term frequency.
-    status = editor.IndexAllBufferedTerms();
-    if (!status.ok()) {
-      ICING_LOG(WARNING) << "Failed to add hits in lite index due to: "
-                         << status.error_message();
-      break;
+    auto status = editor.IndexAllBufferedTerms();
+    if (overall_status.ok() && !status.ok()) {
+      // If we've succeeded so far, set overall_status to represent this new
+      // failure. If we've already failed, no need to update the status -
+      // we're already going to return a resource exhausted error.
+      overall_status = status;
     }
   }
@@ -117,11 +123,9 @@ libtextclassifier3::Status IndexProcessor::IndexDocument(
       num_tokens);
   }
-  // If we're either successful or we've hit resource exhausted, then attempt a
-  // merge.
-  if ((status.ok() || absl_ports::IsResourceExhausted(status)) &&
-      index_->WantsMerge()) {
-    ICING_LOG(ERROR) << "Merging the index at docid " << document_id << ".";
+  // Merge if necessary.
+  if (overall_status.ok() && index_->WantsMerge()) {
+    ICING_VLOG(1) << "Merging the index at docid " << document_id << ".";
     std::unique_ptr<Timer> merge_timer = clock_.GetNewTimer();
     libtextclassifier3::Status merge_status = index_->Merge();
@@ -146,7 +150,7 @@ libtextclassifier3::Status IndexProcessor::IndexDocument(
     }
   }
-  return status;
+  return overall_status;
 }
 }  // namespace lib
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index 269e41c..6b07c98 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -32,6 +32,23 @@ namespace lib {
 class IndexProcessor {
  public:
+  struct Options {
+    int32_t max_tokens_per_document;
+
+    // Indicates how a document exceeding max_tokens_per_document should be
+    // handled.
+    enum class TokenLimitBehavior {
+      // When set, the first max_tokens_per_document tokens will be indexed.
+      // If the token count exceeds max_tokens_per_document, a
+      // ResourceExhausted error will be returned.
+ kReturnError, + // When set, the first max_tokens_per_document will be indexed. If the + // token count exceeds max_tokens_per_document, OK will be returned. + kSuppressError, + }; + TokenLimitBehavior token_limit_behavior; + }; + // Factory function to create an IndexProcessor which does not take ownership // of any input components, and all pointers must refer to valid objects that // outlive the created IndexProcessor instance. @@ -40,7 +57,8 @@ class IndexProcessor { // An IndexProcessor on success // FAILED_PRECONDITION if any of the pointers is null. static libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>> Create( - const Normalizer* normalizer, Index* index, const Clock* clock); + const Normalizer* normalizer, Index* index, const Options& options, + const Clock* clock); // Add tokenized document to the index, associated with document_id. If the // number of tokens in the document exceeds max_tokens_per_document, then only @@ -66,11 +84,18 @@ class IndexProcessor { PutDocumentStatsProto* put_document_stats = nullptr); private: - IndexProcessor(const Normalizer* normalizer, Index* index, const Clock* clock) - : normalizer_(*normalizer), index_(index), clock_(*clock) {} + IndexProcessor(const Normalizer* normalizer, Index* index, + const Options& options, const Clock* clock) + : normalizer_(*normalizer), + index_(index), + options_(options), + clock_(*clock) {} + + std::string NormalizeToken(const Token& token); const Normalizer& normalizer_; Index* const index_; + const Options options_; const Clock& clock_; }; diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc index 1aad7d0..afeac4d 100644 --- a/icing/index/index-processor_benchmark.cc +++ b/icing/index/index-processor_benchmark.cc @@ -16,6 +16,7 @@ #include "gmock/gmock.h" #include "icing/document-builder.h" #include "icing/file/filesystem.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/index/index-processor.h" #include "icing/index/index.h" #include "icing/legacy/core/icing-string-util.h" @@ -23,7 +24,6 @@ #include "icing/schema/schema-util.h" #include "icing/schema/section-manager.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" @@ -168,6 +168,17 @@ void CleanUp(const Filesystem& filesystem, const std::string& index_dir) { filesystem.DeleteDirectoryRecursively(index_dir.c_str()); } +std::unique_ptr<IndexProcessor> CreateIndexProcessor( + const Normalizer* normalizer, Index* index, const Clock* clock) { + IndexProcessor::Options processor_options{}; + processor_options.max_tokens_per_document = 1024 * 1024 * 10; + processor_options.token_limit_behavior = + IndexProcessor::Options::TokenLimitBehavior::kReturnError; + + return IndexProcessor::Create(normalizer, index, processor_options, clock) + .ValueOrDie(); +} + void BM_IndexDocumentWithOneProperty(benchmark::State& state) { bool run_via_adb = absl::GetFlag(FLAGS_adb); if (!run_via_adb) { @@ -189,9 +200,9 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) { std::unique_ptr<Normalizer> normalizer = CreateNormalizer(); Clock clock; std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<IndexProcessor> index_processor, - IndexProcessor::Create(normalizer.get(), index.get(), &clock)); + std::unique_ptr<IndexProcessor> index_processor = + 
CreateIndexProcessor(normalizer.get(), index.get(), &clock); + DocumentProto input_document = CreateDocumentWithOneProperty(state.range(0)); TokenizedDocument tokenized_document(std::move( TokenizedDocument::Create(schema_store.get(), language_segmenter.get(), @@ -243,9 +254,8 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) { std::unique_ptr<Normalizer> normalizer = CreateNormalizer(); Clock clock; std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<IndexProcessor> index_processor, - IndexProcessor::Create(normalizer.get(), index.get(), &clock)); + std::unique_ptr<IndexProcessor> index_processor = + CreateIndexProcessor(normalizer.get(), index.get(), &clock); DocumentProto input_document = CreateDocumentWithTenProperties(state.range(0)); @@ -299,9 +309,8 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) { std::unique_ptr<Normalizer> normalizer = CreateNormalizer(); Clock clock; std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<IndexProcessor> index_processor, - IndexProcessor::Create(normalizer.get(), index.get(), &clock)); + std::unique_ptr<IndexProcessor> index_processor = + CreateIndexProcessor(normalizer.get(), index.get(), &clock); DocumentProto input_document = CreateDocumentWithDiacriticLetters(state.range(0)); @@ -355,9 +364,8 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) { std::unique_ptr<Normalizer> normalizer = CreateNormalizer(); Clock clock; std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<IndexProcessor> index_processor, - IndexProcessor::Create(normalizer.get(), index.get(), &clock)); + std::unique_ptr<IndexProcessor> index_processor = + CreateIndexProcessor(normalizer.get(), index.get(), &clock); DocumentProto input_document = CreateDocumentWithHiragana(state.range(0)); TokenizedDocument tokenized_document(std::move( diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc index 7746688..8a6a9f5 100644 --- a/icing/index/index-processor_test.cc +++ b/icing/index/index-processor_test.cc @@ -27,9 +27,9 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/absl_ports/str_cat.h" -#include "icing/absl_ports/str_join.h" #include "icing/document-builder.h" #include "icing/file/filesystem.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/index.h" #include "icing/index/iterator/doc-hit-info-iterator.h" @@ -48,8 +48,6 @@ #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" -#include "icing/testing/icu-data-file-helper.h" -#include "icing/testing/random-string.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" @@ -90,8 +88,6 @@ constexpr std::string_view kRepeatedProperty = "repeated"; constexpr std::string_view kSubProperty = "submessage"; constexpr std::string_view kNestedType = "NestedType"; constexpr std::string_view kNestedProperty = "nested"; -constexpr std::string_view kExactVerbatimProperty = "verbatimExact"; -constexpr std::string_view kPrefixedVerbatimProperty = "verbatimPrefixed"; constexpr DocumentId kDocumentId0 = 0; constexpr DocumentId kDocumentId1 = 1; @@ -100,8 +96,6 @@ constexpr SectionId kExactSectionId = 0; constexpr SectionId kPrefixedSectionId 
= 1; constexpr SectionId kRepeatedSectionId = 2; constexpr SectionId kNestedSectionId = 3; -constexpr SectionId kExactVerbatimSectionId = 4; -constexpr SectionId kPrefixedVerbatimSectionId = 5; using Cardinality = PropertyConfigProto::Cardinality; using DataType = PropertyConfigProto::DataType; @@ -110,23 +104,21 @@ using ::testing::Eq; using ::testing::IsEmpty; using ::testing::Test; -constexpr PropertyConfigProto::DataType::Code TYPE_STRING = - PropertyConfigProto::DataType::STRING; -constexpr PropertyConfigProto::DataType::Code TYPE_BYTES = - PropertyConfigProto::DataType::BYTES; +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; +constexpr PropertyConfigProto_DataType_Code TYPE_BYTES = + PropertyConfigProto_DataType_Code_BYTES; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = - PropertyConfigProto::Cardinality::REPEATED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_VERBATIM = - StringIndexingConfig::TokenizerType::VERBATIM; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; -constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; -constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; class IndexProcessorTest : public Test { protected: @@ -153,12 +145,9 @@ class IndexProcessorTest : public Test { normalizer_factory::Create( /*max_term_byte_size=*/std::numeric_limits<int32_t>::max())); - std::string schema_store_dir = GetTestTempDir() + "/schema_store"; - ASSERT_TRUE( - filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str())); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); + SchemaStore::Create(&filesystem_, GetTestTempDir(), &fake_clock_)); SchemaProto schema = SchemaBuilder() .AddType( @@ -189,16 +178,6 @@ class IndexProcessorTest : public Test { .SetCardinality(CARDINALITY_REPEATED)) .AddProperty( PropertyConfigBuilder() - .SetName(kExactVerbatimProperty) - .SetDataTypeString(MATCH_EXACT, TOKENIZER_VERBATIM) - .SetCardinality(CARDINALITY_REPEATED)) - .AddProperty( - PropertyConfigBuilder() - .SetName(kPrefixedVerbatimProperty) - .SetDataTypeString(MATCH_PREFIX, TOKENIZER_VERBATIM) - .SetCardinality(CARDINALITY_REPEATED)) - .AddProperty( - PropertyConfigBuilder() .SetName(kSubProperty) .SetDataTypeDocument( kNestedType, /*index_nested_properties=*/true) @@ -214,9 +193,15 @@ class IndexProcessorTest : public Test { .Build(); ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + IndexProcessor::Options processor_options; + processor_options.max_tokens_per_document = 1000; + processor_options.token_limit_behavior = + IndexProcessor::Options::TokenLimitBehavior::kReturnError; + ICING_ASSERT_OK_AND_ASSIGN( index_processor_, - IndexProcessor::Create(normalizer_.get(), index_.get(), &fake_clock_)); + 
IndexProcessor::Create(normalizer_.get(), index_.get(), + processor_options, &fake_clock_)); mock_icing_filesystem_ = std::make_unique<IcingMockFilesystem>(); } @@ -247,12 +232,17 @@ std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) { } TEST_F(IndexProcessorTest, CreationWithNullPointerShouldFail) { + IndexProcessor::Options processor_options; + processor_options.max_tokens_per_document = 1000; + processor_options.token_limit_behavior = + IndexProcessor::Options::TokenLimitBehavior::kReturnError; + EXPECT_THAT(IndexProcessor::Create(/*normalizer=*/nullptr, index_.get(), - &fake_clock_), + processor_options, &fake_clock_), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(IndexProcessor::Create(normalizer_.get(), /*index=*/nullptr, - &fake_clock_), + processor_options, &fake_clock_), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } @@ -444,68 +434,103 @@ TEST_F(IndexProcessorTest, DocWithRepeatedProperty) { kDocumentId0, std::vector<SectionId>{kRepeatedSectionId}))); } -// TODO(b/196771754) This test is disabled on Android because it takes too long -// to generate all of the unique terms and the test times out. Try storing these -// unique terms in a file that the test can read from. -#ifndef __ANDROID__ +TEST_F(IndexProcessorTest, TooManyTokensReturnError) { + // Only allow the first four tokens ("hello", "world", "good", "night") to be + // indexed. + IndexProcessor::Options options; + options.max_tokens_per_document = 4; + options.token_limit_behavior = + IndexProcessor::Options::TokenLimitBehavior::kReturnError; -TEST_F(IndexProcessorTest, HitBufferExhaustedTest) { - // Testing has shown that adding ~600,000 hits will fill up the hit buffer. - std::vector<std::string> unique_terms_ = GenerateUniqueTerms(200000); - std::string content = absl_ports::StrJoin(unique_terms_, " "); + ICING_ASSERT_OK_AND_ASSIGN( + index_processor_, IndexProcessor::Create(normalizer_.get(), index_.get(), + options, &fake_clock_)); DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/1") .SetSchema(std::string(kFakeType)) - .AddStringProperty(std::string(kExactProperty), content) - .AddStringProperty(std::string(kPrefixedProperty), content) - .AddStringProperty(std::string(kRepeatedProperty), content) + .AddStringProperty(std::string(kExactProperty), "hello world") + .AddStringProperty(std::string(kPrefixedProperty), "good night moon!") .Build(); ICING_ASSERT_OK_AND_ASSIGN( TokenizedDocument tokenized_document, TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), document)); EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0), - StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED, - testing::HasSubstr("Hit buffer is full!"))); + StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); + + // "night" should have been indexed. + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr, + index_->GetIterator("night", kSectionIdMaskAll, + TermMatchType::EXACT_ONLY)); + EXPECT_THAT(GetHits(std::move(itr)), + ElementsAre(EqualsDocHitInfo( + kDocumentId0, std::vector<SectionId>{kPrefixedSectionId}))); + + // "moon" should not have been. 
+ ICING_ASSERT_OK_AND_ASSIGN(itr, + index_->GetIterator("moon", kSectionIdMaskAll, + TermMatchType::EXACT_ONLY)); + EXPECT_THAT(GetHits(std::move(itr)), IsEmpty()); } -TEST_F(IndexProcessorTest, LexiconExhaustedTest) { - // Testing has shown that adding ~300,000 terms generated this way will - // fill up the lexicon. - std::vector<std::string> unique_terms_ = GenerateUniqueTerms(300000); - std::string content = absl_ports::StrJoin(unique_terms_, " "); +TEST_F(IndexProcessorTest, TooManyTokensSuppressError) { + // Only allow the first four tokens ("hello", "world", "good", "night") to be + // indexed. + IndexProcessor::Options options; + options.max_tokens_per_document = 4; + options.token_limit_behavior = + IndexProcessor::Options::TokenLimitBehavior::kSuppressError; + + ICING_ASSERT_OK_AND_ASSIGN( + index_processor_, IndexProcessor::Create(normalizer_.get(), index_.get(), + options, &fake_clock_)); DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/1") .SetSchema(std::string(kFakeType)) - .AddStringProperty(std::string(kExactProperty), content) + .AddStringProperty(std::string(kExactProperty), "hello world") + .AddStringProperty(std::string(kPrefixedProperty), "good night moon!") .Build(); ICING_ASSERT_OK_AND_ASSIGN( TokenizedDocument tokenized_document, TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), document)); EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0), - StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED, - testing::HasSubstr("Unable to add term"))); + IsOk()); EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); -} -#endif // __ANDROID__ + // "night" should have been indexed. + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr, + index_->GetIterator("night", kSectionIdMaskAll, + TermMatchType::EXACT_ONLY)); + EXPECT_THAT(GetHits(std::move(itr)), + ElementsAre(EqualsDocHitInfo( + kDocumentId0, std::vector<SectionId>{kPrefixedSectionId}))); + + // "moon" should not have been. + ICING_ASSERT_OK_AND_ASSIGN(itr, + index_->GetIterator("moon", kSectionIdMaskAll, + TermMatchType::EXACT_ONLY)); + EXPECT_THAT(GetHits(std::move(itr)), IsEmpty()); +} TEST_F(IndexProcessorTest, TooLongTokens) { // Only allow the tokens of length four, truncating "hello", "world" and // "night". 
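  // (Note: max_term_byte_size truncates each individual term during
  // normalization; it is independent of the max_tokens_per_document count
  // limit exercised by the tests above.)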
+ IndexProcessor::Options options; + options.max_tokens_per_document = 1000; + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Normalizer> normalizer, normalizer_factory::Create( /*max_term_byte_size=*/4)); ICING_ASSERT_OK_AND_ASSIGN( - index_processor_, - IndexProcessor::Create(normalizer.get(), index_.get(), &fake_clock_)); + index_processor_, IndexProcessor::Create(normalizer.get(), index_.get(), + options, &fake_clock_)); DocumentProto document = DocumentBuilder() @@ -667,6 +692,16 @@ TEST_F(IndexProcessorTest, NonAsciiIndexing) { lang_segmenter_, language_segmenter_factory::Create(std::move(segmenter_options))); + IndexProcessor::Options processor_options; + processor_options.max_tokens_per_document = 1000; + processor_options.token_limit_behavior = + IndexProcessor::Options::TokenLimitBehavior::kReturnError; + + ICING_ASSERT_OK_AND_ASSIGN( + index_processor_, + IndexProcessor::Create(normalizer_.get(), index_.get(), processor_options, + &fake_clock_)); + DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/1") @@ -692,13 +727,23 @@ TEST_F(IndexProcessorTest, NonAsciiIndexing) { TEST_F(IndexProcessorTest, LexiconFullIndexesSmallerTokensReturnsResourceExhausted) { + IndexProcessor::Options processor_options; + processor_options.max_tokens_per_document = 1000; + processor_options.token_limit_behavior = + IndexProcessor::Options::TokenLimitBehavior::kReturnError; + + ICING_ASSERT_OK_AND_ASSIGN( + index_processor_, + IndexProcessor::Create(normalizer_.get(), index_.get(), processor_options, + &fake_clock_)); + // This is the maximum token length that an empty lexicon constructed for a // lite index with merge size of 1MiB can support. constexpr int kMaxTokenLength = 16777217; // Create a string "ppppppp..." with a length that is too large to fit into // the lexicon. 
std::string enormous_string(kMaxTokenLength + 1, 'p'); - DocumentProto document_one = + DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/1") .SetSchema(std::string(kFakeType)) @@ -709,10 +754,24 @@ TEST_F(IndexProcessorTest, ICING_ASSERT_OK_AND_ASSIGN( TokenizedDocument tokenized_document, TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), - document_one)); + document)); EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> itr, + index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); + EXPECT_THAT(GetHits(std::move(itr)), + ElementsAre(EqualsDocHitInfo( + kDocumentId0, std::vector<SectionId>{kExactSectionId}))); + + ICING_ASSERT_OK_AND_ASSIGN( + itr, + index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); + EXPECT_THAT(GetHits(std::move(itr)), + ElementsAre(EqualsDocHitInfo( + kDocumentId0, std::vector<SectionId>{kPrefixedSectionId}))); } TEST_F(IndexProcessorTest, IndexingDocAutomaticMerge) { @@ -736,9 +795,15 @@ TEST_F(IndexProcessorTest, IndexingDocAutomaticMerge) { ICING_ASSERT_OK_AND_ASSIGN( index_, Index::Create(options, &filesystem_, &icing_filesystem_)); + IndexProcessor::Options processor_options; + processor_options.max_tokens_per_document = 1000; + processor_options.token_limit_behavior = + IndexProcessor::Options::TokenLimitBehavior::kReturnError; + ICING_ASSERT_OK_AND_ASSIGN( index_processor_, - IndexProcessor::Create(normalizer_.get(), index_.get(), &fake_clock_)); + IndexProcessor::Create(normalizer_.get(), index_.get(), processor_options, + &fake_clock_)); DocumentId doc_id = 0; // Have determined experimentally that indexing 3373 documents with this text // will cause the LiteIndex to fill up. Further indexing will fail unless the @@ -792,9 +857,15 @@ TEST_F(IndexProcessorTest, IndexingDocMergeFailureResets) { index_, Index::Create(options, &filesystem_, mock_icing_filesystem_.get())); + IndexProcessor::Options processor_options; + processor_options.max_tokens_per_document = 1000; + processor_options.token_limit_behavior = + IndexProcessor::Options::TokenLimitBehavior::kReturnError; + ICING_ASSERT_OK_AND_ASSIGN( index_processor_, - IndexProcessor::Create(normalizer_.get(), index_.get(), &fake_clock_)); + IndexProcessor::Create(normalizer_.get(), index_.get(), processor_options, + &fake_clock_)); // 3. Index one document. This should fit in the LiteIndex without requiring a // merge. 
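Every updated test in this file now repeats the same Options boilerplate before calling IndexProcessor::Create, and the benchmark file factored the identical setup into its CreateIndexProcessor helper. A sketch of the analogous test helper follows; the name CreateTestIndexProcessor is hypothetical and simply mirrors the benchmark helper shown earlier, assuming the defaults these tests use (a 1000-token limit and kReturnError):

    // Hypothetical helper collapsing the Options boilerplate repeated in the
    // hunks above; not part of this CL.
    std::unique_ptr<IndexProcessor> CreateTestIndexProcessor(
        const Normalizer* normalizer, Index* index, const Clock* clock) {
      IndexProcessor::Options processor_options;
      processor_options.max_tokens_per_document = 1000;
      processor_options.token_limit_behavior =
          IndexProcessor::Options::TokenLimitBehavior::kReturnError;
      return IndexProcessor::Create(normalizer, index, processor_options, clock)
          .ValueOrDie();
    }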
@@ -816,95 +887,6 @@ TEST_F(IndexProcessorTest, IndexingDocMergeFailureResets) { EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id)); } -TEST_F(IndexProcessorTest, ExactVerbatimProperty) { - DocumentProto document = - DocumentBuilder() - .SetKey("icing", "fake_type/1") - .SetSchema(std::string(kFakeType)) - .AddStringProperty(std::string(kExactVerbatimProperty), - "Hello, world!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN( - TokenizedDocument tokenized_document, - TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), - document)); - EXPECT_THAT(tokenized_document.num_tokens(), 1); - - EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0), - IsOk()); - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<DocHitInfoIterator> itr, - index_->GetIterator("Hello, world!", kSectionIdMaskAll, - TermMatchType::EXACT_ONLY)); - std::vector<DocHitInfo> hits = GetHits(std::move(itr)); - std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{ - {kExactVerbatimSectionId, 1}}; - - EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency( - kDocumentId0, expectedMap))); -} - -TEST_F(IndexProcessorTest, PrefixVerbatimProperty) { - DocumentProto document = - DocumentBuilder() - .SetKey("icing", "fake_type/1") - .SetSchema(std::string(kFakeType)) - .AddStringProperty(std::string(kPrefixedVerbatimProperty), - "Hello, world!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN( - TokenizedDocument tokenized_document, - TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), - document)); - EXPECT_THAT(tokenized_document.num_tokens(), 1); - - EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0), - IsOk()); - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); - - // We expect to match the document we indexed as "Hello, w" is a prefix - // of "Hello, world!" - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr, - index_->GetIterator("Hello, w", kSectionIdMaskAll, - TermMatchType::PREFIX)); - std::vector<DocHitInfo> hits = GetHits(std::move(itr)); - std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{ - {kPrefixedVerbatimSectionId, 1}}; - - EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency( - kDocumentId0, expectedMap))); -} - -TEST_F(IndexProcessorTest, VerbatimPropertyDoesntMatchSubToken) { - DocumentProto document = - DocumentBuilder() - .SetKey("icing", "fake_type/1") - .SetSchema(std::string(kFakeType)) - .AddStringProperty(std::string(kPrefixedVerbatimProperty), - "Hello, world!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN( - TokenizedDocument tokenized_document, - TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), - document)); - EXPECT_THAT(tokenized_document.num_tokens(), 1); - - EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0), - IsOk()); - EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<DocHitInfoIterator> itr, - index_->GetIterator("world", kSectionIdMaskAll, TermMatchType::PREFIX)); - std::vector<DocHitInfo> hits = GetHits(std::move(itr)); - - // We should not have hits for term "world" as the index processor should - // create a sole token "Hello, world! for the document. 
- EXPECT_THAT(hits, IsEmpty()); -} - } // namespace } // namespace lib diff --git a/icing/index/index.cc b/icing/index/index.cc index 02ba699..db59ad2 100644 --- a/icing/index/index.cc +++ b/icing/index/index.cc @@ -36,7 +36,6 @@ #include "icing/legacy/index/icing-filesystem.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" -#include "icing/scoring/ranker.h" #include "icing/store/document-id.h" #include "icing/util/logging.h" #include "icing/util/status-macros.h" @@ -71,25 +70,39 @@ IcingDynamicTrie::Options GetMainLexiconOptions() { return IcingDynamicTrie::Options(); } -enum class MergeAction { kTakeLiteTerm, kTakeMainTerm, kMergeTerms }; +// Helper function to check if a term is in the given namespaces. +// TODO(tjbarron): Implement a method PropertyReadersAll.HasAnyProperty(). +bool IsTermInNamespaces( + const IcingDynamicTrie::PropertyReadersAll& property_reader, + uint32_t value_index, const std::vector<NamespaceId>& namespace_ids) { + if (namespace_ids.empty()) { + return true; + } + for (NamespaceId namespace_id : namespace_ids) { + if (property_reader.HasProperty(GetNamespacePropertyId(namespace_id), + value_index)) { + return true; + } + } + + return false; +} -// Merge the TermMetadata from lite index and main index. If the term exists in -// both index, sum up its hit count and push it to the term heap. -// The heap is a min-heap. So that we can avoid some push operation but the time -// complexity is O(NlgK) which N is total number of term and K is num_to_return. -std::vector<TermMetadata> MergeAndRankTermMetadatas( +enum class MergeAction { kTakeLiteTerm, kTakeMainTerm, kMergeTerms }; +std::vector<TermMetadata> MergeTermMetadatas( std::vector<TermMetadata> lite_term_metadata_list, std::vector<TermMetadata> main_term_metadata_list, int num_to_return) { - std::vector<TermMetadata> merged_term_metadata_heap; - merged_term_metadata_heap.reserve( + std::vector<TermMetadata> merged_term_metadata_list; + merged_term_metadata_list.reserve( std::min(lite_term_metadata_list.size() + main_term_metadata_list.size(), static_cast<size_t>(num_to_return))); auto lite_term_itr = lite_term_metadata_list.begin(); auto main_term_itr = main_term_metadata_list.begin(); MergeAction merge_action; - while (lite_term_itr != lite_term_metadata_list.end() || - main_term_itr != main_term_metadata_list.end()) { + while (merged_term_metadata_list.size() < num_to_return && + (lite_term_itr != lite_term_metadata_list.end() || + main_term_itr != main_term_metadata_list.end())) { // Get pointers to the next metadatas in each group, if available // Determine how to merge. 
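    // kTakeLiteTerm and kTakeMainTerm copy one entry through unchanged;
    // kMergeTerms fires when both lists contain the same term and sums the
    // two hit counts into a single merged entry.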
if (main_term_itr == main_term_metadata_list.end()) { @@ -106,32 +119,23 @@ std::vector<TermMetadata> MergeAndRankTermMetadatas( } switch (merge_action) { case MergeAction::kTakeLiteTerm: - PushToTermHeap(std::move(*lite_term_itr), num_to_return, - merged_term_metadata_heap); + merged_term_metadata_list.push_back(std::move(*lite_term_itr)); ++lite_term_itr; break; case MergeAction::kTakeMainTerm: - PushToTermHeap(std::move(*main_term_itr), num_to_return, - merged_term_metadata_heap); + merged_term_metadata_list.push_back(std::move(*main_term_itr)); ++main_term_itr; break; case MergeAction::kMergeTerms: int total_est_hit_count = lite_term_itr->hit_count + main_term_itr->hit_count; - PushToTermHeap(TermMetadata(std::move(lite_term_itr->content), - total_est_hit_count), - num_to_return, merged_term_metadata_heap); + merged_term_metadata_list.emplace_back( + std::move(lite_term_itr->content), total_est_hit_count); ++lite_term_itr; ++main_term_itr; break; } } - // Reverse the list since we pop them from a min heap and we need to return in - // decreasing order. - std::vector<TermMetadata> merged_term_metadata_list = - PopAllTermsFromHeap(merged_term_metadata_heap); - std::reverse(merged_term_metadata_list.begin(), - merged_term_metadata_list.end()); return merged_term_metadata_list; } @@ -210,56 +214,77 @@ Index::GetIterator(const std::string& term, SectionIdMask section_id_mask, libtextclassifier3::StatusOr<std::vector<TermMetadata>> Index::FindLiteTermsByPrefix(const std::string& prefix, - const NamespaceChecker* namespace_checker) { + const std::vector<NamespaceId>& namespace_ids, + int num_to_return) { // Finds all the terms that start with the given prefix in the lexicon. IcingDynamicTrie::Iterator term_iterator(lite_index_->lexicon(), prefix.c_str()); + // A property reader to help check if a term has some property. + IcingDynamicTrie::PropertyReadersAll property_reader(lite_index_->lexicon()); + std::vector<TermMetadata> term_metadata_list; - while (term_iterator.IsValid()) { + while (term_iterator.IsValid() && term_metadata_list.size() < num_to_return) { uint32_t term_value_index = term_iterator.GetValueIndex(); + // Skips the terms that don't exist in the given namespaces. We won't skip + // any terms if namespace_ids is empty. + if (!IsTermInNamespaces(property_reader, term_value_index, namespace_ids)) { + term_iterator.Advance(); + continue; + } + ICING_ASSIGN_OR_RETURN( uint32_t term_id, term_id_codec_->EncodeTvi(term_value_index, TviType::LITE), absl_ports::InternalError("Failed to access terms in lexicon.")); - ICING_ASSIGN_OR_RETURN(int hit_count, - lite_index_->CountHits(term_id, namespace_checker)); - if (hit_count > 0) { - // There is at least one document in the given namespace has this term. - term_metadata_list.push_back( - TermMetadata(term_iterator.GetKey(), hit_count)); - } + + term_metadata_list.emplace_back(term_iterator.GetKey(), + lite_index_->CountHits(term_id)); term_iterator.Advance(); } + if (term_iterator.IsValid()) { + // We exited the loop above because we hit the num_to_return limit. + ICING_LOG(WARNING) << "Ran into limit of " << num_to_return + << " retrieving suggestions for " << prefix + << ". 
Some suggestions may not be returned and others " + "may be misranked."; + } return term_metadata_list; } libtextclassifier3::StatusOr<std::vector<TermMetadata>> -Index::FindTermsByPrefix(const std::string& prefix, int num_to_return, - TermMatchType::Code term_match_type, - const NamespaceChecker* namespace_checker) { +Index::FindTermsByPrefix(const std::string& prefix, + const std::vector<NamespaceId>& namespace_ids, + int num_to_return) { std::vector<TermMetadata> term_metadata_list; if (num_to_return <= 0) { return term_metadata_list; } + // Get results from the LiteIndex. - ICING_ASSIGN_OR_RETURN(std::vector<TermMetadata> lite_term_metadata_list, - FindLiteTermsByPrefix(prefix, namespace_checker)); + ICING_ASSIGN_OR_RETURN( + std::vector<TermMetadata> lite_term_metadata_list, + FindLiteTermsByPrefix(prefix, namespace_ids, num_to_return)); + // Append results from the MainIndex. - ICING_ASSIGN_OR_RETURN(std::vector<TermMetadata> main_term_metadata_list, - main_index_->FindTermsByPrefix(prefix, term_match_type, - namespace_checker)); - return MergeAndRankTermMetadatas(std::move(lite_term_metadata_list), - std::move(main_term_metadata_list), - num_to_return); + ICING_ASSIGN_OR_RETURN( + std::vector<TermMetadata> main_term_metadata_list, + main_index_->FindTermsByPrefix(prefix, namespace_ids, num_to_return)); + + return MergeTermMetadatas(std::move(lite_term_metadata_list), + std::move(main_term_metadata_list), num_to_return); } IndexStorageInfoProto Index::GetStorageInfo() const { IndexStorageInfoProto storage_info; int64_t directory_size = filesystem_->GetDiskUsage(options_.base_dir.c_str()); - storage_info.set_index_size(Filesystem::SanitizeFileSize(directory_size)); + if (directory_size != Filesystem::kBadFileSize) { + storage_info.set_index_size(directory_size); + } else { + storage_info.set_index_size(-1); + } storage_info = lite_index_->GetStorageInfo(std::move(storage_info)); return main_index_->GetStorageInfo(std::move(storage_info)); } diff --git a/icing/index/index.h b/icing/index/index.h index 5c53349..eab5be8 100644 --- a/icing/index/index.h +++ b/icing/index/index.h @@ -32,12 +32,10 @@ #include "icing/index/term-id-codec.h" #include "icing/index/term-metadata.h" #include "icing/legacy/index/icing-filesystem.h" -#include "icing/proto/debug.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" -#include "icing/store/namespace-checker.h" #include "icing/store/namespace-id.h" #include "icing/util/crc32.h" @@ -144,14 +142,9 @@ class Index { // index. // verbosity > 0, more detailed debug information including raw postings // lists. - IndexDebugInfoProto GetDebugInfo(int verbosity) const { - IndexDebugInfoProto debug_info; - *debug_info.mutable_index_storage_info() = GetStorageInfo(); - *debug_info.mutable_lite_index_info() = - lite_index_->GetDebugInfo(verbosity); - *debug_info.mutable_main_index_info() = - main_index_->GetDebugInfo(verbosity); - return debug_info; + void GetDebugInfo(int verbosity, std::string* out) const { + lite_index_->GetDebugInfo(verbosity, out); + main_index_->GetDebugInfo(verbosity, out); } // Returns the byte size of the all the elements held in the index. This @@ -188,17 +181,17 @@ class Index { TermMatchType::Code term_match_type); // Finds terms with the given prefix in the given namespaces. If - // 'namespace_ids' is empty, returns results from all the namespaces. Results - // are sorted in decreasing order of hit count. 
Number of results are no more - than 'num_to_return'. + // 'namespace_ids' is empty, returns results from all the namespaces. The + input prefix must be normalized, otherwise inaccurate results may be + returned. Results are not sorted specifically and are in their original + order. The number of results is no more than 'num_to_return'. // // Returns: // A list of TermMetadata on success // INTERNAL_ERROR if failed to access term data. libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindTermsByPrefix( - const std::string& prefix, int num_to_return, - TermMatchType::Code term_match_type, - const NamespaceChecker* namespace_checker); + const std::string& prefix, const std::vector<NamespaceId>& namespace_ids, + int num_to_return); // A class that can be used to add hits to the index. // @@ -274,7 +267,8 @@ class Index { filesystem_(filesystem) {} libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindLiteTermsByPrefix( - const std::string& prefix, const NamespaceChecker* namespace_checker); + const std::string& prefix, const std::vector<NamespaceId>& namespace_ids, + int num_to_return); std::unique_ptr<LiteIndex> lite_index_; std::unique_ptr<MainIndex> main_index_; diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc index 8355c01..16593ef 100644 --- a/icing/index/index_test.cc +++ b/icing/index/index_test.cc @@ -31,12 +31,10 @@ #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/legacy/index/icing-mock-filesystem.h" -#include "icing/proto/debug.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" -#include "icing/testing/always-true-namespace-checker-impl.h" #include "icing/testing/common-matchers.h" #include "icing/testing/random-string.h" #include "icing/testing/tmp-directory.h" @@ -90,10 +88,18 @@ constexpr DocumentId kDocumentId4 = 4; constexpr DocumentId kDocumentId5 = 5; constexpr DocumentId kDocumentId6 = 6; constexpr DocumentId kDocumentId7 = 7; -constexpr DocumentId kDocumentId8 = 8; constexpr SectionId kSectionId2 = 2; constexpr SectionId kSectionId3 = 3; +// The value returned by IndexBlock::ApproximateFullPostingListHitsForBlock( +// GetBlockSize(), +// GetPostingListIndexBits(posting_list_utils::min_posting_list_size())); +constexpr int kMinSizePlApproxHits = 3; +// The value returned by IndexBlock::ApproximateFullPostingListHitsForBlock( +// GetBlockSize(), +// GetPostingListIndexBits(2 * posting_list_utils::min_posting_list_size())); +constexpr int kSecondSmallestPlApproxHits = 7; + std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) { std::vector<DocHitInfo> infos; while (iterator->Advance().ok()) { @@ -909,306 +915,217 @@ TEST_F(IndexTest, InvalidHitBufferSize) { TEST_F(IndexTest, FindTermByPrefixShouldReturnEmpty) { Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0); - AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*num_to_return=*/0, - TermMatchType::PREFIX, &impl), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0}, + /*num_to_return=*/0), IsOkAndHolds(IsEmpty())); - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", - /*num_to_return=*/-1, - TermMatchType::PREFIX, &impl), +
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0}, + /*num_to_return=*/-1), IsOkAndHolds(IsEmpty())); ICING_ASSERT_OK(index_->Merge()); - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", - /*num_to_return=*/0, - TermMatchType::PREFIX, &impl), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0}, + /*num_to_return=*/0), IsOkAndHolds(IsEmpty())); - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", - /*num_to_return=*/-1, - TermMatchType::PREFIX, &impl), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0}, + /*num_to_return=*/-1), IsOkAndHolds(IsEmpty())); } TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectResult) { Index::Editor edit = index_->Edit( kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.BufferTerm("bar"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); // "b" should only match "bar" but not "foo". - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", - /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", /*namespace_ids=*/{0}, + /*num_to_return=*/10), IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1)))); ICING_ASSERT_OK(index_->Merge()); // "b" should only match "bar" but not "foo". - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", - /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1)))); + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", /*namespace_ids=*/{0}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre( + EqualsTermMetadata("bar", kMinSizePlApproxHits)))); } TEST_F(IndexTest, FindTermByPrefixShouldRespectNumToReturn) { Index::Editor edit = index_->Edit( kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit.BufferTerm("fo"), IsOk()); EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); // We have 3 results but only 2 should be returned. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", - /*num_to_return=*/2, - TermMatchType::PREFIX, &impl), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, + /*num_to_return=*/2), IsOkAndHolds(SizeIs(2))); ICING_ASSERT_OK(index_->Merge()); // We have 3 results but only 2 should be returned. 
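// [Usage sketch] The new FindTermsByPrefix signature exercised by these
// tests, assuming an initialized Index named index: an empty namespace_ids
// vector means "search every namespace", and num_to_return <= 0 simply
// yields an empty list.
ICING_ASSIGN_OR_RETURN(
    std::vector<TermMetadata> suggestions,
    index->FindTermsByPrefix(/*prefix=*/"fo", /*namespace_ids=*/{},
                             /*num_to_return=*/10));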
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", - /*num_to_return=*/2, - TermMatchType::PREFIX, &impl), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, + /*num_to_return=*/2), IsOkAndHolds(SizeIs(2))); } -TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInAllNamespaces) { +TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInOneNamespace) { Index::Editor edit1 = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit1.BufferTerm("fo"), IsOk()); + EXPECT_THAT(edit1.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk()); Index::Editor edit2 = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/1); - EXPECT_THAT(edit2.BufferTerm("foo"), IsOk()); + EXPECT_THAT(edit2.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk()); - Index::Editor edit3 = - index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/2); - EXPECT_THAT(edit3.BufferTerm("fool"), IsOk()); - EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk()); - - // Should return "fo", "foo" and "fool" across all namespaces. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", - /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(UnorderedElementsAre( - EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1), - EqualsTermMetadata("fool", 1)))); + // namespace with id 0 has 2 results. + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1), + EqualsTermMetadata("foo", 1)))); + // namespace with id 1 has 1 result. + EXPECT_THAT( + index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fool", 1)))); ICING_ASSERT_OK(index_->Merge()); - // Should return "fo", "foo" and "fool" across all namespaces. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", - /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), + // namespace with id 0 has 2 results. + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, + /*num_to_return=*/10), IsOkAndHolds(UnorderedElementsAre( - EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1), - EqualsTermMetadata("fool", 1)))); + EqualsTermMetadata("fo", kMinSizePlApproxHits), + EqualsTermMetadata("foo", kMinSizePlApproxHits)))); + // namespace with id 1 has 1 result. 
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre( + EqualsTermMetadata("fool", kMinSizePlApproxHits)))); } -TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectHitCount) { +TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInMultipleNamespaces) { Index::Editor edit1 = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - AlwaysTrueNamespaceCheckerImpl impl; - EXPECT_THAT(edit1.BufferTerm("foo"), IsOk()); - EXPECT_THAT(edit1.BufferTerm("fool"), IsOk()); + EXPECT_THAT(edit1.BufferTerm("fo"), IsOk()); EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk()); Index::Editor edit2 = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit2.BufferTerm("fool"), IsOk()); + /*namespace_id=*/1); + EXPECT_THAT(edit2.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk()); - // 'foo' has 1 hit, 'fool' has 2 hits. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", - /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2), - EqualsTermMetadata("foo", 1)))); + Index::Editor edit3 = + index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY, + /*namespace_id=*/2); + EXPECT_THAT(edit3.BufferTerm("fool"), IsOk()); + EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk()); + + // Should return "foo" and "fool" which are in namespaces with ids 1 and 2. + EXPECT_THAT( + index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1, 2}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1), + EqualsTermMetadata("fool", 1)))); ICING_ASSERT_OK(index_->Merge()); - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", - /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2), - EqualsTermMetadata("foo", 1)))); + EXPECT_THAT( + index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1, 2}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre( + EqualsTermMetadata("foo", kMinSizePlApproxHits), + EqualsTermMetadata("fool", kMinSizePlApproxHits)))); } -TEST_F(IndexTest, FindTermByPrefixShouldReturnInOrder) { - // Push 6 term-six, 5 term-five, 4 term-four, 3 term-three, 2 term-two and one - // term-one into lite index. 
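// [Sketch, not icing code] The FindTermByPrefixShouldReturnInOrder test being
// removed here covered ranked output, which the deleted
// MergeAndRankTermMetadatas produced with a min-heap of size K (the O(N lg K)
// approach its comment described). The same idea with the standard library,
// names illustrative:
#include <algorithm>
#include <cstddef>
#include <functional>
#include <queue>
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<int, std::string>> TopKByHitCount(
    const std::vector<std::pair<int, std::string>>& candidates, size_t k) {
  // With std::greater the queue is a min-heap: top() is the smallest kept
  // pair, so the heap always holds the k largest hit counts seen so far.
  std::priority_queue<std::pair<int, std::string>,
                      std::vector<std::pair<int, std::string>>,
                      std::greater<std::pair<int, std::string>>>
      min_heap;
  for (const auto& candidate : candidates) {
    min_heap.push(candidate);
    if (min_heap.size() > k) {
      min_heap.pop();  // Evict the current smallest to stay at size k.
    }
  }
  std::vector<std::pair<int, std::string>> top_k;
  while (!min_heap.empty()) {
    top_k.push_back(min_heap.top());
    min_heap.pop();
  }
  // A min-heap pops in increasing order; reverse for decreasing hit count.
  std::reverse(top_k.begin(), top_k.end());
  return top_k;
}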
+TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInAllNamespaces) { Index::Editor edit1 = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - AlwaysTrueNamespaceCheckerImpl impl; - EXPECT_THAT(edit1.BufferTerm("term-one"), IsOk()); - EXPECT_THAT(edit1.BufferTerm("term-two"), IsOk()); - EXPECT_THAT(edit1.BufferTerm("term-three"), IsOk()); - EXPECT_THAT(edit1.BufferTerm("term-four"), IsOk()); - EXPECT_THAT(edit1.BufferTerm("term-five"), IsOk()); - EXPECT_THAT(edit1.BufferTerm("term-six"), IsOk()); + EXPECT_THAT(edit1.BufferTerm("fo"), IsOk()); EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk()); Index::Editor edit2 = - index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit2.BufferTerm("term-two"), IsOk()); - EXPECT_THAT(edit2.BufferTerm("term-three"), IsOk()); - EXPECT_THAT(edit2.BufferTerm("term-four"), IsOk()); - EXPECT_THAT(edit2.BufferTerm("term-five"), IsOk()); - EXPECT_THAT(edit2.BufferTerm("term-six"), IsOk()); + index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY, + /*namespace_id=*/1); + EXPECT_THAT(edit2.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk()); Index::Editor edit3 = - index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit3.BufferTerm("term-three"), IsOk()); - EXPECT_THAT(edit3.BufferTerm("term-four"), IsOk()); - EXPECT_THAT(edit3.BufferTerm("term-five"), IsOk()); - EXPECT_THAT(edit3.BufferTerm("term-six"), IsOk()); + index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY, + /*namespace_id=*/2); + EXPECT_THAT(edit3.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk()); - Index::Editor edit4 = - index_->Edit(kDocumentId4, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit4.BufferTerm("term-four"), IsOk()); - EXPECT_THAT(edit4.BufferTerm("term-five"), IsOk()); - EXPECT_THAT(edit4.BufferTerm("term-six"), IsOk()); - EXPECT_THAT(edit4.IndexAllBufferedTerms(), IsOk()); - - Index::Editor edit5 = - index_->Edit(kDocumentId5, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit5.BufferTerm("term-five"), IsOk()); - EXPECT_THAT(edit5.BufferTerm("term-six"), IsOk()); - EXPECT_THAT(edit5.IndexAllBufferedTerms(), IsOk()); - - Index::Editor edit6 = - index_->Edit(kDocumentId6, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit6.BufferTerm("term-six"), IsOk()); - EXPECT_THAT(edit6.IndexAllBufferedTerms(), IsOk()); - - // verify the order in lite index is correct. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"t", - /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-six", 6), - EqualsTermMetadata("term-five", 5), - EqualsTermMetadata("term-four", 4), - EqualsTermMetadata("term-three", 3), - EqualsTermMetadata("term-two", 2), - EqualsTermMetadata("term-one", 1)))); + // Should return "fo", "foo" and "fool" across all namespaces. 
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre( + EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1), + EqualsTermMetadata("fool", 1)))); ICING_ASSERT_OK(index_->Merge()); - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"t", - /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-six", 6), - EqualsTermMetadata("term-five", 5), - EqualsTermMetadata("term-four", 4), - EqualsTermMetadata("term-three", 3), - EqualsTermMetadata("term-two", 2), - EqualsTermMetadata("term-one", 1)))); - - // keep push terms to the lite index. We will add 2 document to term-five, - // term-three and term-one. The output order should be 5-6-3-4-1-2. - Index::Editor edit7 = - index_->Edit(kDocumentId7, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit7.BufferTerm("term-one"), IsOk()); - EXPECT_THAT(edit7.BufferTerm("term-three"), IsOk()); - EXPECT_THAT(edit7.BufferTerm("term-five"), IsOk()); - EXPECT_THAT(edit7.IndexAllBufferedTerms(), IsOk()); - - Index::Editor edit8 = - index_->Edit(kDocumentId8, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit8.BufferTerm("term-one"), IsOk()); - EXPECT_THAT(edit8.BufferTerm("term-three"), IsOk()); - EXPECT_THAT(edit8.BufferTerm("term-five"), IsOk()); - EXPECT_THAT(edit8.IndexAllBufferedTerms(), IsOk()); - - // verify the combination of lite index and main index is in correct order. - EXPECT_THAT( - index_->FindTermsByPrefix(/*prefix=*/"t", /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(ElementsAre( - EqualsTermMetadata("term-five", 7), EqualsTermMetadata("term-six", 6), - EqualsTermMetadata("term-three", 5), - EqualsTermMetadata("term-four", 4), EqualsTermMetadata("term-one", 3), - EqualsTermMetadata("term-two", 2)))); - - // Get the first three terms. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"t", - /*num_to_return=*/3, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-five", 7), - EqualsTermMetadata("term-six", 6), - EqualsTermMetadata("term-three", 5)))); + // Should return "fo", "foo" and "fool" across all namespaces. 
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre( + EqualsTermMetadata("fo", kMinSizePlApproxHits), + EqualsTermMetadata("foo", kMinSizePlApproxHits), + EqualsTermMetadata("fool", kMinSizePlApproxHits)))); } -TEST_F(IndexTest, FindTermByPrefix_InTermMatchTypePrefix_ShouldReturnInOrder) { +TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectHitCount) { Index::Editor edit1 = - index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX, + index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - AlwaysTrueNamespaceCheckerImpl impl; - EXPECT_THAT(edit1.BufferTerm("fo"), IsOk()); + EXPECT_THAT(edit1.BufferTerm("foo"), IsOk()); + EXPECT_THAT(edit1.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk()); Index::Editor edit2 = - index_->Edit(kDocumentId2, kSectionId2, TermMatchType::PREFIX, + index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - EXPECT_THAT(edit2.BufferTerm("foo"), IsOk()); + EXPECT_THAT(edit2.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk()); - Index::Editor edit3 = - index_->Edit(kDocumentId3, kSectionId2, TermMatchType::PREFIX, - /*namespace_id=*/0); - EXPECT_THAT(edit3.BufferTerm("fool"), IsOk()); - EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk()); + // 'foo' has 1 hit, 'fool' has 2 hits. + EXPECT_THAT( + index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1), + EqualsTermMetadata("fool", 2)))); ICING_ASSERT_OK(index_->Merge()); - // verify the order in pls is correct - // "fo" { {doc0, exact_hit}, {doc1, prefix_hit}, {doc2, prefix_hit} } - // "foo" { {doc1, exact_hit}, {doc2, prefix_hit} } - // "fool" { {doc2, exact_hit} } - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", - /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(ElementsAre(EqualsTermMetadata("fo", 3), - EqualsTermMetadata("foo", 2), - EqualsTermMetadata("fool", 1)))); - // Find by exact only, all terms should be equally. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*num_to_return=*/10, - TermMatchType::EXACT_ONLY, &impl), + + // foo's one hit should fit on a min-sized pl, fool's two hits should also fit + // on a min-sized pl. + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, + /*num_to_return=*/10), IsOkAndHolds(UnorderedElementsAre( - EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1), - EqualsTermMetadata("fool", 1)))); + EqualsTermMetadata("foo", kMinSizePlApproxHits), + EqualsTermMetadata("fool", kMinSizePlApproxHits)))); } -TEST_F(IndexTest, FindTermByPrefixShouldReturnHitCountForMain) { +TEST_F(IndexTest, FindTermByPrefixShouldReturnApproximateHitCountForMain) { Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); @@ -1243,26 +1160,26 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnHitCountForMain) { EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); // 'foo' has 1 hit, 'fool' has 8 hits. 
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", - /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 8), - EqualsTermMetadata("foo", 1)))); - - ICING_ASSERT_OK(index_->Merge()); - EXPECT_THAT( - index_->FindTermsByPrefix(/*prefix=*/"f", /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), + index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, + /*num_to_return=*/10), IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1), EqualsTermMetadata("fool", 8)))); + + ICING_ASSERT_OK(index_->Merge()); + + // foo's hits should fit on a single pl. fool's hits will need two pls. + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre( + EqualsTermMetadata("foo", kMinSizePlApproxHits), + EqualsTermMetadata("fool", kSecondSmallestPlApproxHits)))); } TEST_F(IndexTest, FindTermByPrefixShouldReturnCombinedHitCount) { Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); @@ -1274,18 +1191,19 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnCombinedHitCount) { EXPECT_THAT(edit.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2), - EqualsTermMetadata("foo", 1)))); + // 'foo' has 1 hit in the main index, 'fool' has 1 hit in the main index and + // 1 hit in the lite index. + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre( + EqualsTermMetadata("foo", kMinSizePlApproxHits), + EqualsTermMetadata("fool", kMinSizePlApproxHits + 1)))); } TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsFromBothIndices) { Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - AlwaysTrueNamespaceCheckerImpl impl; - EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); @@ -1297,11 +1215,11 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsFromBothIndices) { EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); // 'foo' has 1 hit in the main index, 'fool' has 1 hit in the lite index. 
- EXPECT_THAT( - index_->FindTermsByPrefix(/*prefix=*/"f", /*num_to_return=*/10, - TermMatchType::PREFIX, &impl), - IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1), - EqualsTermMetadata("fool", 1)))); + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, + /*num_to_return=*/10), + IsOkAndHolds(UnorderedElementsAre( + EqualsTermMetadata("foo", kMinSizePlApproxHits), + EqualsTermMetadata("fool", 1)))); } TEST_F(IndexTest, GetElementsSize) { @@ -1395,14 +1313,12 @@ TEST_F(IndexTest, GetDebugInfo) { EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX, /*namespace_id=*/0); - index_->set_last_added_document_id(kDocumentId1); ASSERT_THAT(edit.BufferTerm("foot"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); ICING_ASSERT_OK(index_->Merge()); edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - index_->set_last_added_document_id(kDocumentId2); ASSERT_THAT(edit.BufferTerm("footer"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX, @@ -1410,45 +1326,40 @@ TEST_F(IndexTest, GetDebugInfo) { ASSERT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); - IndexDebugInfoProto out0 = index_->GetDebugInfo(/*verbosity=*/0); - EXPECT_FALSE(out0.main_index_info().has_flash_index_storage_info()); - EXPECT_THAT(out0.main_index_info().last_added_document_id(), - Eq(kDocumentId1)); - EXPECT_THAT(out0.lite_index_info().curr_size(), Eq(2)); - EXPECT_THAT(out0.lite_index_info().last_added_document_id(), - Eq(kDocumentId2)); + std::string out0; + index_->GetDebugInfo(/*verbosity=*/0, &out0); + EXPECT_THAT(out0, Not(IsEmpty())); - IndexDebugInfoProto out1 = index_->GetDebugInfo(/*verbosity=*/1); - EXPECT_THAT(out1.main_index_info().flash_index_storage_info(), - Not(IsEmpty())); + std::string out1; + index_->GetDebugInfo(/*verbosity=*/1, &out1); + EXPECT_THAT(out1, SizeIs(Gt(out0.size()))); // Add one more doc to the lite index. Debug strings should change. edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); - index_->set_last_added_document_id(kDocumentId3); ASSERT_THAT(edit.BufferTerm("far"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); - IndexDebugInfoProto out2 = index_->GetDebugInfo(/*verbosity=*/0); - EXPECT_THAT(out2.lite_index_info().curr_size(), Eq(3)); - EXPECT_THAT(out2.lite_index_info().last_added_document_id(), - Eq(kDocumentId3)); + std::string out2; + index_->GetDebugInfo(/*verbosity=*/0, &out2); + EXPECT_THAT(out2, Ne(out0)); + + std::string out3; + index_->GetDebugInfo(/*verbosity=*/1, &out3); + EXPECT_THAT(out3, Ne(out1)); // Merge into the main index. Debug strings should change again.
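// [Usage sketch] The string-based debug API these expectations cover,
// assuming an initialized Index named index; verbosity 1 appends the more
// detailed posting-list information on top of the summary.
std::string summary;
index->GetDebugInfo(/*verbosity=*/0, &summary);   // Lexicon and hit buffer stats.
std::string detailed;
index->GetDebugInfo(/*verbosity=*/1, &detailed);  // Adds flash index storage details.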
ICING_ASSERT_OK(index_->Merge()); - IndexDebugInfoProto out3 = index_->GetDebugInfo(/*verbosity=*/0); - EXPECT_TRUE(out3.has_index_storage_info()); - EXPECT_THAT(out3.main_index_info().lexicon_info(), Not(IsEmpty())); - EXPECT_THAT(out3.main_index_info().last_added_document_id(), - Eq(kDocumentId3)); - EXPECT_THAT(out3.lite_index_info().curr_size(), Eq(0)); - EXPECT_THAT(out3.lite_index_info().hit_buffer_size(), Gt(0)); - EXPECT_THAT(out3.lite_index_info().last_added_document_id(), - Eq(kInvalidDocumentId)); - EXPECT_THAT(out3.lite_index_info().searchable_end(), Eq(0)); - EXPECT_THAT(out3.lite_index_info().index_crc(), Gt(0)); - EXPECT_THAT(out3.lite_index_info().lexicon_info(), Not(IsEmpty())); + std::string out4; + index_->GetDebugInfo(/*verbosity=*/0, &out4); + EXPECT_THAT(out4, Ne(out0)); + EXPECT_THAT(out4, Ne(out2)); + + std::string out5; + index_->GetDebugInfo(/*verbosity=*/1, &out5); + EXPECT_THAT(out5, Ne(out1)); + EXPECT_THAT(out5, Ne(out3)); } TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) { diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc index 543e9ef..66f87bd 100644 --- a/icing/index/iterator/doc-hit-info-iterator-and.cc +++ b/icing/index/iterator/doc-hit-info-iterator-and.cc @@ -14,7 +14,8 @@ #include "icing/index/iterator/doc-hit-info-iterator-and.h" -#include <cstddef> +#include <stddef.h> + #include <cstdint> #include <memory> #include <string> @@ -161,7 +162,6 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() { DocumentId unused; ICING_ASSIGN_OR_RETURN( unused, AdvanceTo(iterator.get(), potential_document_id)); - (void)unused; // Silence unused warning. } if (iterator->doc_hit_info().document_id() == potential_document_id) { diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc index 7c6d924..43a846b 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc @@ -48,13 +48,13 @@ using ::testing::ElementsAreArray; using ::testing::Eq; using ::testing::IsEmpty; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; -constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test { protected: diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc index f215d63..d535d7f 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc @@ -45,13 +45,8 @@ libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() { if (cached_hits_idx_ == -1) { libtextclassifier3::Status status = RetrieveMoreHits(); if (!status.ok()) { - if (!absl_ports::IsNotFound(status)) { - // NOT_FOUND is expected to happen (not every term will be in the main - // index!). Other errors are worth logging. 
- ICING_LOG(ERROR) - << "Encountered unexpected failure while retrieving hits " - << status.error_message(); - } + ICING_LOG(ERROR) << "Failed to retrieve more hits " + << status.error_message(); return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } @@ -77,8 +72,7 @@ libtextclassifier3::Status DocHitInfoIteratorTermLiteExact::RetrieveMoreHits() { ICING_ASSIGN_OR_RETURN(uint32_t term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); lite_index_->AppendHits(term_id, section_restrict_mask_, - /*only_from_prefix_sections=*/false, - /*namespace_checker=*/nullptr, &cached_hits_); + /*only_from_prefix_sections=*/false, &cached_hits_); cached_hits_idx_ = 0; return libtextclassifier3::Status::OK; } @@ -101,7 +95,7 @@ DocHitInfoIteratorTermLitePrefix::RetrieveMoreHits() { term_id_codec_->EncodeTvi(it.GetValueIndex(), TviType::LITE)); lite_index_->AppendHits(term_id, section_restrict_mask_, /*only_from_prefix_sections=*/!exact_match, - /*namespace_checker=*/nullptr, &cached_hits_); + &cached_hits_); ++terms_matched; } if (terms_matched > 1) { diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h index 179fc93..8dbe043 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.h +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h @@ -82,11 +82,6 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator { protected: // Add DocHitInfos corresponding to term_ to cached_hits_. - // - // Returns: - // - OK, on success - // - NOT_FOUND if no term matching term_ was found in the lexicon. - // - INVALID_ARGUMENT if unable to properly encode the termid virtual libtextclassifier3::Status RetrieveMoreHits() = 0; const std::string term_; diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index a5c6baf..fb23934 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -14,11 +14,12 @@ #include "icing/index/lite/lite-index.h" +#include <inttypes.h> +#include <stddef.h> +#include <stdint.h> #include <sys/mman.h> #include <algorithm> -#include <cinttypes> -#include <cstddef> #include <cstdint> #include <memory> #include <string> @@ -336,12 +337,9 @@ libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId( int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask, bool only_from_prefix_sections, - const NamespaceChecker* namespace_checker, std::vector<DocHitInfo>* hits_out) { int count = 0; DocumentId last_document_id = kInvalidDocumentId; - // Record whether the last document belongs to the given namespaces. - bool last_document_in_namespace = false; for (uint32_t idx = Seek(term_id); idx < header_->cur_size(); idx++) { TermIdHitPair term_id_hit_pair( hit_buffer_.array_cast<TermIdHitPair>()[idx]); @@ -358,31 +356,22 @@ int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask, } DocumentId document_id = hit.document_id(); if (document_id != last_document_id) { - last_document_id = document_id; - last_document_in_namespace = - namespace_checker == nullptr || - namespace_checker->BelongsToTargetNamespaces(document_id); - if (!last_document_in_namespace) { - // The document is removed or expired or not belongs to target - // namespaces. 
- continue; - } ++count; if (hits_out != nullptr) { hits_out->push_back(DocHitInfo(document_id)); } + last_document_id = document_id; } - if (hits_out != nullptr && last_document_in_namespace) { + if (hits_out != nullptr) { hits_out->back().UpdateSection(hit.section_id(), hit.term_frequency()); } } return count; } -libtextclassifier3::StatusOr<int> LiteIndex::CountHits( - uint32_t term_id, const NamespaceChecker* namespace_checker) { +int LiteIndex::CountHits(uint32_t term_id) { return AppendHits(term_id, kSectionIdMaskAll, - /*only_from_prefix_sections=*/false, namespace_checker, + /*only_from_prefix_sections=*/false, /*hits_out=*/nullptr); } @@ -391,16 +380,15 @@ bool LiteIndex::is_full() const { lexicon_.min_free_fraction() < (1.0 - kTrieFullFraction)); } -IndexDebugInfoProto::LiteIndexDebugInfoProto LiteIndex::GetDebugInfo( - int verbosity) { - IndexDebugInfoProto::LiteIndexDebugInfoProto res; - res.set_curr_size(header_->cur_size()); - res.set_hit_buffer_size(options_.hit_buffer_size); - res.set_last_added_document_id(header_->last_added_docid()); - res.set_searchable_end(header_->searchable_end()); - res.set_index_crc(ComputeChecksum().Get()); - lexicon_.GetDebugInfo(verbosity, res.mutable_lexicon_info()); - return res; +void LiteIndex::GetDebugInfo(int verbosity, std::string* out) const { + absl_ports::StrAppend( + out, IcingStringUtil::StringPrintf("Lite Index\nHit buffer %u/%u\n", + header_->cur_size(), + options_.hit_buffer_size)); + + // Lexicon. + out->append("Lexicon stats:\n"); + lexicon_.GetDebugInfo(verbosity, out); } libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const { @@ -421,8 +409,12 @@ IndexStorageInfoProto LiteIndex::GetStorageInfo( IndexStorageInfoProto storage_info) const { int64_t header_and_hit_buffer_file_size = filesystem_->GetFileSize(hit_buffer_fd_.get()); - storage_info.set_lite_index_hit_buffer_size( - IcingFilesystem::SanitizeFileSize(header_and_hit_buffer_file_size)); + if (header_and_hit_buffer_file_size != Filesystem::kBadFileSize) { + storage_info.set_lite_index_hit_buffer_size( + header_and_hit_buffer_file_size); + } else { + storage_info.set_lite_index_hit_buffer_size(-1); + } int64_t lexicon_disk_usage = lexicon_.GetElementsSize(); if (lexicon_disk_usage != Filesystem::kBadFileSize) { storage_info.set_lite_index_lexicon_size(lexicon_disk_usage); diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h index 378fc94..b134aba 100644 --- a/icing/index/lite/lite-index.h +++ b/icing/index/lite/lite-index.h @@ -37,12 +37,10 @@ #include "icing/legacy/index/icing-lite-index-header.h" #include "icing/legacy/index/icing-lite-index-options.h" #include "icing/legacy/index/icing-mmapper.h" -#include "icing/proto/debug.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" -#include "icing/store/namespace-checker.h" #include "icing/store/namespace-id.h" #include "icing/util/bit-util.h" #include "icing/util/crc32.h" @@ -142,19 +140,13 @@ class LiteIndex { // skipping hits in non-prefix sections if only_from_prefix_sections is true, // to hits_out. If hits_out is nullptr, no hits will be added. // - // Only those hits which belongs to the given namespaces will be counted and - // appended. A nullptr namespace checker will disable this check. - // // Returns the number of hits that would be added to hits_out. 
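// [Usage sketch] For the AppendHits declared just below, assuming a populated
// LiteIndex named lite_index and a valid term_id: pass a vector to collect
// DocHitInfos, or nullptr to count without copying, which is exactly how the
// new CountHits is implemented in lite-index.cc.
std::vector<DocHitInfo> hits;
int num_hits = lite_index->AppendHits(term_id, kSectionIdMaskAll,
                                      /*only_from_prefix_sections=*/false,
                                      &hits);
int count_only = lite_index->CountHits(term_id);  // Same count, no DocHitInfos.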
int AppendHits(uint32_t term_id, SectionIdMask section_id_mask, bool only_from_prefix_sections, - const NamespaceChecker* namespace_checker, std::vector<DocHitInfo>* hits_out); // Returns the hit count of the term. - // Only those hits which belongs to the given namespaces will be counted. - libtextclassifier3::StatusOr<int> CountHits( - uint32_t term_id, const NamespaceChecker* namespace_checker); + int CountHits(uint32_t term_id); // Check if buffer has reached its capacity. bool is_full() const; @@ -242,7 +234,7 @@ class LiteIndex { // Returns debug information for the index in out. // verbosity <= 0, simplest debug information - size of lexicon, hit buffer // verbosity > 0, more detailed debug information from the lexicon. - IndexDebugInfoProto::LiteIndexDebugInfoProto GetDebugInfo(int verbosity); + void GetDebugInfo(int verbosity, std::string* out) const; // Returns the byte size of all the elements held in the index. This excludes // the size of any internal metadata of the index, e.g. the index's header. diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc deleted file mode 100644 index 825f830..0000000 --- a/icing/index/lite/lite-index_test.cc +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
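// [Sketch] The test file deleted below exercised AppendHits through the old
// NamespaceChecker interface. Its replacement filters at the lexicon level
// via trie properties; a usage fragment of the IsTermInNamespaces helper
// shown in the index.cc hunk above (lexicon and value_index are assumed
// context):
IcingDynamicTrie::PropertyReadersAll property_reader(lexicon);
std::vector<NamespaceId> target_namespaces = {0, 1};
if (IsTermInNamespaces(property_reader, value_index, target_namespaces)) {
  // The term has at least one hit in namespace 0 or namespace 1.
}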
- -#include "icing/index/lite/lite-index.h" - -#include <vector> - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "icing/index/term-id-codec.h" -#include "icing/legacy/index/icing-mock-filesystem.h" -#include "icing/schema/section.h" -#include "icing/store/namespace-checker.h" -#include "icing/testing/common-matchers.h" -#include "icing/testing/tmp-directory.h" - -namespace icing { -namespace lib { - -namespace { - -using ::testing::Eq; -using ::testing::IsEmpty; -using ::testing::SizeIs; - -class AlwaysFalseNamespaceCheckerImpl : public NamespaceChecker { - public: - bool BelongsToTargetNamespaces(DocumentId document_id) const override { - return false; - } -}; - -class LiteIndexTest : public testing::Test { - protected: - void SetUp() override { - index_dir_ = GetTestTempDir() + "/test_dir"; - ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str())); - - std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index"; - LiteIndex::Options options(lite_index_file_name, - /*hit_buffer_want_merge_bytes=*/1024 * 1024); - ICING_ASSERT_OK_AND_ASSIGN(lite_index_, - LiteIndex::Create(options, &icing_filesystem_)); - - ICING_ASSERT_OK_AND_ASSIGN( - term_id_codec_, - TermIdCodec::Create( - IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()), - IcingDynamicTrie::max_value_index(options.lexicon_options))); - } - - void TearDown() override { - ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str())); - } - - std::string index_dir_; - Filesystem filesystem_; - IcingFilesystem icing_filesystem_; - std::unique_ptr<LiteIndex> lite_index_; - std::unique_ptr<TermIdCodec> term_id_codec_; -}; - -constexpr NamespaceId kNamespace0 = 0; - -TEST_F(LiteIndexTest, LiteIndexAppendHits) { - ICING_ASSERT_OK_AND_ASSIGN( - uint32_t tvi, - lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0)); - ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id, - term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - Hit doc_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, - /*is_in_prefix_section=*/false); - Hit doc_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency, - /*is_in_prefix_section=*/false); - ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0)); - ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1)); - - std::vector<DocHitInfo> hits1; - lite_index_->AppendHits(foo_term_id, kSectionIdMaskAll, - /*only_from_prefix_sections=*/false, - /*namespace_checker=*/nullptr, &hits1); - EXPECT_THAT(hits1, SizeIs(1)); - EXPECT_THAT(hits1.back().document_id(), Eq(0)); - // Check that the hits are coming from section 0 and section 1. - EXPECT_THAT(hits1.back().hit_section_ids_mask(), Eq(0b11)); - - std::vector<DocHitInfo> hits2; - AlwaysFalseNamespaceCheckerImpl always_false_namespace_checker; - lite_index_->AppendHits(foo_term_id, kSectionIdMaskAll, - /*only_from_prefix_sections=*/false, - &always_false_namespace_checker, &hits2); - // Check that no hits are returned because they get skipped by the namespace - // checker. 
- EXPECT_THAT(hits2, IsEmpty()); -} - -} // namespace -} // namespace lib -} // namespace icing diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc index 98bc18e..5553c1e 100644 --- a/icing/index/main/doc-hit-info-iterator-term-main.cc +++ b/icing/index/main/doc-hit-info-iterator-term-main.cc @@ -57,9 +57,8 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() { if (!absl_ports::IsNotFound(status)) { // NOT_FOUND is expected to happen (not every term will be in the main // index!). Other errors are worth logging. - ICING_LOG(ERROR) - << "Encountered unexpected failure while retrieving hits " - << status.error_message(); + ICING_LOG(ERROR) << "Failed to retrieve more hits " + << status.error_message(); } return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); diff --git a/icing/index/main/flash-index-storage.cc b/icing/index/main/flash-index-storage.cc index 3c52375..f125b6d 100644 --- a/icing/index/main/flash-index-storage.cc +++ b/icing/index/main/flash-index-storage.cc @@ -14,11 +14,11 @@ #include "icing/index/main/flash-index-storage.h" +#include <errno.h> +#include <inttypes.h> #include <sys/types.h> #include <algorithm> -#include <cerrno> -#include <cinttypes> #include <cstdint> #include <memory> #include <unordered_set> diff --git a/icing/index/main/flash-index-storage.h b/icing/index/main/flash-index-storage.h index 6c6fbb8..8d5b50b 100644 --- a/icing/index/main/flash-index-storage.h +++ b/icing/index/main/flash-index-storage.h @@ -159,7 +159,6 @@ class FlashIndexStorage { libtextclassifier3::Status Reset(); - // TODO(b/222349894) Convert the string output to a protocol buffer instead. void GetDebugInfo(int verbosity, std::string* out) const; private: diff --git a/icing/index/main/flash-index-storage_test.cc b/icing/index/main/flash-index-storage_test.cc index 25fcaad..7e15524 100644 --- a/icing/index/main/flash-index-storage_test.cc +++ b/icing/index/main/flash-index-storage_test.cc @@ -14,10 +14,10 @@ #include "icing/index/main/flash-index-storage.h" +#include <stdlib.h> #include <unistd.h> #include <algorithm> -#include <cstdlib> #include <limits> #include <utility> #include <vector> diff --git a/icing/index/main/index-block.cc b/icing/index/main/index-block.cc index c6ab345..4590d06 100644 --- a/icing/index/main/index-block.cc +++ b/icing/index/main/index-block.cc @@ -14,8 +14,9 @@ #include "icing/index/main/index-block.h" +#include <inttypes.h> + #include <algorithm> -#include <cinttypes> #include <limits> #include "icing/text_classifier/lib3/utils/base/statusor.h" diff --git a/icing/index/main/index-block.h b/icing/index/main/index-block.h index 5d75a2a..edf9a79 100644 --- a/icing/index/main/index-block.h +++ b/icing/index/main/index-block.h @@ -15,10 +15,10 @@ #ifndef ICING_INDEX_MAIN_INDEX_BLOCK_H_ #define ICING_INDEX_MAIN_INDEX_BLOCK_H_ +#include <string.h> #include <sys/mman.h> #include <algorithm> -#include <cstring> #include <limits> #include <memory> #include <string> diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc index 2d6007b..8ae6b27 100644 --- a/icing/index/main/main-index.cc +++ b/icing/index/main/main-index.cc @@ -133,10 +133,18 @@ libtextclassifier3::StatusOr<int64_t> MainIndex::GetElementsSize() const { IndexStorageInfoProto MainIndex::GetStorageInfo( IndexStorageInfoProto storage_info) const { - storage_info.set_main_index_lexicon_size( - IcingFilesystem::SanitizeFileSize(main_lexicon_->GetElementsSize())); - 
storage_info.set_main_index_storage_size( - Filesystem::SanitizeFileSize(flash_index_storage_->GetElementsSize())); + int64_t lexicon_elt_size = main_lexicon_->GetElementsSize(); + if (lexicon_elt_size != IcingFilesystem::kBadFileSize) { + storage_info.set_main_index_lexicon_size(lexicon_elt_size); + } else { + storage_info.set_main_index_lexicon_size(-1); + } + int64_t index_elt_size = flash_index_storage_->GetElementsSize(); + if (index_elt_size != IcingFilesystem::kBadFileSize) { + storage_info.set_main_index_storage_size(index_elt_size); + } else { + storage_info.set_main_index_storage_size(-1); + } storage_info.set_main_index_block_size(flash_index_storage_->block_size()); storage_info.set_num_blocks(flash_index_storage_->num_blocks()); storage_info.set_min_free_fraction(flash_index_storage_->min_free_fraction()); @@ -178,7 +186,7 @@ MainIndex::GetAccessorForPrefixTerm(const std::string& prefix) { if (!exact && !hits_in_prefix_section.HasProperty(main_itr.GetValueIndex())) { // Found it, but it doesn't have prefix hits. Exit early. No need to // retrieve the posting list because there's nothing there for us. - return absl_ports::NotFoundError("The term doesn't have any prefix hits."); + return libtextclassifier3::Status::OK; } PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid; memcpy(&posting_list_id, main_itr.GetValue(), sizeof(posting_list_id)); @@ -209,48 +217,46 @@ bool IsTermInNamespaces( libtextclassifier3::StatusOr<std::vector<TermMetadata>> MainIndex::FindTermsByPrefix(const std::string& prefix, - TermMatchType::Code term_match_type, - const NamespaceChecker* namespace_checker) { + const std::vector<NamespaceId>& namespace_ids, + int num_to_return) { // Finds all the terms that start with the given prefix in the lexicon. IcingDynamicTrie::Iterator term_iterator(*main_lexicon_, prefix.c_str()); + // A property reader to help check if a term has some property. + IcingDynamicTrie::PropertyReadersAll property_reader(*main_lexicon_); + std::vector<TermMetadata> term_metadata_list; - while (term_iterator.IsValid()) { - int count = 0; - DocumentId last_document_id = kInvalidDocumentId; + while (term_iterator.IsValid() && term_metadata_list.size() < num_to_return) { + uint32_t term_value_index = term_iterator.GetValueIndex(); + // Skips the terms that don't exist in the given namespaces. We won't skip + // any terms if namespace_ids is empty. + if (!IsTermInNamespaces(property_reader, term_value_index, namespace_ids)) { + term_iterator.Advance(); + continue; + } PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid; memcpy(&posting_list_id, term_iterator.GetValue(), sizeof(posting_list_id)); - ICING_ASSIGN_OR_RETURN(PostingListAccessor pl_accessor, - PostingListAccessor::CreateFromExisting( - flash_index_storage_.get(), posting_list_id)); - ICING_ASSIGN_OR_RETURN(std::vector<Hit> hits, - pl_accessor.GetNextHitsBatch()); - for (const Hit& hit : hits) { - DocumentId document_id = hit.document_id(); - if (document_id != last_document_id) { - last_document_id = document_id; - if (term_match_type == TermMatchType::EXACT_ONLY && - hit.is_prefix_hit()) { - continue; - } - if (!namespace_checker->BelongsToTargetNamespaces(document_id)) { - // The document is removed or expired or not belongs to target - // namespaces. - continue; - } - // TODO(b/152934343) Add search type in SuggestionSpec to ask user to - // input search type, prefix or exact. And make different score strategy - base on that.
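// [Sketch, hypothetical helper] The GetStorageInfo hunks in this change all
// repeat one pattern: write the size if it is readable, otherwise -1 instead
// of a garbage value. Condensed, with SanitizeOrNegative as an illustrative
// name (the removed side of this diff expresses the same idea as
// Filesystem::SanitizeFileSize):
#include <cstdint>
int64_t SanitizeOrNegative(int64_t size, int64_t bad_value) {
  return size != bad_value ? size : -1;
}
// e.g. storage_info.set_main_index_lexicon_size(
//          SanitizeOrNegative(main_lexicon_->GetElementsSize(),
//                             IcingFilesystem::kBadFileSize));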
- ++count; - } - } - if (count > 0) { - term_metadata_list.push_back(TermMetadata(term_iterator.GetKey(), count)); - } + // Getting the actual hit count would require reading the entire posting + // list chain. We take an approximation to avoid all of those IO ops. + // Because we are not reading the posting lists, it is impossible to + // differentiate between single max-size posting lists and chains of + // max-size posting lists. We assume that the impact on scoring is not + // significant. + int approx_hit_count = IndexBlock::ApproximateFullPostingListHitsForBlock( + flash_index_storage_->block_size(), + posting_list_id.posting_list_index_bits()); + term_metadata_list.emplace_back(term_iterator.GetKey(), approx_hit_count); term_iterator.Advance(); } + if (term_iterator.IsValid()) { + // We exited the loop above because we hit the num_to_return limit. + ICING_LOG(WARNING) << "Ran into limit of " << num_to_return + << " retrieving suggestions for " << prefix + << ". Some suggestions may not be returned and others " "may be misranked."; + } return term_metadata_list; } @@ -607,22 +613,16 @@ libtextclassifier3::Status MainIndex::AddPrefixBackfillHits( return libtextclassifier3::Status::OK; } -IndexDebugInfoProto::MainIndexDebugInfoProto MainIndex::GetDebugInfo( - int verbosity) const { - IndexDebugInfoProto::MainIndexDebugInfoProto res; - +void MainIndex::GetDebugInfo(int verbosity, std::string* out) const { // Lexicon. - main_lexicon_->GetDebugInfo(verbosity, res.mutable_lexicon_info()); - - res.set_last_added_document_id(last_added_document_id()); + out->append("Main Lexicon stats:\n"); + main_lexicon_->GetDebugInfo(verbosity, out); if (verbosity <= 0) { - return res; + return; } - flash_index_storage_->GetDebugInfo(verbosity, - res.mutable_flash_index_storage_info()); - return res; + flash_index_storage_->GetDebugInfo(verbosity, out); } } // namespace lib diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h index abb0418..43635ca 100644 --- a/icing/index/main/main-index.h +++ b/icing/index/main/main-index.h @@ -27,9 +27,7 @@ #include "icing/index/term-metadata.h" #include "icing/legacy/index/icing-dynamic-trie.h" #include "icing/legacy/index/icing-filesystem.h" -#include "icing/proto/debug.pb.h" #include "icing/proto/storage.pb.h" -#include "icing/store/namespace-checker.h" #include "icing/store/namespace-id.h" #include "icing/util/status-macros.h" @@ -73,17 +71,18 @@ class MainIndex { // Finds terms with the given prefix in the given namespaces. If // 'namespace_ids' is empty, returns results from all the namespaces. The // input prefix must be normalized, otherwise inaccurate results may be - // returned. If term_match_type is EXACT, only exact hit will be counted and - // it is PREFIX, both prefix and exact hits will be counted. Results are not - // sorted specifically and are in lexigraphical order. Number of results are - // no more than 'num_to_return'. + // returned. Results are not sorted specifically and are in lexicographical + // order. The number of results is no more than 'num_to_return'. + // + // The hit count returned with each TermMetadata is an approximation based on + // posting list size. + // // Returns: // A list of TermMetadata on success // INTERNAL_ERROR if failed to access term data.
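// [Illustration, assumed arithmetic] The approximation documented above: a
// block that holds 2^posting_list_index_bits posting lists gives each list
// block_size >> posting_list_index_bits bytes, and dividing by an assumed
// average encoded hit size estimates how many hits a full list holds. The
// real IndexBlock::ApproximateFullPostingListHitsForBlock accounts for the
// exact block header and hit encoding; the constant here is a placeholder.
#include <cstdint>
constexpr uint32_t kAssumedBytesPerHit = 5;  // Placeholder, not icing's value.
uint32_t ApproximateHits(uint32_t block_size,
                         uint32_t posting_list_index_bits) {
  uint32_t posting_list_bytes = block_size >> posting_list_index_bits;
  return posting_list_bytes / kAssumedBytesPerHit;
}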
libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindTermsByPrefix( - const std::string& prefix, TermMatchType::Code term_match_type, - const NamespaceChecker* namespace_checker); + const std::string& prefix, const std::vector<NamespaceId>& namespace_ids, + int num_to_return); struct LexiconMergeOutputs { // Maps from main_lexicon tvi for new branching point to the main_lexicon @@ -186,8 +185,7 @@ class MainIndex { // verbosity <= 0, simplest debug information - just the lexicon // verbosity > 0, more detailed debug information including raw postings // lists. - IndexDebugInfoProto::MainIndexDebugInfoProto GetDebugInfo( - int verbosity) const; + void GetDebugInfo(int verbosity, std::string* out) const; private: libtextclassifier3::Status Init(const std::string& index_directory, diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc index fa83d68..74139be 100644 --- a/icing/index/main/main-index_test.cc +++ b/icing/index/main/main-index_test.cc @@ -162,34 +162,6 @@ TEST_F(MainIndexTest, MainIndexGetAccessorForPrefixReturnsValidAccessor) { EXPECT_THAT(main_index->GetAccessorForPrefixTerm("foo"), IsOk()); } -TEST_F(MainIndexTest, MainIndexGetAccessorForPrefixReturnsNotFound) { - // 1. Index one doc in the Lite Index: - // - Doc0 {"foot" is_in_prefix_section=false} - ICING_ASSERT_OK_AND_ASSIGN( - uint32_t tvi, - lite_index_->InsertTerm("foot", TermMatchType::EXACT_ONLY, kNamespace0)); - ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id, - term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - - Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, - /*is_in_prefix_section=*/false); - ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit)); - - // 2. Create the main index. It should have no entries in its lexicon. - std::string main_index_file_name = index_dir_ + "/test_file.idx.index"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<MainIndex> main_index, - MainIndex::Create(main_index_file_name, &filesystem_, - &icing_filesystem_)); - - // 3. Merge the index. The main index should return not found when we search - // prefix contain "foo". - ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get())); - // GetAccessorForPrefixTerm should return a valid accessor for "foo". - EXPECT_THAT(main_index->GetAccessorForPrefixTerm("foo"), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); -} - TEST_F(MainIndexTest, MainIndexGetAccessorForExactTermNotFound) { // Create the main index. It should have no entries in its lexicon. std::string main_index_file_name = index_dir_ + "/test_file.idx.index"; diff --git a/icing/index/main/posting-list-free.h b/icing/index/main/posting-list-free.h index 75b99d7..4b27401 100644 --- a/icing/index/main/posting-list-free.h +++ b/icing/index/main/posting-list-free.h @@ -15,10 +15,10 @@ #ifndef ICING_INDEX_MAIN_POSTING_LIST_FREE_H_ #define ICING_INDEX_MAIN_POSTING_LIST_FREE_H_ +#include <string.h> #include <sys/mman.h> #include <cstdint> -#include <cstring> #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" @@ -115,7 +115,7 @@ class PostingListFree { // bytes which will store the next posting list index, the rest are unused and // can be anything. 
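The posting-list-free.h comment above describes an intrusive free list: once a posting list is freed, its first sizeof(PostingListIndex) bytes are reused to store the index of the next free posting list, so no side table is needed. Here is a minimal self-contained illustration of that trick; the type and names are invented for the sketch, not Icing's real API.

#include <cstdint>
#include <cstring>

using PostingListIndex = int32_t;

// Views a freed posting list region as a free-list node whose link is
// stored in the region's own first bytes.
struct FreeListNodeView {
  uint8_t* bytes;  // start of the freed posting list region

  PostingListIndex next() const {
    PostingListIndex index;
    memcpy(&index, bytes, sizeof(index));
    return index;
  }
  void set_next(PostingListIndex index) {
    memcpy(bytes, &index, sizeof(index));
  }
};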
uint8_t *posting_list_buffer_; - [[maybe_unused]] uint32_t size_in_bytes_; + uint32_t size_in_bytes_; static_assert(sizeof(PostingListIndex) <= posting_list_utils::min_posting_list_size(), diff --git a/icing/index/main/posting-list-used.h b/icing/index/main/posting-list-used.h index 8944034..1b2e24e 100644 --- a/icing/index/main/posting-list-used.h +++ b/icing/index/main/posting-list-used.h @@ -15,10 +15,10 @@ #ifndef ICING_INDEX_MAIN_POSTING_LIST_USED_H_ #define ICING_INDEX_MAIN_POSTING_LIST_USED_H_ +#include <string.h> #include <sys/mman.h> #include <algorithm> -#include <cstring> #include <vector> #include "icing/text_classifier/lib3/utils/base/status.h" diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc index bcc35e6..ea2bcf7 100644 --- a/icing/jni/icing-search-engine-jni.cc +++ b/icing/jni/icing-search-engine-jni.cc @@ -166,7 +166,6 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetSchemaType( env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr); icing::lib::GetSchemaTypeResultProto get_schema_type_result_proto = icing->GetSchemaType(native_schema_type); - env->ReleaseStringUTFChars(schema_type, native_schema_type); return SerializeProtoToJniByteArray(env, get_schema_type_result_proto); } @@ -193,20 +192,19 @@ JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeGet( JNIEnv* env, jclass clazz, jobject object, jstring name_space, jstring uri, jbyteArray result_spec_bytes) { - icing::lib::GetResultSpecProto get_result_spec; - if (!ParseProtoFromJniByteArray(env, result_spec_bytes, &get_result_spec)) { - ICING_LOG(ERROR) << "Failed to parse GetResultSpecProto in nativeGet"; - return nullptr; - } icing::lib::IcingSearchEngine* icing = GetIcingSearchEnginePointer(env, object); + const char* native_name_space = env->GetStringUTFChars(name_space, /*isCopy=*/nullptr); const char* native_uri = env->GetStringUTFChars(uri, /*isCopy=*/nullptr); + icing::lib::GetResultSpecProto get_result_spec; + if (!ParseProtoFromJniByteArray(env, result_spec_bytes, &get_result_spec)) { + ICING_LOG(ERROR) << "Failed to parse GetResultSpecProto in nativeGet"; + return nullptr; + } icing::lib::GetResultProto get_result_proto = icing->Get(native_name_space, native_uri, get_result_spec); - env->ReleaseStringUTFChars(uri, native_uri); - env->ReleaseStringUTFChars(name_space, native_name_space); return SerializeProtoToJniByteArray(env, get_result_proto); } @@ -308,8 +306,6 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDelete( const char* native_uri = env->GetStringUTFChars(uri, /*isCopy=*/nullptr); icing::lib::DeleteResultProto delete_result_proto = icing->Delete(native_name_space, native_uri); - env->ReleaseStringUTFChars(uri, native_uri); - env->ReleaseStringUTFChars(name_space, native_name_space); return SerializeProtoToJniByteArray(env, delete_result_proto); } @@ -324,7 +320,6 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByNamespace( env->GetStringUTFChars(name_space, /*isCopy=*/nullptr); icing::lib::DeleteByNamespaceResultProto delete_by_namespace_result_proto = icing->DeleteByNamespace(native_name_space); - env->ReleaseStringUTFChars(name_space, native_name_space); return SerializeProtoToJniByteArray(env, delete_by_namespace_result_proto); } @@ -339,7 +334,6 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType( env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr); icing::lib::DeleteBySchemaTypeResultProto delete_by_schema_type_result_proto = 
icing->DeleteBySchemaType(native_schema_type); - env->ReleaseStringUTFChars(schema_type, native_schema_type); return SerializeProtoToJniByteArray(env, delete_by_schema_type_result_proto); } @@ -426,23 +420,4 @@ Java_com_google_android_icing_IcingSearchEngine_nativeReset( return SerializeProtoToJniByteArray(env, reset_result_proto); } -JNIEXPORT jbyteArray JNICALL -Java_com_google_android_icing_IcingSearchEngine_nativeSearchSuggestions( - JNIEnv* env, jclass clazz, jobject object, - jbyteArray suggestion_spec_bytes) { - icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(env, object); - - icing::lib::SuggestionSpecProto suggestion_spec_proto; - if (!ParseProtoFromJniByteArray(env, suggestion_spec_bytes, - &suggestion_spec_proto)) { - ICING_LOG(ERROR) << "Failed to parse SuggestionSpecProto in nativeSearch"; - return nullptr; - } - icing::lib::SuggestionResponse suggestionResponse = - icing->SearchSuggestions(suggestion_spec_proto); - - return SerializeProtoToJniByteArray(env, suggestionResponse); -} - } // extern "C" diff --git a/icing/legacy/core/icing-core-types.h b/icing/legacy/core/icing-core-types.h index 7db8408..cc12663 100644 --- a/icing/legacy/core/icing-core-types.h +++ b/icing/legacy/core/icing-core-types.h @@ -21,8 +21,9 @@ #ifndef ICING_LEGACY_CORE_ICING_CORE_TYPES_H_ #define ICING_LEGACY_CORE_ICING_CORE_TYPES_H_ +#include <stdint.h> + #include <cstddef> // size_t not defined implicitly for all platforms. -#include <cstdint> #include <vector> #include "icing/legacy/core/icing-compat.h" diff --git a/icing/legacy/core/icing-string-util.cc b/icing/legacy/core/icing-string-util.cc index ed06e03..2eb64ac 100644 --- a/icing/legacy/core/icing-string-util.cc +++ b/icing/legacy/core/icing-string-util.cc @@ -13,11 +13,12 @@ // limitations under the License. 
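The JNI hunks above delete the env->ReleaseStringUTFChars() calls that balanced each env->GetStringUTFChars(). In general JNI usage those calls must stay paired or the UTF-8 buffers can leak, and early returns make manual pairing fragile. A common generic remedy, shown as a sketch rather than as this codebase's actual pattern, is an RAII guard:

#include <jni.h>

// Acquires the UTF-8 chars of a jstring and releases them on scope exit.
class ScopedUtfChars {
 public:
  ScopedUtfChars(JNIEnv* env, jstring s)
      : env_(env), s_(s),
        chars_(env->GetStringUTFChars(s, /*isCopy=*/nullptr)) {}
  ~ScopedUtfChars() {
    if (chars_ != nullptr) env_->ReleaseStringUTFChars(s_, chars_);
  }
  const char* c_str() const { return chars_; }

 private:
  JNIEnv* env_;
  jstring s_;
  const char* chars_;
};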
#include "icing/legacy/core/icing-string-util.h" +#include <stdarg.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + #include <algorithm> -#include <cstdarg> -#include <cstddef> -#include <cstdint> -#include <cstdio> #include <string> #include "icing/legacy/portable/icing-zlib.h" diff --git a/icing/legacy/core/icing-string-util.h b/icing/legacy/core/icing-string-util.h index e5e4941..767e581 100644 --- a/icing/legacy/core/icing-string-util.h +++ b/icing/legacy/core/icing-string-util.h @@ -15,8 +15,9 @@ #ifndef ICING_LEGACY_CORE_ICING_STRING_UTIL_H_ #define ICING_LEGACY_CORE_ICING_STRING_UTIL_H_ -#include <cstdarg> -#include <cstdint> +#include <stdarg.h> +#include <stdint.h> + #include <string> #include "icing/legacy/core/icing-compat.h" diff --git a/icing/legacy/core/icing-timer.h b/icing/legacy/core/icing-timer.h index af38912..49ba9ad 100644 --- a/icing/legacy/core/icing-timer.h +++ b/icing/legacy/core/icing-timer.h @@ -16,8 +16,7 @@ #define ICING_LEGACY_CORE_ICING_TIMER_H_ #include <sys/time.h> - -#include <ctime> +#include <time.h> namespace icing { namespace lib { diff --git a/icing/legacy/index/icing-array-storage.cc b/icing/legacy/index/icing-array-storage.cc index 4d2ef67..b462135 100644 --- a/icing/legacy/index/icing-array-storage.cc +++ b/icing/legacy/index/icing-array-storage.cc @@ -14,10 +14,10 @@ #include "icing/legacy/index/icing-array-storage.h" +#include <inttypes.h> #include <sys/mman.h> #include <algorithm> -#include <cinttypes> #include "icing/legacy/core/icing-string-util.h" #include "icing/legacy/core/icing-timer.h" diff --git a/icing/legacy/index/icing-array-storage.h b/icing/legacy/index/icing-array-storage.h index 0d93172..fad0565 100644 --- a/icing/legacy/index/icing-array-storage.h +++ b/icing/legacy/index/icing-array-storage.h @@ -20,7 +20,8 @@ #ifndef ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_ #define ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_ -#include <cstdint> +#include <stdint.h> + #include <string> #include <vector> diff --git a/icing/legacy/index/icing-bit-util.h b/icing/legacy/index/icing-bit-util.h index d0c3f50..3273a68 100644 --- a/icing/legacy/index/icing-bit-util.h +++ b/icing/legacy/index/icing-bit-util.h @@ -20,8 +20,9 @@ #ifndef ICING_LEGACY_INDEX_ICING_BIT_UTIL_H_ #define ICING_LEGACY_INDEX_ICING_BIT_UTIL_H_ -#include <cstdint> -#include <cstdio> +#include <stdint.h> +#include <stdio.h> + #include <limits> #include <vector> diff --git a/icing/legacy/index/icing-dynamic-trie.cc b/icing/legacy/index/icing-dynamic-trie.cc index 77876c4..29843ba 100644 --- a/icing/legacy/index/icing-dynamic-trie.cc +++ b/icing/legacy/index/icing-dynamic-trie.cc @@ -62,16 +62,15 @@ #include "icing/legacy/index/icing-dynamic-trie.h" +#include <errno.h> #include <fcntl.h> +#include <inttypes.h> +#include <string.h> #include <sys/mman.h> #include <sys/stat.h> #include <unistd.h> #include <algorithm> -#include <cerrno> -#include <cinttypes> -#include <cstdint> -#include <cstring> #include <memory> #include <utility> @@ -398,8 +397,6 @@ class IcingDynamicTrie::IcingDynamicTrieStorage { // storage. IcingScopedFd array_fds_[NUM_ARRAY_TYPES]; std::vector<IcingArrayStorage> array_storage_; - - // Legacy file system. Switch to use the new Filesystem class instead. 
const IcingFilesystem *filesystem_; }; @@ -1367,12 +1364,10 @@ uint32_t IcingDynamicTrie::size() const { return storage_->hdr().num_keys(); } -void IcingDynamicTrie::CollectStatsRecursive(const Node &node, Stats *stats, - uint32_t depth) const { +void IcingDynamicTrie::CollectStatsRecursive(const Node &node, + Stats *stats) const { if (node.is_leaf()) { stats->num_leaves++; - stats->sum_depth += depth; - stats->max_depth = max(stats->max_depth, depth); const char *suffix = storage_->GetSuffix(node.next_index()); stats->suffixes_used += strlen(suffix) + 1 + value_size(); if (!suffix[0]) { @@ -1384,16 +1379,13 @@ void IcingDynamicTrie::CollectStatsRecursive(const Node &node, Stats *stats, for (; i < (1U << node.log2_num_children()); i++) { const Next &next = *storage_->GetNext(node.next_index(), i); if (next.node_index() == kInvalidNodeIndex) break; - CollectStatsRecursive(*storage_->GetNode(next.node_index()), stats, - depth + 1); + CollectStatsRecursive(*storage_->GetNode(next.node_index()), stats); } // At least one valid node in each next array if (i == 0) { ICING_LOG(FATAL) << "No valid node in 'next' array"; } - stats->sum_children += i; - stats->max_children = max(stats->max_children, i); stats->child_counts[i - 1]++; stats->wasted[node.log2_num_children()] += @@ -1475,12 +1467,9 @@ std::string IcingDynamicTrie::Stats::DumpStats(int verbosity) const { "Wasted total: %u\n" "Num intermediates %u num leaves %u " "suffixes used %u null %u\n" - "avg and max children for intermediates: %.3f, %u\n" - "avg and max depth for leaves: %.3f, %u\n" "Total next frag: %.3f%%\n", total_wasted, num_intermediates, num_leaves, suffixes_used, - null_suffixes, 1. * sum_children / num_intermediates, max_children, - 1. * sum_depth / num_leaves, max_depth, + null_suffixes, 100. * math_util::SafeDivide((total_free + total_wasted), num_nexts)); } IcingStringUtil::SStringAppendF( diff --git a/icing/legacy/index/icing-dynamic-trie.h b/icing/legacy/index/icing-dynamic-trie.h index 013b926..7fe290b 100644 --- a/icing/legacy/index/icing-dynamic-trie.h +++ b/icing/legacy/index/icing-dynamic-trie.h @@ -35,7 +35,8 @@ #ifndef ICING_LEGACY_INDEX_ICING_DYNAMIC_TRIE_H_ #define ICING_LEGACY_INDEX_ICING_DYNAMIC_TRIE_H_ -#include <cstdint> +#include <stdint.h> + #include <memory> #include <string> #include <unordered_map> @@ -152,13 +153,8 @@ class IcingDynamicTrie : public IIcingStorage { uint32_t max_nodes; // Count of intermediate nodes. uint32_t num_intermediates; - // Total and maximum number of children of intermediate nodes. - uint32_t sum_children, max_children; - // Count of leaf nodes. uint32_t num_leaves; - // Total and maximum depth of leaf nodes. - uint32_t sum_depth, max_depth; // Next stats @@ -191,7 +187,6 @@ class IcingDynamicTrie : public IIcingStorage { uint32_t dirty_pages_nexts; uint32_t dirty_pages_suffixes; - // TODO(b/222349894) Convert the string output to a protocol buffer instead. std::string DumpStats(int verbosity) const; }; @@ -607,8 +602,7 @@ class IcingDynamicTrie : public IIcingStorage { static const uint32_t kInvalidSuffixIndex; // Stats helpers. - void CollectStatsRecursive(const Node &node, Stats *stats, - uint32_t depth = 0) const; + void CollectStatsRecursive(const Node &node, Stats *stats) const; // Helpers for Find and Insert. 
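The CollectStatsRecursive hunks above drop the depth and fan-out accumulators (sum_depth, max_depth, sum_children, max_children) from the trie statistics. For reference, this is the usual shape of such a recursive accumulation; the node type below is an illustrative stand-in, not IcingDynamicTrie's packed storage:

#include <algorithm>
#include <cstdint>
#include <vector>

struct Node {
  bool is_leaf = false;
  std::vector<const Node*> children;
};

struct DepthStats {
  uint32_t num_leaves = 0;
  uint64_t sum_depth = 0;  // average depth = sum_depth / num_leaves
  uint32_t max_depth = 0;
};

void Collect(const Node& node, DepthStats* stats, uint32_t depth = 0) {
  if (node.is_leaf) {
    ++stats->num_leaves;
    stats->sum_depth += depth;
    stats->max_depth = std::max(stats->max_depth, depth);
    return;
  }
  for (const Node* child : node.children) {
    Collect(*child, stats, depth + 1);
  }
}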
const Next *GetNextByChar(const Node *node, uint8_t key_char) const; diff --git a/icing/legacy/index/icing-filesystem.cc b/icing/legacy/index/icing-filesystem.cc index 4f5e571..90e9146 100644 --- a/icing/legacy/index/icing-filesystem.cc +++ b/icing/legacy/index/icing-filesystem.cc @@ -16,6 +16,7 @@ #include <dirent.h> #include <dlfcn.h> +#include <errno.h> #include <fcntl.h> #include <fnmatch.h> #include <pthread.h> @@ -26,7 +27,6 @@ #include <unistd.h> #include <algorithm> -#include <cerrno> #include <unordered_set> #include "icing/absl_ports/str_cat.h" diff --git a/icing/legacy/index/icing-filesystem.h b/icing/legacy/index/icing-filesystem.h index ce75a82..f645632 100644 --- a/icing/legacy/index/icing-filesystem.h +++ b/icing/legacy/index/icing-filesystem.h @@ -224,11 +224,6 @@ class IcingFilesystem { // Increments to_increment by size if size is valid, or sets to_increment // to kBadFileSize if either size or to_increment is kBadFileSize. static void IncrementByOrSetInvalid(uint64_t size, uint64_t *to_increment); - - // Return -1 if file_size is invalid. Otherwise, return file_size. - static int64_t SanitizeFileSize(int64_t file_size) { - return (file_size != kBadFileSize) ? file_size : -1; - } }; } // namespace lib diff --git a/icing/legacy/index/icing-flash-bitmap.h b/icing/legacy/index/icing-flash-bitmap.h index 6bb9591..3b3521a 100644 --- a/icing/legacy/index/icing-flash-bitmap.h +++ b/icing/legacy/index/icing-flash-bitmap.h @@ -37,7 +37,8 @@ #ifndef ICING_LEGACY_INDEX_ICING_FLASH_BITMAP_H_ #define ICING_LEGACY_INDEX_ICING_FLASH_BITMAP_H_ -#include <cstdint> +#include <stdint.h> + #include <memory> #include <string> @@ -138,7 +139,6 @@ class IcingFlashBitmap { // Upgrade for version 18. bool UpgradeTo18(); - // Legacy file system. Switch to use the new Filesystem class instead. 
const IcingFilesystem *const filesystem_; std::string filename_; OpenType open_type_; diff --git a/icing/legacy/index/icing-mmapper.cc b/icing/legacy/index/icing-mmapper.cc index 7946c82..737335c 100644 --- a/icing/legacy/index/icing-mmapper.cc +++ b/icing/legacy/index/icing-mmapper.cc @@ -17,11 +17,10 @@ // #include "icing/legacy/index/icing-mmapper.h" +#include <errno.h> +#include <string.h> #include <sys/mman.h> -#include <cerrno> -#include <cstring> - #include "icing/legacy/core/icing-string-util.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/util/logging.h" diff --git a/icing/legacy/index/icing-mock-filesystem.h b/icing/legacy/index/icing-mock-filesystem.h index 122ee7b..75ac62f 100644 --- a/icing/legacy/index/icing-mock-filesystem.h +++ b/icing/legacy/index/icing-mock-filesystem.h @@ -15,15 +15,16 @@ #ifndef ICING_LEGACY_INDEX_ICING_MOCK_FILESYSTEM_H_ #define ICING_LEGACY_INDEX_ICING_MOCK_FILESYSTEM_H_ -#include <cstdint> -#include <cstdio> -#include <cstring> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + #include <memory> #include <string> #include <vector> -#include "gmock/gmock.h" #include "icing/legacy/index/icing-filesystem.h" +#include "gmock/gmock.h" namespace icing { namespace lib { diff --git a/icing/legacy/index/icing-storage-file.cc b/icing/legacy/index/icing-storage-file.cc index 35a4418..b27ec67 100644 --- a/icing/legacy/index/icing-storage-file.cc +++ b/icing/legacy/index/icing-storage-file.cc @@ -14,9 +14,9 @@ #include "icing/legacy/index/icing-storage-file.h" +#include <inttypes.h> #include <unistd.h> -#include <cinttypes> #include <string> #include "icing/legacy/core/icing-compat.h" diff --git a/icing/portable/endian.h b/icing/portable/endian.h index ecebb15..42f6c02 100644 --- a/icing/portable/endian.h +++ b/icing/portable/endian.h @@ -12,12 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. // -// Utility functions that depend on bytesex. We define versions of htonll and -// ntohll (HostToNetworkLL and NetworkToHostLL in our naming), as well as -// "Google" versions of all the standards: ghtonl, ghtons, and so on -// (GHostToNetworkL, GHostToNetworkS, etc in our naming). These functions do -// exactly the same as their standard variants, but don't require including the -// dangerous netinet/in.h. +// Utility functions that depend on bytesex. We define htonll and ntohll, +// as well as "Google" versions of all the standards: ghtonl, ghtons, and +// so on. These functions do exactly the same as their standard variants, +// but don't require including the dangerous netinet/in.h. #ifndef ICING_PORTABLE_ENDIAN_H_ #define ICING_PORTABLE_ENDIAN_H_ @@ -77,7 +75,7 @@ // The following guarantees declaration of the byte swap functions #ifdef COMPILER_MSVC -#include <cstdlib> // NOLINT(build/include) +#include <stdlib.h> // NOLINT(build/include) #define bswap_16(x) _byteswap_ushort(x) #define bswap_32(x) _byteswap_ulong(x) @@ -172,37 +170,37 @@ inline uint16 gbswap_16(uint16 host_int) { return bswap_16(host_int); } // correctly handle the (rather involved) definitions of bswap_32. // gcc guarantees that inline functions are as fast as macros, so // this isn't a performance hit. 
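The endian.h hunk that continues below renames the byte-order helpers back to their g-prefixed forms (ghtons, ghtonl, ghtonll). Whatever the naming, on a little-endian host the 32-bit operation is a plain byte reversal, which a standalone constexpr makes easy to verify:

#include <cstdint>

constexpr uint32_t bswap32(uint32_t x) {
  return ((x & 0xFF000000u) >> 24) | ((x & 0x00FF0000u) >> 8) |
         ((x & 0x0000FF00u) << 8) | ((x & 0x000000FFu) << 24);
}

// ghtonl(0x11223344) produces this value on a little-endian host; on a
// big-endian host the helpers are the identity.
static_assert(bswap32(0x11223344u) == 0x44332211u, "bytes are reversed");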
-inline uint16_t GHostToNetworkS(uint16_t x) { return gbswap_16(x); } -inline uint32_t GHostToNetworkL(uint32_t x) { return gbswap_32(x); } -inline uint64_t GHostToNetworkLL(uint64_t x) { return gbswap_64(x); } +inline uint16_t ghtons(uint16_t x) { return gbswap_16(x); } +inline uint32_t ghtonl(uint32_t x) { return gbswap_32(x); } +inline uint64_t ghtonll(uint64_t x) { return gbswap_64(x); } #elif defined IS_BIG_ENDIAN // These definitions are simpler on big-endian machines // These are functions instead of macros to avoid self-assignment warnings // on calls such as "i = ghtnol(i);". This also provides type checking. -inline uint16 GHostToNetworkS(uint16 x) { return x; } -inline uint32 GHostToNetworkL(uint32 x) { return x; } -inline uint64 GHostToNetworkLL(uint64 x) { return x; } +inline uint16 ghtons(uint16 x) { return x; } +inline uint32 ghtonl(uint32 x) { return x; } +inline uint64 ghtonll(uint64 x) { return x; } #else // bytesex #error \ "Unsupported bytesex: Either IS_BIG_ENDIAN or IS_LITTLE_ENDIAN must be defined" // NOLINT #endif // bytesex -#ifndef HostToNetworkLL +#ifndef htonll // With the rise of 64-bit, some systems are beginning to define this. -#define HostToNetworkLL(x) GHostToNetworkLL(x) -#endif // HostToNetworkLL +#define htonll(x) ghtonll(x) +#endif // htonll // ntoh* and hton* are the same thing for any size and bytesex, // since the function is an involution, i.e., its own inverse. -inline uint16_t GNetworkToHostS(uint16_t x) { return GHostToNetworkS(x); } -inline uint32_t GNetworkToHostL(uint32_t x) { return GHostToNetworkL(x); } -inline uint64_t GNetworkToHostLL(uint64_t x) { return GHostToNetworkLL(x); } +inline uint16_t gntohs(uint16_t x) { return ghtons(x); } +inline uint32_t gntohl(uint32_t x) { return ghtonl(x); } +inline uint64_t gntohll(uint64_t x) { return ghtonll(x); } -#ifndef NetworkToHostLL -#define NetworkToHostLL(x) GHostToNetworkLL(x) -#endif // NetworkToHostLL +#ifndef ntohll +#define ntohll(x) htonll(x) +#endif // ntohll #endif // ICING_PORTABLE_ENDIAN_H_ diff --git a/icing/portable/gzip_stream.cc b/icing/portable/gzip_stream.cc deleted file mode 100644 index f00a993..0000000 --- a/icing/portable/gzip_stream.cc +++ /dev/null @@ -1,313 +0,0 @@ -// Copyright (C) 2009 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This file contains the implementation of classes GzipInputStream and -// GzipOutputStream. It is forked from protobuf because these classes are only -// provided in libprotobuf-full but we would like to link libicing against the -// smaller libprotobuf-lite instead. 
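The deletion that follows removes the forked gzip_stream entirely. As its header comment explains, the fork existed only so Icing could gzip protos while linking libprotobuf-lite; with the build switched back to the full protobuf runtime, the stock google::protobuf::io classes cover the same ground. A sketch of compressing a buffer with those stock classes, assuming full protobuf is linked:

#include <google/protobuf/io/gzip_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>

#include <algorithm>
#include <cstring>
#include <string>

// Gzip-compresses `raw` into a std::string via protobuf's stream classes.
std::string Compress(const std::string& raw) {
  std::string out;
  google::protobuf::io::StringOutputStream string_stream(&out);
  google::protobuf::io::GzipOutputStream gzip_stream(&string_stream);
  size_t written = 0;
  void* data;
  int size;
  while (written < raw.size() && gzip_stream.Next(&data, &size)) {
    size_t to_copy = std::min<size_t>(size, raw.size() - written);
    memcpy(data, raw.data() + written, to_copy);
    written += to_copy;
    if (to_copy < static_cast<size_t>(size)) {
      gzip_stream.BackUp(size - to_copy);  // return the unused buffer tail
    }
  }
  gzip_stream.Close();  // flushes and writes the gzip trailer
  return out;
}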
- -#include "icing/portable/gzip_stream.h" -#include "icing/util/logging.h" - -namespace icing { -namespace lib { -namespace protobuf_ports { - -static const int kDefaultBufferSize = 65536; - -GzipInputStream::GzipInputStream(ZeroCopyInputStream* sub_stream, Format format, - int buffer_size) - : format_(format), sub_stream_(sub_stream), zerror_(Z_OK), byte_count_(0) { - zcontext_.state = Z_NULL; - zcontext_.zalloc = Z_NULL; - zcontext_.zfree = Z_NULL; - zcontext_.opaque = Z_NULL; - zcontext_.total_out = 0; - zcontext_.next_in = NULL; - zcontext_.avail_in = 0; - zcontext_.total_in = 0; - zcontext_.msg = NULL; - if (buffer_size == -1) { - output_buffer_length_ = kDefaultBufferSize; - } else { - output_buffer_length_ = buffer_size; - } - output_buffer_ = operator new(output_buffer_length_); - zcontext_.next_out = static_cast<Bytef*>(output_buffer_); - zcontext_.avail_out = output_buffer_length_; - output_position_ = output_buffer_; -} -GzipInputStream::~GzipInputStream() { - operator delete(output_buffer_); - zerror_ = inflateEnd(&zcontext_); -} - -static inline int internalInflateInit2(z_stream* zcontext, - GzipInputStream::Format format) { - int windowBitsFormat = 0; - switch (format) { - case GzipInputStream::GZIP: - windowBitsFormat = 16; - break; - case GzipInputStream::AUTO: - windowBitsFormat = 32; - break; - case GzipInputStream::ZLIB: - windowBitsFormat = 0; - break; - } - return inflateInit2(zcontext, /* windowBits */ 15 | windowBitsFormat); -} - -int GzipInputStream::Inflate(int flush) { - if ((zerror_ == Z_OK) && (zcontext_.avail_out == 0)) { - // previous inflate filled output buffer. don't change input params yet. - } else if (zcontext_.avail_in == 0) { - const void* in; - int in_size; - bool first = zcontext_.next_in == NULL; - bool ok = sub_stream_->Next(&in, &in_size); - if (!ok) { - zcontext_.next_out = NULL; - zcontext_.avail_out = 0; - return Z_STREAM_END; - } - zcontext_.next_in = static_cast<Bytef*>(const_cast<void*>(in)); - zcontext_.avail_in = in_size; - if (first) { - int error = internalInflateInit2(&zcontext_, format_); - if (error != Z_OK) { - return error; - } - } - } - zcontext_.next_out = static_cast<Bytef*>(output_buffer_); - zcontext_.avail_out = output_buffer_length_; - output_position_ = output_buffer_; - int error = inflate(&zcontext_, flush); - return error; -} - -void GzipInputStream::DoNextOutput(const void** data, int* size) { - *data = output_position_; - *size = ((uintptr_t)zcontext_.next_out) - ((uintptr_t)output_position_); - output_position_ = zcontext_.next_out; -} - -// implements ZeroCopyInputStream ---------------------------------- -bool GzipInputStream::Next(const void** data, int* size) { - bool ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END) || - (zerror_ == Z_BUF_ERROR); - if ((!ok) || (zcontext_.next_out == NULL)) { - return false; - } - if (zcontext_.next_out != output_position_) { - DoNextOutput(data, size); - return true; - } - if (zerror_ == Z_STREAM_END) { - if (zcontext_.next_out != NULL) { - // sub_stream_ may have concatenated streams to follow - zerror_ = inflateEnd(&zcontext_); - byte_count_ += zcontext_.total_out; - if (zerror_ != Z_OK) { - return false; - } - zerror_ = internalInflateInit2(&zcontext_, format_); - if (zerror_ != Z_OK) { - return false; - } - } else { - *data = NULL; - *size = 0; - return false; - } - } - zerror_ = Inflate(Z_NO_FLUSH); - if ((zerror_ == Z_STREAM_END) && (zcontext_.next_out == NULL)) { - // The underlying stream's Next returned false inside Inflate. 
- return false; - } - ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END) || - (zerror_ == Z_BUF_ERROR); - if (!ok) { - return false; - } - DoNextOutput(data, size); - return true; -} -void GzipInputStream::BackUp(int count) { - output_position_ = reinterpret_cast<void*>( - reinterpret_cast<uintptr_t>(output_position_) - count); -} -bool GzipInputStream::Skip(int count) { - const void* data; - int size = 0; - bool ok = Next(&data, &size); - while (ok && (size < count)) { - count -= size; - ok = Next(&data, &size); - } - if (size > count) { - BackUp(size - count); - } - return ok; -} -int64_t GzipInputStream::ByteCount() const { - int64_t ret = byte_count_ + zcontext_.total_out; - if (zcontext_.next_out != NULL && output_position_ != NULL) { - ret += reinterpret_cast<uintptr_t>(zcontext_.next_out) - - reinterpret_cast<uintptr_t>(output_position_); - } - return ret; -} - -// ========================================================================= - -GzipOutputStream::Options::Options() - : format(GZIP), - buffer_size(kDefaultBufferSize), - compression_level(Z_DEFAULT_COMPRESSION), - compression_strategy(Z_DEFAULT_STRATEGY) {} - -GzipOutputStream::GzipOutputStream(ZeroCopyOutputStream* sub_stream) { - Init(sub_stream, Options()); -} - -GzipOutputStream::GzipOutputStream(ZeroCopyOutputStream* sub_stream, - const Options& options) { - Init(sub_stream, options); -} - -void GzipOutputStream::Init(ZeroCopyOutputStream* sub_stream, - const Options& options) { - sub_stream_ = sub_stream; - sub_data_ = NULL; - sub_data_size_ = 0; - - input_buffer_length_ = options.buffer_size; - input_buffer_ = operator new(input_buffer_length_); - - zcontext_.zalloc = Z_NULL; - zcontext_.zfree = Z_NULL; - zcontext_.opaque = Z_NULL; - zcontext_.next_out = NULL; - zcontext_.avail_out = 0; - zcontext_.total_out = 0; - zcontext_.next_in = NULL; - zcontext_.avail_in = 0; - zcontext_.total_in = 0; - zcontext_.msg = NULL; - // default to GZIP format - int windowBitsFormat = 16; - if (options.format == ZLIB) { - windowBitsFormat = 0; - } - zerror_ = - deflateInit2(&zcontext_, options.compression_level, Z_DEFLATED, - /* windowBits */ 15 | windowBitsFormat, - /* memLevel (default) */ 8, options.compression_strategy); -} - -GzipOutputStream::~GzipOutputStream() { - Close(); - operator delete(input_buffer_); -} - -// private -int GzipOutputStream::Deflate(int flush) { - int error = Z_OK; - do { - if ((sub_data_ == NULL) || (zcontext_.avail_out == 0)) { - bool ok = sub_stream_->Next(&sub_data_, &sub_data_size_); - if (!ok) { - sub_data_ = NULL; - sub_data_size_ = 0; - return Z_BUF_ERROR; - } - if (sub_data_size_ <= 0) { - ICING_LOG(FATAL) << "Failed to advance underlying stream"; - } - zcontext_.next_out = static_cast<Bytef*>(sub_data_); - zcontext_.avail_out = sub_data_size_; - } - error = deflate(&zcontext_, flush); - } while (error == Z_OK && zcontext_.avail_out == 0); - if ((flush == Z_FULL_FLUSH) || (flush == Z_FINISH)) { - // Notify lower layer of data. - sub_stream_->BackUp(zcontext_.avail_out); - // We don't own the buffer anymore. - sub_data_ = NULL; - sub_data_size_ = 0; - } - return error; -} - -// implements ZeroCopyOutputStream --------------------------------- -bool GzipOutputStream::Next(void** data, int* size) { - if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) { - return false; - } - if (zcontext_.avail_in != 0) { - zerror_ = Deflate(Z_NO_FLUSH); - if (zerror_ != Z_OK) { - return false; - } - } - if (zcontext_.avail_in == 0) { - // all input was consumed. reset the buffer. 
- zcontext_.next_in = static_cast<Bytef*>(input_buffer_); - zcontext_.avail_in = input_buffer_length_; - *data = input_buffer_; - *size = input_buffer_length_; - } else { - // The loop in Deflate should consume all avail_in - ICING_LOG(ERROR) << "Deflate left bytes unconsumed"; - } - return true; -} -void GzipOutputStream::BackUp(int count) { - if (zcontext_.avail_in < static_cast<uInt>(count)) { - ICING_LOG(FATAL) << "Not enough data to back up " << count << " bytes"; - } - zcontext_.avail_in -= count; -} -int64_t GzipOutputStream::ByteCount() const { - return zcontext_.total_in + zcontext_.avail_in; -} - -bool GzipOutputStream::Flush() { - zerror_ = Deflate(Z_FULL_FLUSH); - // Return true if the flush succeeded or if it was a no-op. - return (zerror_ == Z_OK) || - (zerror_ == Z_BUF_ERROR && zcontext_.avail_in == 0 && - zcontext_.avail_out != 0); -} - -bool GzipOutputStream::Close() { - if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) { - return false; - } - do { - zerror_ = Deflate(Z_FINISH); - } while (zerror_ == Z_OK); - zerror_ = deflateEnd(&zcontext_); - bool ok = zerror_ == Z_OK; - zerror_ = Z_STREAM_END; - return ok; -} - -} // namespace protobuf_ports -} // namespace lib -} // namespace icing diff --git a/icing/portable/gzip_stream.h b/icing/portable/gzip_stream.h deleted file mode 100644 index 602093f..0000000 --- a/icing/portable/gzip_stream.h +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright (C) 2009 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This file contains the definition for classes GzipInputStream and -// GzipOutputStream. It is forked from protobuf because these classes are only -// provided in libprotobuf-full but we would like to link libicing against the -// smaller libprotobuf-lite instead. -// -// GzipInputStream decompresses data from an underlying -// ZeroCopyInputStream and provides the decompressed data as a -// ZeroCopyInputStream. -// -// GzipOutputStream is an ZeroCopyOutputStream that compresses data to -// an underlying ZeroCopyOutputStream. - -#ifndef GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_ -#define GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_ - -#include <google/protobuf/io/zero_copy_stream_impl_lite.h> -#include "icing/portable/zlib.h" - -namespace icing { -namespace lib { -namespace protobuf_ports { - -// A ZeroCopyInputStream that reads compressed data through zlib -class GzipInputStream : public google::protobuf::io::ZeroCopyInputStream { - public: - // Format key for constructor - enum Format { - // zlib will autodetect gzip header or deflate stream - AUTO = 0, - - // GZIP streams have some extra header data for file attributes. - GZIP = 1, - - // Simpler zlib stream format. - ZLIB = 2, - }; - - // buffer_size and format may be -1 for default of 64kB and GZIP format - explicit GzipInputStream( - google::protobuf::io::ZeroCopyInputStream* sub_stream, - Format format = AUTO, int buffer_size = -1); - virtual ~GzipInputStream(); - - // Return last error message or NULL if no error. 
- inline const char* ZlibErrorMessage() const { return zcontext_.msg; } - inline int ZlibErrorCode() const { return zerror_; } - - // implements ZeroCopyInputStream ---------------------------------- - bool Next(const void** data, int* size) override; - void BackUp(int count) override; - bool Skip(int count) override; - int64_t ByteCount() const override; - - private: - Format format_; - - google::protobuf::io::ZeroCopyInputStream* sub_stream_; - - z_stream zcontext_; - int zerror_; - - void* output_buffer_; - void* output_position_; - size_t output_buffer_length_; - int64_t byte_count_; - - int Inflate(int flush); - void DoNextOutput(const void** data, int* size); -}; - -class GzipOutputStream : public google::protobuf::io::ZeroCopyOutputStream { - public: - // Format key for constructor - enum Format { - // GZIP streams have some extra header data for file attributes. - GZIP = 1, - - // Simpler zlib stream format. - ZLIB = 2, - }; - - struct Options { - // Defaults to GZIP. - Format format; - - // What size buffer to use internally. Defaults to 64kB. - int buffer_size; - - // A number between 0 and 9, where 0 is no compression and 9 is best - // compression. Defaults to Z_DEFAULT_COMPRESSION (see zlib.h). - int compression_level; - - // Defaults to Z_DEFAULT_STRATEGY. Can also be set to Z_FILTERED, - // Z_HUFFMAN_ONLY, or Z_RLE. See the documentation for deflateInit2 in - // zlib.h for definitions of these constants. - int compression_strategy; - - Options(); // Initializes with default values. - }; - - // Create a GzipOutputStream with default options. - explicit GzipOutputStream( - google::protobuf::io::ZeroCopyOutputStream* sub_stream); - - // Create a GzipOutputStream with the given options. - GzipOutputStream( - google::protobuf::io::ZeroCopyOutputStream* sub_stream, - const Options& options); - - virtual ~GzipOutputStream(); - - // Return last error message or NULL if no error. - inline const char* ZlibErrorMessage() const { return zcontext_.msg; } - inline int ZlibErrorCode() const { return zerror_; } - - // Flushes data written so far to zipped data in the underlying stream. - // It is the caller's responsibility to flush the underlying stream if - // necessary. - // Compression may be less efficient stopping and starting around flushes. - // Returns true if no error. - // - // Please ensure that block size is > 6. Here is an excerpt from the zlib - // doc that explains why: - // - // In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that avail_out - // is greater than six to avoid repeated flush markers due to - // avail_out == 0 on return. - bool Flush(); - - // Writes out all data and closes the gzip stream. - // It is the caller's responsibility to close the underlying stream if - // necessary. - // Returns true if no error. - bool Close(); - - // implements ZeroCopyOutputStream --------------------------------- - bool Next(void** data, int* size) override; - void BackUp(int count) override; - int64_t ByteCount() const override; - - private: - google::protobuf::io::ZeroCopyOutputStream* sub_stream_; - // Result from calling Next() on sub_stream_ - void* sub_data_; - int sub_data_size_; - - z_stream zcontext_; - int zerror_; - void* input_buffer_; - size_t input_buffer_length_; - - // Shared constructor code. - void Init( - google::protobuf::io::ZeroCopyOutputStream* sub_stream, - const Options& options); - - // Do some compression. - // Takes zlib flush mode. - // Returns zlib error code. 
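Throughout the deleted implementation above (internalInflateInit2 and GzipOutputStream::Init), the container format is selected by folding flags into zlib's windowBits argument: 15 is the raw window size, adding 16 requests a gzip wrapper, and adding 32 lets inflate auto-detect gzip versus zlib input. Condensed into a standalone helper that uses only the zlib API:

#include <zlib.h>

// ctx must already have zalloc/zfree/opaque initialized (zero-filling the
// struct is enough). Returns Z_OK on success.
int InflateInitFor(z_stream* ctx, bool gzip_wrapper, bool auto_detect) {
  int window_bits = 15;  // maximum history window
  if (auto_detect) {
    window_bits |= 32;  // accept either a zlib or a gzip wrapper
  } else if (gzip_wrapper) {
    window_bits |= 16;  // require a gzip wrapper
  }
  return inflateInit2(ctx, window_bits);
}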
- int Deflate(int flush); -}; - -} // namespace protobuf_ports -} // namespace lib -} // namespace icing - -#endif // GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_ diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc index 36c76db..1f937fd 100644 --- a/icing/query/query-processor.cc +++ b/icing/query/query-processor.cc @@ -182,7 +182,7 @@ QueryProcessor::ParseRawQuery(const SearchSpecProto& search_spec) { const Token& token = tokens.at(i); std::unique_ptr<DocHitInfoIterator> result_iterator; - // TODO(b/202076890): Handle negation tokens + // TODO(cassiewang): Handle negation tokens switch (token.type) { case Token::Type::QUERY_LEFT_PARENTHESES: { frames.emplace(ParserStateFrame()); diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc index e48fe78..bdd40aa 100644 --- a/icing/query/query-processor_benchmark.cc +++ b/icing/query/query-processor_benchmark.cc @@ -16,6 +16,7 @@ #include "gmock/gmock.h" #include "third_party/absl/flags/flag.h" #include "icing/document-builder.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/index/index.h" #include "icing/proto/term.pb.h" #include "icing/query/query-processor.h" @@ -23,7 +24,6 @@ #include "icing/schema/section.h" #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc index eaa0efc..daeb479 100644 --- a/icing/query/query-processor_test.cc +++ b/icing/query/query-processor_test.cc @@ -23,6 +23,7 @@ #include "gtest/gtest.h" #include "icing/document-builder.h" #include "icing/file/filesystem.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/index.h" #include "icing/index/iterator/doc-hit-info-iterator-test-util.h" @@ -39,7 +40,6 @@ #include "icing/store/document-store.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" @@ -61,30 +61,28 @@ using ::testing::SizeIs; using ::testing::Test; using ::testing::UnorderedElementsAre; -constexpr PropertyConfigProto::DataType::Code TYPE_STRING = - PropertyConfigProto::DataType::STRING; +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; -constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; class QueryProcessorTest : public Test { protected: QueryProcessorTest() : test_dir_(GetTestTempDir() + "/icing"), store_dir_(test_dir_ + "/store"), - schema_store_dir_(test_dir_ + "/schema_store"), index_dir_(test_dir_ + "/index") {} void SetUp() override { 
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); filesystem_.CreateDirectoryRecursively(index_dir_.c_str()); filesystem_.CreateDirectoryRecursively(store_dir_.c_str()); - filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()); if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { // If we've specified using the reverse-JNI method for segmentation (i.e. @@ -131,7 +129,6 @@ class QueryProcessorTest : public Test { Filesystem filesystem_; const std::string test_dir_; const std::string store_dir_; - const std::string schema_store_dir_; std::unique_ptr<Index> index_; std::unique_ptr<LanguageSegmenter> language_segmenter_; std::unique_ptr<Normalizer> normalizer_; @@ -179,7 +176,7 @@ TEST_F(QueryProcessorTest, EmptyGroupMatchAllDocuments) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -230,7 +227,7 @@ TEST_F(QueryProcessorTest, EmptyQueryMatchAllDocuments) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -281,7 +278,7 @@ TEST_F(QueryProcessorTest, QueryTermNormalized) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -357,7 +354,7 @@ TEST_F(QueryProcessorTest, OneTermPrefixMatch) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -425,7 +422,7 @@ TEST_F(QueryProcessorTest, OneTermExactMatch) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -493,7 +490,7 @@ TEST_F(QueryProcessorTest, AndSameTermExactMatch) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -563,7 +560,7 @@ TEST_F(QueryProcessorTest, AndTwoTermExactMatch) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -638,7 +635,7 @@ TEST_F(QueryProcessorTest, AndSameTermPrefixMatch) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -708,7 +705,7 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixMatch) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + 
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -784,7 +781,7 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -860,7 +857,7 @@ TEST_F(QueryProcessorTest, OrTwoTermExactMatch) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -949,7 +946,7 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixMatch) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -1037,7 +1034,7 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -1124,7 +1121,7 @@ TEST_F(QueryProcessorTest, CombinedAndOrTerms) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -1310,7 +1307,7 @@ TEST_F(QueryProcessorTest, OneGroup) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -1386,7 +1383,7 @@ TEST_F(QueryProcessorTest, TwoGroups) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -1464,7 +1461,7 @@ TEST_F(QueryProcessorTest, ManyLevelNestedGrouping) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -1540,7 +1537,7 @@ TEST_F(QueryProcessorTest, OneLevelNestedGrouping) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -1617,7 +1614,7 @@ TEST_F(QueryProcessorTest, ExcludeTerm) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -1682,7 +1679,7 @@ TEST_F(QueryProcessorTest, ExcludeNonexistentTerm) { 
ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -1745,7 +1742,7 @@ TEST_F(QueryProcessorTest, ExcludeAnd) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -1835,7 +1832,7 @@ TEST_F(QueryProcessorTest, ExcludeOr) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -1931,7 +1928,7 @@ TEST_F(QueryProcessorTest, DeletedFilter) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -2005,7 +2002,7 @@ TEST_F(QueryProcessorTest, NamespaceFilter) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -2081,7 +2078,7 @@ TEST_F(QueryProcessorTest, SchemaTypeFilter) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -2158,7 +2155,7 @@ TEST_F(QueryProcessorTest, SectionFilterForOneDocument) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -2240,7 +2237,7 @@ TEST_F(QueryProcessorTest, SectionFilterAcrossSchemaTypes) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -2323,7 +2320,7 @@ TEST_F(QueryProcessorTest, SectionFilterWithinSchemaType) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -2407,7 +2404,7 @@ TEST_F(QueryProcessorTest, SectionFilterRespectsDifferentSectionIds) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -2480,7 +2477,7 @@ TEST_F(QueryProcessorTest, NonexistentSectionFilterReturnsEmptyResults) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); 
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -2547,7 +2544,7 @@ TEST_F(QueryProcessorTest, UnindexedSectionFilterReturnsEmptyResults) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -2617,7 +2614,7 @@ TEST_F(QueryProcessorTest, SectionFilterTermAndUnrestrictedTerm) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); ICING_ASSERT_OK_AND_ASSIGN( @@ -2692,7 +2689,7 @@ TEST_F(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); // Arbitrary value, just has to be less than the document's creation @@ -2751,7 +2748,7 @@ TEST_F(QueryProcessorTest, DocumentPastTtlFilteredOut) { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); // Arbitrary value, just has to be greater than the document's creation diff --git a/icing/query/suggestion-processor.cc b/icing/query/suggestion-processor.cc deleted file mode 100644 index cfa53f6..0000000 --- a/icing/query/suggestion-processor.cc +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "icing/query/suggestion-processor.h" - -#include "icing/tokenization/tokenizer-factory.h" -#include "icing/tokenization/tokenizer.h" -#include "icing/transform/normalizer.h" - -namespace icing { -namespace lib { - -libtextclassifier3::StatusOr<std::unique_ptr<SuggestionProcessor>> -SuggestionProcessor::Create(Index* index, - const LanguageSegmenter* language_segmenter, - const Normalizer* normalizer) { - ICING_RETURN_ERROR_IF_NULL(index); - ICING_RETURN_ERROR_IF_NULL(language_segmenter); - - return std::unique_ptr<SuggestionProcessor>( - new SuggestionProcessor(index, language_segmenter, normalizer)); -} - -libtextclassifier3::StatusOr<std::vector<TermMetadata>> -SuggestionProcessor::QuerySuggestions( - const icing::lib::SuggestionSpecProto& suggestion_spec, - const NamespaceChecker* namespace_checker) { - // We use query tokenizer to tokenize the give prefix, and we only use the - // last token to be the suggestion prefix. 
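The deleted QuerySuggestions implementation, which continues below, tokenizes the whole user prefix but only expands the final token; everything before it is echoed back verbatim in front of each suggestion. Stripped of the tokenizer, the splitting step amounts to the following whitespace-only simplification (Icing's real code segments with its language-aware tokenizer instead):

#include <string>
#include <utility>

// Returns {text to echo back, last token to expand}.
std::pair<std::string, std::string> SplitSuggestQuery(
    const std::string& query) {
  size_t pos = query.find_last_of(' ');
  if (pos == std::string::npos) {
    return {"", query};  // single-token query: expand the whole thing
  }
  return {query.substr(0, pos + 1), query.substr(pos + 1)};
}

// e.g. SplitSuggestQuery("bar f") == {"bar ", "f"}; suggestions for "f"
// then come back as "bar foo", "bar fly", ...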
- ICING_ASSIGN_OR_RETURN( - std::unique_ptr<Tokenizer> tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::PLAIN, &language_segmenter_)); - ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator, - tokenizer->Tokenize(suggestion_spec.prefix())); - - // If there are previous tokens, they are prepended to the suggestion, - // separated by spaces. - std::string last_token; - int token_start_pos; - while (iterator->Advance()) { - Token token = iterator->GetToken(); - last_token = token.text; - token_start_pos = token.text.data() - suggestion_spec.prefix().c_str(); - } - - // If the position of the last token is not the end of the prefix, it means - // there should be some operator tokens after it and are ignored by the - // tokenizer. - bool is_last_token = token_start_pos + last_token.length() >= - suggestion_spec.prefix().length(); - - if (!is_last_token || last_token.empty()) { - // We don't have a valid last token, return early. - return std::vector<TermMetadata>(); - } - - std::string query_prefix = - suggestion_spec.prefix().substr(0, token_start_pos); - // Run suggestion based on given SuggestionSpec. - // Normalize token text to lowercase since all tokens in the lexicon are - // lowercase. - ICING_ASSIGN_OR_RETURN( - std::vector<TermMetadata> terms, - index_.FindTermsByPrefix( - normalizer_.NormalizeTerm(last_token), - suggestion_spec.num_to_return(), - suggestion_spec.scoring_spec().scoring_match_type(), - namespace_checker)); - - for (TermMetadata& term : terms) { - term.content = query_prefix + term.content; - } - return terms; -} - -SuggestionProcessor::SuggestionProcessor( - Index* index, const LanguageSegmenter* language_segmenter, - const Normalizer* normalizer) - : index_(*index), - language_segmenter_(*language_segmenter), - normalizer_(*normalizer) {} - -} // namespace lib -} // namespace icing diff --git a/icing/query/suggestion-processor.h b/icing/query/suggestion-processor.h deleted file mode 100644 index 088863e..0000000 --- a/icing/query/suggestion-processor.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ICING_QUERY_SUGGESTION_PROCESSOR_H_ -#define ICING_QUERY_SUGGESTION_PROCESSOR_H_ - -#include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/index/index.h" -#include "icing/proto/search.pb.h" -#include "icing/tokenization/language-segmenter.h" -#include "icing/transform/normalizer.h" - -namespace icing { -namespace lib { - -// Processes SuggestionSpecProtos and retrieves the specified TermMedaData that -// satisfies the prefix and its restrictions. This also performs ranking, and -// returns TermMetaData ordered by their hit count. -class SuggestionProcessor { - public: - // Factory function to create a SuggestionProcessor which does not take - // ownership of any input components, and all pointers must refer to valid - // objects that outlive the created SuggestionProcessor instance. 
- // - // Returns: - // An SuggestionProcessor on success - // FAILED_PRECONDITION if any of the pointers is null. - static libtextclassifier3::StatusOr<std::unique_ptr<SuggestionProcessor>> - Create(Index* index, const LanguageSegmenter* language_segmenter, - const Normalizer* normalizer); - - // Query suggestions based on the given SuggestionSpecProto. - // - // Returns: - // On success, - // - One vector that represents the entire TermMetadata - // INTERNAL_ERROR on all other errors - libtextclassifier3::StatusOr<std::vector<TermMetadata>> QuerySuggestions( - const SuggestionSpecProto& suggestion_spec, - const NamespaceChecker* namespace_checker); - - private: - explicit SuggestionProcessor(Index* index, - const LanguageSegmenter* language_segmenter, - const Normalizer* normalizer); - - // Not const because we could modify/sort the TermMetaData buffer in the lite - // index. - Index& index_; - const LanguageSegmenter& language_segmenter_; - const Normalizer& normalizer_; -}; - -} // namespace lib -} // namespace icing - -#endif // ICING_QUERY_SUGGESTION_PROCESSOR_H_ diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc deleted file mode 100644 index ba4c90a..0000000 --- a/icing/query/suggestion-processor_test.cc +++ /dev/null @@ -1,326 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "icing/query/suggestion-processor.h" - -#include "gmock/gmock.h" -#include "icing/store/document-store.h" -#include "icing/testing/always-true-namespace-checker-impl.h" -#include "icing/testing/common-matchers.h" -#include "icing/testing/fake-clock.h" -#include "icing/testing/icu-data-file-helper.h" -#include "icing/testing/jni-test-helpers.h" -#include "icing/testing/test-data.h" -#include "icing/testing/tmp-directory.h" -#include "icing/tokenization/language-segmenter-factory.h" -#include "icing/transform/normalizer-factory.h" -#include "unicode/uloc.h" - -namespace icing { -namespace lib { - -namespace { - -using ::testing::IsEmpty; -using ::testing::Test; - -class SuggestionProcessorTest : public Test { - protected: - SuggestionProcessorTest() - : test_dir_(GetTestTempDir() + "/icing"), - store_dir_(test_dir_ + "/store"), - index_dir_(test_dir_ + "/index") {} - - void SetUp() override { - filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); - filesystem_.CreateDirectoryRecursively(index_dir_.c_str()); - filesystem_.CreateDirectoryRecursively(store_dir_.c_str()); - - if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { - // If we've specified using the reverse-JNI method for segmentation (i.e. - // not ICU), then we won't have the ICU data file included to set up. - // Technically, we could choose to use reverse-JNI for segmentation AND - // include an ICU data file, but that seems unlikely and our current BUILD - // setup doesn't do this. - ICING_ASSERT_OK( - // File generated via icu_data_file rule in //icing/BUILD. 
- icu_data_file_helper::SetUpICUDataFile( - GetTestFilePath("icing/icu.dat"))); - } - - Index::Options options(index_dir_, - /*index_merge_size=*/1024 * 1024); - ICING_ASSERT_OK_AND_ASSIGN( - index_, Index::Create(options, &filesystem_, &icing_filesystem_)); - - language_segmenter_factory::SegmenterOptions segmenter_options( - ULOC_US, jni_cache_.get()); - ICING_ASSERT_OK_AND_ASSIGN( - language_segmenter_, - language_segmenter_factory::Create(segmenter_options)); - - ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( - /*max_term_byte_size=*/1000)); - - ICING_ASSERT_OK_AND_ASSIGN( - schema_store_, - SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_, - schema_store_.get())); - } - - libtextclassifier3::Status AddTokenToIndex( - DocumentId document_id, SectionId section_id, - TermMatchType::Code term_match_type, const std::string& token) { - Index::Editor editor = index_->Edit(document_id, section_id, - term_match_type, /*namespace_id=*/0); - auto status = editor.BufferTerm(token.c_str()); - return status.ok() ? editor.IndexAllBufferedTerms() : status; - } - - void TearDown() override { - filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); - } - - Filesystem filesystem_; - const std::string test_dir_; - const std::string store_dir_; - std::unique_ptr<Index> index_; - std::unique_ptr<LanguageSegmenter> language_segmenter_; - std::unique_ptr<Normalizer> normalizer_; - std::unique_ptr<SchemaStore> schema_store_; - std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache(); - FakeClock fake_clock_; - - private: - IcingFilesystem icing_filesystem_; - const std::string index_dir_; -}; - -constexpr DocumentId kDocumentId0 = 0; -constexpr SectionId kSectionId2 = 2; - -TEST_F(SuggestionProcessorTest, PrependedPrefixTokenTest) { - ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2, - TermMatchType::EXACT_ONLY, "foo"), - IsOk()); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SuggestionProcessor> suggestion_processor, - SuggestionProcessor::Create(index_.get(), language_segmenter_.get(), - normalizer_.get())); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix( - "prefix token should be prepended to the suggestion f"); - suggestion_spec.set_num_to_return(10); - - AlwaysTrueNamespaceCheckerImpl impl; - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - EXPECT_THAT(terms.at(0).content, - "prefix token should be prepended to the suggestion foo"); -} - -TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) { - ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2, - TermMatchType::EXACT_ONLY, "foo"), - IsOk()); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SuggestionProcessor> suggestion_processor, - SuggestionProcessor::Create(index_.get(), language_segmenter_.get(), - normalizer_.get())); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("nonExistTerm"); - suggestion_spec.set_num_to_return(10); - - AlwaysTrueNamespaceCheckerImpl impl; - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - - EXPECT_THAT(terms, IsEmpty()); -} - -TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) { - ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2, - TermMatchType::EXACT_ONLY, "foo"), - IsOk()); - - ICING_ASSERT_OK_AND_ASSIGN( - 
std::unique_ptr<SuggestionProcessor> suggestion_processor, - SuggestionProcessor::Create(index_.get(), language_segmenter_.get(), - normalizer_.get())); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f "); - suggestion_spec.set_num_to_return(10); - - AlwaysTrueNamespaceCheckerImpl impl; - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - - EXPECT_THAT(terms, IsEmpty()); -} - -TEST_F(SuggestionProcessorTest, NormalizePrefixTest) { - ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2, - TermMatchType::EXACT_ONLY, "foo"), - IsOk()); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SuggestionProcessor> suggestion_processor, - SuggestionProcessor::Create(index_.get(), language_segmenter_.get(), - normalizer_.get())); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("F"); - suggestion_spec.set_num_to_return(10); - - AlwaysTrueNamespaceCheckerImpl impl; - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - EXPECT_THAT(terms.at(0).content, "foo"); - - suggestion_spec.set_prefix("fO"); - ICING_ASSERT_OK_AND_ASSIGN( - terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - EXPECT_THAT(terms.at(0).content, "foo"); - - suggestion_spec.set_prefix("Fo"); - ICING_ASSERT_OK_AND_ASSIGN( - terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - EXPECT_THAT(terms.at(0).content, "foo"); - - suggestion_spec.set_prefix("FO"); - ICING_ASSERT_OK_AND_ASSIGN( - terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - EXPECT_THAT(terms.at(0).content, "foo"); -} - -TEST_F(SuggestionProcessorTest, OrOperatorPrefixTest) { - ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2, - TermMatchType::EXACT_ONLY, "foo"), - IsOk()); - ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2, - TermMatchType::EXACT_ONLY, "original"), - IsOk()); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SuggestionProcessor> suggestion_processor, - SuggestionProcessor::Create(index_.get(), language_segmenter_.get(), - normalizer_.get())); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f OR"); - suggestion_spec.set_num_to_return(10); - - AlwaysTrueNamespaceCheckerImpl impl; - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - - // Last Operator token will be used to query suggestion - EXPECT_THAT(terms.at(0).content, "f original"); -} - -TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) { - ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2, - TermMatchType::EXACT_ONLY, "foo"), - IsOk()); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SuggestionProcessor> suggestion_processor, - SuggestionProcessor::Create(index_.get(), language_segmenter_.get(), - normalizer_.get())); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("{f}"); - suggestion_spec.set_num_to_return(10); - - AlwaysTrueNamespaceCheckerImpl impl; - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - EXPECT_THAT(terms, IsEmpty()); - - suggestion_spec.set_prefix("[f]"); - ICING_ASSERT_OK_AND_ASSIGN( - terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - EXPECT_THAT(terms, IsEmpty()); - - suggestion_spec.set_prefix("(f)"); - ICING_ASSERT_OK_AND_ASSIGN( - terms, 
suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - EXPECT_THAT(terms, IsEmpty()); -} - -TEST_F(SuggestionProcessorTest, OtherSpecialPrefixTest) { - ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2, - TermMatchType::EXACT_ONLY, "foo"), - IsOk()); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SuggestionProcessor> suggestion_processor, - SuggestionProcessor::Create(index_.get(), language_segmenter_.get(), - normalizer_.get())); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f:"); - suggestion_spec.set_num_to_return(10); - - AlwaysTrueNamespaceCheckerImpl impl; - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - EXPECT_THAT(terms, IsEmpty()); - - suggestion_spec.set_prefix("f-"); - ICING_ASSERT_OK_AND_ASSIGN( - terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - EXPECT_THAT(terms, IsEmpty()); -} - -TEST_F(SuggestionProcessorTest, InvalidPrefixTest) { - ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2, - TermMatchType::EXACT_ONLY, "original"), - IsOk()); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SuggestionProcessor> suggestion_processor, - SuggestionProcessor::Create(index_.get(), language_segmenter_.get(), - normalizer_.get())); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("OR OR - :"); - suggestion_spec.set_num_to_return(10); - - AlwaysTrueNamespaceCheckerImpl impl; - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); - EXPECT_THAT(terms, IsEmpty()); -} - -} // namespace - -} // namespace lib -} // namespace icing diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc index 0d812e4..1c9684d 100644 --- a/icing/result/result-retriever_test.cc +++ b/icing/result/result-retriever_test.cc @@ -22,6 +22,7 @@ #include "gtest/gtest.h" #include "icing/document-builder.h" #include "icing/file/mock-filesystem.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/equals-proto.h" #include "icing/portable/platform.h" #include "icing/proto/document.pb.h" @@ -35,7 +36,6 @@ #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/snippet-helpers.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" @@ -55,14 +55,14 @@ using ::testing::IsEmpty; using ::testing::Return; using ::testing::SizeIs; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; -constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; -constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; class ResultRetrieverTest : public testing::Test { protected: @@ -160,7 +160,7 @@ ResultSpecProto::SnippetSpecProto CreateSnippetSpec() { ResultSpecProto::SnippetSpecProto snippet_spec; 
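// Aside: the IOErrorShouldReturnInternalError change below swaps which
// filesystem call is stubbed, but both sides use the same gMock
// default-action idiom. A minimal self-contained version of that idiom
// (MockFs is a stand-in for the project's MockFilesystem):
#include "gmock/gmock.h"
#include "gtest/gtest.h"

class MockFs {
 public:
  MOCK_METHOD(bool, OpenForRead, (const char* path));
};

TEST(MockFsExample, DefaultActionSketch) {
  ::testing::NiceMock<MockFs> fs;
  // ON_CALL sets a default action without an expectation, so the test
  // exercises the resulting error path instead of asserting call counts.
  ON_CALL(fs, OpenForRead(::testing::_))
      .WillByDefault(::testing::Return(false));
  EXPECT_FALSE(fs.OpenForRead("/any/path"));
}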
snippet_spec.set_num_to_snippet(std::numeric_limits<int>::max()); snippet_spec.set_num_matches_per_property(std::numeric_limits<int>::max()); - snippet_spec.set_max_window_utf32_length(1024); + snippet_spec.set_max_window_bytes(1024); return snippet_spec; } @@ -362,8 +362,8 @@ TEST_F(ResultRetrieverTest, NotIgnoreErrors) { TEST_F(ResultRetrieverTest, IOErrorShouldReturnInternalError) { MockFilesystem mock_filesystem; - ON_CALL(mock_filesystem, PRead(A<int>(), A<void*>(), A<size_t>(), A<off_t>())) - .WillByDefault(Return(false)); + ON_CALL(mock_filesystem, OpenForRead(_)).WillByDefault(Return(false)); + ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_, diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc index 8a9005d..32e45aa 100644 --- a/icing/result/result-state-manager_test.cc +++ b/icing/result/result-state-manager_test.cc @@ -849,7 +849,7 @@ TEST_F(ResultStateManagerTest, ShouldGetSnippetContext) { ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1); result_spec.mutable_snippet_spec()->set_num_to_snippet(5); result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5); + result_spec.mutable_snippet_spec()->set_max_window_bytes(5); SearchSpecProto search_spec; search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); @@ -884,7 +884,7 @@ TEST_F(ResultStateManagerTest, ShouldGetDefaultSnippetContext) { // 0 indicates no snippeting result_spec.mutable_snippet_spec()->set_num_to_snippet(0); result_spec.mutable_snippet_spec()->set_num_matches_per_property(0); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(0); + result_spec.mutable_snippet_spec()->set_max_window_bytes(0); SearchSpecProto search_spec; search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); diff --git a/icing/result/result-state_test.cc b/icing/result/result-state_test.cc index d92fcfa..f2121a5 100644 --- a/icing/result/result-state_test.cc +++ b/icing/result/result-state_test.cc @@ -143,7 +143,7 @@ TEST_F(ResultStateTest, ShouldReturnSnippetContextAccordingToSpecs) { ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); result_spec.mutable_snippet_spec()->set_num_to_snippet(5); result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5); + result_spec.mutable_snippet_spec()->set_max_window_bytes(5); SectionRestrictQueryTermsMap query_terms_map; query_terms_map.emplace("term1", std::unordered_set<std::string>()); @@ -178,7 +178,7 @@ TEST_F(ResultStateTest, NoSnippetingShouldReturnNull) { // stored. 
result_spec.mutable_snippet_spec()->set_num_to_snippet(0); result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5); + result_spec.mutable_snippet_spec()->set_max_window_bytes(5); SectionRestrictQueryTermsMap query_terms_map; query_terms_map.emplace("term1", std::unordered_set<std::string>()); diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc index bd1524e..2a138ec 100644 --- a/icing/result/snippet-retriever.cc +++ b/icing/result/snippet-retriever.cc @@ -41,7 +41,6 @@ #include "icing/transform/normalizer.h" #include "icing/util/character-iterator.h" #include "icing/util/i18n-utils.h" -#include "icing/util/logging.h" #include "icing/util/status-macros.h" namespace icing { @@ -76,81 +75,10 @@ inline std::string AddIndexToPath(int values_size, int index, kRBracket); } -// Returns a string of the normalized text of the input Token. Normalization -// is applied based on the Token's type. -std::string NormalizeToken(const Normalizer& normalizer, const Token& token) { - switch (token.type) { - case Token::Type::REGULAR: - return normalizer.NormalizeTerm(token.text); - case Token::Type::VERBATIM: - return std::string(token.text); - case Token::Type::QUERY_EXCLUSION: - [[fallthrough]]; - case Token::Type::QUERY_LEFT_PARENTHESES: - [[fallthrough]]; - case Token::Type::QUERY_RIGHT_PARENTHESES: - [[fallthrough]]; - case Token::Type::QUERY_OR: - [[fallthrough]]; - case Token::Type::QUERY_PROPERTY: - [[fallthrough]]; - case Token::Type::INVALID: - ICING_LOG(WARNING) << "Unable to normalize token of type: " - << static_cast<int>(token.type); - return std::string(token.text); - } -} - -// Returns a CharacterIterator for token's text, advancing one past the last -// matching character from the query term. -CharacterIterator FindMatchEnd(const Normalizer& normalizer, const Token& token, - const std::string& match_query_term) { - switch (token.type) { - case Token::Type::VERBATIM: { - // VERBATIM tokens are not normalized. This means the non-normalized - // matched query term must be either equal to or a prefix of the token's - // text. Therefore, the match must end at the end of the matched query - // term. - CharacterIterator verbatim_match_end = - CharacterIterator(token.text, 0, 0, 0); - verbatim_match_end.AdvanceToUtf8(match_query_term.length()); - return verbatim_match_end; - } - case Token::Type::QUERY_EXCLUSION: - [[fallthrough]]; - case Token::Type::QUERY_LEFT_PARENTHESES: - [[fallthrough]]; - case Token::Type::QUERY_RIGHT_PARENTHESES: - [[fallthrough]]; - case Token::Type::QUERY_OR: - [[fallthrough]]; - case Token::Type::QUERY_PROPERTY: - [[fallthrough]]; - case Token::Type::INVALID: - ICING_LOG(WARNING) - << "Unexpected Token type " << static_cast<int>(token.type) - << " found when finding match end of query term and token."; - [[fallthrough]]; - case Token::Type::REGULAR: - return normalizer.FindNormalizedMatchEndPosition(token.text, - match_query_term); - } -} - class TokenMatcher { public: virtual ~TokenMatcher() = default; - - // Returns a CharacterIterator pointing just past the end of the substring in - // token.text that matches a query term. Note that the utf* indices will be - // in relation to token.text's start. - // - // If there is no match, then it will construct a CharacterIterator with all - // of its indices set to -1. - // - // Ex. With an exact matcher, query terms=["foo","bar"] and token.text="bar", - // Matches will return a CharacterIterator(u8:3, u16:3, u32:3). 
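// Aside: the (u8:3, u16:3, u32:3) triple in the deleted comment above is
// the convention CharacterIterator uses throughout this file: byte, UTF-16
// code unit, and code point offsets. They coincide for ASCII but diverge
// otherwise; a quick standalone check (C++17):
static_assert(sizeof(u8"bar") - 1 == 3);                     // 3 bytes
static_assert(sizeof(u8"走路") - 1 == 6);                    // 3 bytes/char
static_assert(sizeof(u"走路") / sizeof(char16_t) - 1 == 2);  // 2 UTF-16 units
static_assert(sizeof(U"走路") / sizeof(char32_t) - 1 == 2);  // 2 code points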
- virtual CharacterIterator Matches(Token token) const = 0; + virtual bool Matches(Token token) const = 0; }; class TokenMatcherExact : public TokenMatcher { @@ -163,18 +91,10 @@ class TokenMatcherExact : public TokenMatcher { restricted_query_terms_(restricted_query_terms), normalizer_(normalizer) {} - CharacterIterator Matches(Token token) const override { - std::string s = NormalizeToken(normalizer_, token); - auto itr = unrestricted_query_terms_.find(s); - if (itr == unrestricted_query_terms_.end()) { - itr = restricted_query_terms_.find(s); - } - if (itr != unrestricted_query_terms_.end() && - itr != restricted_query_terms_.end()) { - return FindMatchEnd(normalizer_, token, *itr); - } - - return CharacterIterator(token.text, -1, -1, -1); + bool Matches(Token token) const override { + std::string s = normalizer_.NormalizeTerm(token.text); + return (unrestricted_query_terms_.count(s) > 0) || + (restricted_query_terms_.count(s) > 0); } private: @@ -193,21 +113,22 @@ class TokenMatcherPrefix : public TokenMatcher { restricted_query_terms_(restricted_query_terms), normalizer_(normalizer) {} - CharacterIterator Matches(Token token) const override { - std::string s = NormalizeToken(normalizer_, token); - for (const std::string& query_term : unrestricted_query_terms_) { - if (query_term.length() <= s.length() && - s.compare(0, query_term.length(), query_term) == 0) { - return FindMatchEnd(normalizer_, token, query_term); - } - } - for (const std::string& query_term : restricted_query_terms_) { - if (query_term.length() <= s.length() && - s.compare(0, query_term.length(), query_term) == 0) { - return FindMatchEnd(normalizer_, token, query_term); - } + bool Matches(Token token) const override { + std::string s = normalizer_.NormalizeTerm(token.text); + if (std::any_of(unrestricted_query_terms_.begin(), + unrestricted_query_terms_.end(), + [&s](const std::string& term) { + return term.length() <= s.length() && + s.compare(0, term.length(), term) == 0; + })) { + return true; } - return CharacterIterator(token.text, -1, -1, -1); + return std::any_of(restricted_query_terms_.begin(), + restricted_query_terms_.end(), + [&s](const std::string& term) { + return term.length() <= s.length() && + s.compare(0, term.length(), term) == 0; + }); } private: @@ -245,7 +166,7 @@ libtextclassifier3::StatusOr<CharacterIterator> DetermineWindowStart( const ResultSpecProto::SnippetSpecProto& snippet_spec, std::string_view value, int window_start_min_exclusive_utf32, Tokenizer::Iterator* iterator) { - if (!iterator->ResetToTokenStartingAfter(window_start_min_exclusive_utf32)) { + if (!iterator->ResetToTokenAfter(window_start_min_exclusive_utf32)) { return absl_ports::InternalError( "Couldn't reset tokenizer to determine snippet window!"); } @@ -280,7 +201,7 @@ libtextclassifier3::StatusOr<CharacterIterator> DetermineWindowEnd( const ResultSpecProto::SnippetSpecProto& snippet_spec, std::string_view value, int window_end_max_exclusive_utf32, Tokenizer::Iterator* iterator) { - if (!iterator->ResetToTokenEndingBefore(window_end_max_exclusive_utf32)) { + if (!iterator->ResetToTokenBefore(window_end_max_exclusive_utf32)) { return absl_ports::InternalError( "Couldn't reset tokenizer to determine snippet window!"); } @@ -344,9 +265,9 @@ libtextclassifier3::StatusOr<SnippetMatchProto> RetrieveMatch( int match_len_utf32 = end_itr.utf32_index() - match_pos_utf32; int match_mid_utf32 = match_pos_utf32 + match_len_utf32 / 2; int window_start_min_exclusive_utf32 = - (match_mid_utf32 - snippet_spec.max_window_utf32_length() / 2) - 
1; + (match_mid_utf32 - snippet_spec.max_window_bytes() / 2) - 1; int window_end_max_exclusive_utf32 = - match_mid_utf32 + (snippet_spec.max_window_utf32_length() + 1) / 2; + match_mid_utf32 + (snippet_spec.max_window_bytes() + 1) / 2; snippet_match.set_exact_match_byte_position(start_itr.utf8_index()); snippet_match.set_exact_match_utf16_position(start_itr.utf16_index()); @@ -357,7 +278,7 @@ libtextclassifier3::StatusOr<SnippetMatchProto> RetrieveMatch( // Only include windows if it'll at least include the matched text. Otherwise, // it'll just be an empty string anyways. - if (snippet_spec.max_window_utf32_length() >= match_len_utf32) { + if (snippet_spec.max_window_bytes() >= match_len_utf32) { // Find the beginning of the window. ICING_ASSIGN_OR_RETURN( CharacterIterator window_start, @@ -398,13 +319,8 @@ libtextclassifier3::StatusOr<SnippetMatchProto> RetrieveMatch( // DetermineWindowStart/End may change the position of the iterator. So, // reset the iterator back to the original position. - bool success = false; - if (match_pos_utf32 > 0) { - success = iterator->ResetToTokenStartingAfter(match_pos_utf32 - 1); - } else { - success = iterator->ResetToStart(); - } - + bool success = (match_pos_utf32 > 0) ? iterator->ResetToTokenAfter(match_pos_utf32 - 1) + : iterator->ResetToStart(); if (!success) { return absl_ports::InternalError( "Couldn't reset tokenizer to determine snippet window!"); @@ -448,10 +364,7 @@ void GetEntriesFromProperty(const PropertyProto* current_property, CharacterIterator char_iterator(value); while (iterator->Advance()) { Token token = iterator->GetToken(); - CharacterIterator submatch_end = matcher->Matches(token); - // If the token matched a query term, then submatch_end will point to an - // actual position within token.text. - if (submatch_end.utf8_index() != -1) { + if (matcher->Matches(token)) { if (!char_iterator.AdvanceToUtf8(token.text.data() - value.data())) { // We can't get the char_iterator to a valid position, so there's no // way for us to provide valid utf-16 indices. There's nothing more we @@ -480,15 +393,7 @@ void GetEntriesFromProperty(const PropertyProto* current_property, } } SnippetMatchProto match = std::move(match_or).ValueOrDie(); - // submatch_end refers to a position *within* token.text. - // This, conveniently enough, means that index that submatch_end points - // to is the length of the submatch (because the submatch starts at 0 in - // token.text). - match.set_submatch_byte_length(submatch_end.utf8_index()); - match.set_submatch_utf16_length(submatch_end.utf16_index()); - // Add the values for the submatch. 
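// Aside: a worked example of the window bounds computed in RetrieveMatch
// above, for a match "three" at UTF-32 positions [8, 13) with a maximum
// window of 14:
//   match_len_utf32 = 13 - 8 = 5
//   match_mid_utf32 = 8 + 5 / 2 = 10
//   window_start_min_exclusive_utf32 = (10 - 14 / 2) - 1 = 2
//   window_end_max_exclusive_utf32   = 10 + (14 + 1) / 2 = 17
// Both bounds are exclusive, so the window may occupy positions 3..16,
// i.e. 17 - 2 - 1 = 14 code points, exactly the requested window size.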
snippet_entry.mutable_snippet_matches()->Add(std::move(match)); - if (--match_options->max_matches_remaining <= 0) { *snippet_proto->add_entries() = std::move(snippet_entry); return; diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc index 0de2295..e7988ae 100644 --- a/icing/result/snippet-retriever_test.cc +++ b/icing/result/snippet-retriever_test.cc @@ -22,6 +22,7 @@ #include "gtest/gtest.h" #include "icing/document-builder.h" #include "icing/file/mock-filesystem.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/equals-proto.h" #include "icing/portable/platform.h" #include "icing/proto/document.pb.h" @@ -36,14 +37,12 @@ #include "icing/store/key-mapper.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/snippet-helpers.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/language-segmenter.h" -#include "icing/transform/map/map-normalizer.h" #include "icing/transform/normalizer-factory.h" #include "icing/transform/normalizer.h" #include "unicode/uloc.h" @@ -58,18 +57,16 @@ using ::testing::Eq; using ::testing::IsEmpty; using ::testing::SizeIs; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = - PropertyConfigProto::Cardinality::REPEATED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_VERBATIM = - StringIndexingConfig::TokenizerType::VERBATIM; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; -constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; -constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; std::vector<std::string_view> GetPropertyPaths(const SnippetProto& snippet) { std::vector<std::string_view> paths; @@ -133,7 +130,7 @@ class SnippetRetrieverTest : public testing::Test { snippet_spec_.set_num_to_snippet(std::numeric_limits<int32_t>::max()); snippet_spec_.set_num_matches_per_property( std::numeric_limits<int32_t>::max()); - snippet_spec_.set_max_window_utf32_length(64); + snippet_spec_.set_max_window_bytes(64); } void TearDown() override { @@ -180,7 +177,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeSmallerThanMatch) { // Window starts at the beginning of "three" and ends in the middle of // "three". len=4, orig_window= "thre" - snippet_spec_.set_max_window_utf32_length(4); + snippet_spec_.set_max_window_bytes(4); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -206,7 +203,7 @@ TEST_F(SnippetRetrieverTest, // Window starts at the beginning of "three" and at the exact end of // "three". 
len=5, orig_window= "three" - snippet_spec_.set_max_window_utf32_length(5); + snippet_spec_.set_max_window_bytes(5); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -232,7 +229,7 @@ TEST_F(SnippetRetrieverTest, // Window starts at the beginning of "four" and at the exact end of // "four". len=4, orig_window= "four" - snippet_spec_.set_max_window_utf32_length(4); + snippet_spec_.set_max_window_bytes(4); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -264,7 +261,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsInWhitespace) { // 1. untrimmed, no-shifting window will be (2,17). // 2. trimmed, no-shifting window [4,13) "two three" // 3. trimmed, shifted window [4,18) "two three four" - snippet_spec_.set_max_window_utf32_length(14); + snippet_spec_.set_max_window_bytes(14); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -297,7 +294,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsMidToken) { // 1. untrimmed, no-shifting window will be (1,18). // 2. trimmed, no-shifting window [4,18) "two three four" // 3. trimmed, shifted window [4,20) "two three four.." - snippet_spec_.set_max_window_utf32_length(16); + snippet_spec_.set_max_window_bytes(16); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -323,7 +320,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInPunctuation) { // Window ends in the middle of all the punctuation and window starts at 0. // len=20, orig_window="one two three four.." - snippet_spec_.set_max_window_utf32_length(20); + snippet_spec_.set_max_window_bytes(20); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -351,7 +348,7 @@ TEST_F(SnippetRetrieverTest, // Window ends in the middle of all the punctuation and window starts at 0. // len=26, orig_window="pside down in Australia¿" - snippet_spec_.set_max_window_utf32_length(24); + snippet_spec_.set_max_window_bytes(24); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -379,7 +376,7 @@ TEST_F(SnippetRetrieverTest, // Window ends in the middle of all the punctuation and window starts at 0. // len=26, orig_window="upside down in Australia¿ " - snippet_spec_.set_max_window_utf32_length(26); + snippet_spec_.set_max_window_bytes(26); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -412,7 +409,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsBeforeValueStart) { // 1. untrimmed, no-shifting window will be (-2,21). // 2. trimmed, no-shifting window [0,21) "one two three four..." // 3. trimmed, shifted window [0,22) "one two three four...." - snippet_spec_.set_max_window_utf32_length(22); + snippet_spec_.set_max_window_bytes(22); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -438,7 +435,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInWhitespace) { // Window ends before "five" but after all the punctuation // len=26, orig_window="one two three four.... 
" - snippet_spec_.set_max_window_utf32_length(26); + snippet_spec_.set_max_window_bytes(26); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -471,7 +468,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsMidToken) { // 1. untrimmed, no-shifting window will be ((-7,26). // 2. trimmed, no-shifting window [0,26) "one two three four...." // 3. trimmed, shifted window [0,27) "one two three four.... five" - snippet_spec_.set_max_window_utf32_length(32); + snippet_spec_.set_max_window_bytes(32); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -497,7 +494,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeEqualToValueSize) { // Max window size equals the size of the value. // len=34, orig_window="one two three four.... five" - snippet_spec_.set_max_window_utf32_length(34); + snippet_spec_.set_max_window_bytes(34); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -523,7 +520,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeLargerThanValueSize) { // Max window size exceeds the size of the value. // len=36, orig_window="one two three four.... five" - snippet_spec_.set_max_window_utf32_length(36); + snippet_spec_.set_max_window_bytes(36); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -557,7 +554,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextStart) { // 1. untrimmed, no-shifting window will be (-10,19). // 2. trimmed, no-shifting window [0,19) "one two three four." // 3. trimmed, shifted window [0,27) "one two three four.... five" - snippet_spec_.set_max_window_utf32_length(28); + snippet_spec_.set_max_window_bytes(28); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -591,7 +588,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextEnd) { // 1. untrimmed, no-shifting window will be (10,39). // 2. trimmed, no-shifting window [14,31) "four.... five six" // 3. trimmed, shifted window [4,31) "two three four.... five six" - snippet_spec_.set_max_window_utf32_length(28); + snippet_spec_.set_max_window_bytes(28); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -625,7 +622,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextStartShortText) { // 1. untrimmed, no-shifting window will be (-10,19). // 2. trimmed, no-shifting window [0, 19) "one two three four." // 3. trimmed, shifted window [0, 22) "one two three four...." - snippet_spec_.set_max_window_utf32_length(28); + snippet_spec_.set_max_window_bytes(28); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -659,7 +656,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextEndShortText) { // 1. untrimmed, no-shifting window will be (1,30). // 2. trimmed, no-shifting window [4, 22) "two three four...." // 3. trimmed, shifted window [0, 22) "one two three four...." 
- snippet_spec_.set_max_window_utf32_length(28); + snippet_spec_.set_max_window_bytes(28); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -693,7 +690,6 @@ TEST_F(SnippetRetrieverTest, PrefixSnippeting) { EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("subject foo")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("f")); } TEST_F(SnippetRetrieverTest, ExactSnippeting) { @@ -723,7 +719,7 @@ TEST_F(SnippetRetrieverTest, SimpleSnippetingNoWindowing) { .AddStringProperty("body", "Only a fool would match this content.") .Build(); - snippet_spec_.set_max_window_utf32_length(0); + snippet_spec_.set_max_window_bytes(0); SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"foo"}}}; @@ -737,7 +733,6 @@ TEST_F(SnippetRetrieverTest, SimpleSnippetingNoWindowing) { GetString(&document, snippet.entries(0).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("foo")); } TEST_F(SnippetRetrieverTest, SnippetingMultipleMatches) { @@ -784,15 +779,12 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatches) { "we need to begin considering our options regarding body bar.")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo", "bar")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), - ElementsAre("foo", "bar")); EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject")); content = GetString(&document, snippet.entries(1).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("subject foo")); EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("foo")); } TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrict) { @@ -842,8 +834,6 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrict) { "we need to begin considering our options regarding body bar.")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo", "bar")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), - ElementsAre("foo", "bar")); } TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) { @@ -894,16 +884,12 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) { "Concerning the subject of foo, we need to begin considering our")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("subject", "foo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), - ElementsAre("subject", "foo")); EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject")); content = GetString(&document, snippet.entries(1).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("subject foo")); EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("subject")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), - ElementsAre("subject")); } TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesOneMatchPerProperty) { @@ -947,14 +933,12 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesOneMatchPerProperty) { ElementsAre( "Concerning the subject of foo, we need to begin considering our")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), 
ElementsAre("foo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("foo")); EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject")); content = GetString(&document, snippet.entries(1).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("subject foo")); EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("foo")); } TEST_F(SnippetRetrieverTest, PrefixSnippetingNormalization) { @@ -976,7 +960,6 @@ TEST_F(SnippetRetrieverTest, PrefixSnippetingNormalization) { GetString(&document, snippet.entries(0).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("MDI team")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("MDI")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("MD")); } TEST_F(SnippetRetrieverTest, ExactSnippetingNormalization) { @@ -1000,9 +983,6 @@ TEST_F(SnippetRetrieverTest, ExactSnippetingNormalization) { EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("Some members are in Zürich.")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("Zürich")); - - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), - ElementsAre("Zürich")); } TEST_F(SnippetRetrieverTest, SnippetingTestOneLevel) { @@ -1063,13 +1043,11 @@ TEST_F(SnippetRetrieverTest, SnippetingTestOneLevel) { GetString(&document, snippet.entries(0).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo")); EXPECT_THAT(snippet.entries(1).property_name(), Eq("X[3]")); content = GetString(&document, snippet.entries(1).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo")); EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo")); EXPECT_THAT(GetPropertyPaths(snippet), ElementsAre("X[1]", "X[3]", "Y[1]", "Y[3]", "Z[1]", "Z[3]")); @@ -1166,13 +1144,11 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevel) { GetString(&document, snippet.entries(0).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo")); EXPECT_THAT(snippet.entries(1).property_name(), Eq("A.X[3]")); content = GetString(&document, snippet.entries(1).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo")); EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo")); EXPECT_THAT( GetPropertyPaths(snippet), @@ -1275,13 +1251,11 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelRepeated) { GetString(&document, snippet.entries(0).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo")); EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[0].X[3]")); content = GetString(&document, snippet.entries(1).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo")); 
EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo")); EXPECT_THAT(GetPropertyPaths(snippet), ElementsAre("A[0].X[1]", "A[0].X[3]", "A[1].X[1]", "A[1].X[3]", @@ -1382,13 +1356,11 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelSingleValue) { GetString(&document, snippet.entries(0).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo")); EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo")); EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[1].X")); content = GetString(&document, snippet.entries(1).property_name()); EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo")); EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo")); - EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo")); EXPECT_THAT( GetPropertyPaths(snippet), @@ -1432,12 +1404,10 @@ TEST_F(SnippetRetrieverTest, CJKSnippetMatchTest) { // Ensure that the match is correct. EXPECT_THAT(GetMatches(content, *entry), ElementsAre("走路")); - EXPECT_THAT(GetSubMatches(content, *entry), ElementsAre("走")); // Ensure that the utf-16 values are also as expected EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3)); EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2)); - EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(1)); } TEST_F(SnippetRetrieverTest, CJKSnippetWindowTest) { @@ -1475,7 +1445,7 @@ TEST_F(SnippetRetrieverTest, CJKSnippetWindowTest) { // 1. untrimmed, no-shifting window will be (0,7). // 2. trimmed, no-shifting window [1, 6) "每天走路去". // 3. trimmed, shifted window [0, 6) "我每天走路去" - snippet_spec_.set_max_window_utf32_length(6); + snippet_spec_.set_max_window_bytes(6); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); @@ -1537,12 +1507,10 @@ TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitSnippetMatchTest) { // Ensure that the match is correct. 
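// Aside: the utf-16 expectations below come from surrogate pairs: 𐀂 and 𐀃
// (U+10002, U+10003) each encode as two UTF-16 code units and four UTF-8
// bytes, hence a match length of 4 utf-16 units for "𐀂𐀃". Standalone
// check (C++17):
static_assert(sizeof(u"𐀂𐀃") / sizeof(char16_t) - 1 == 4);   // 2 + 2 units
static_assert(sizeof(u8"𐀂𐀃") - 1 == 8);                     // 4 + 4 bytes
static_assert(sizeof(U"𐀂𐀃") / sizeof(char32_t) - 1 == 2);   // 2 code points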
EXPECT_THAT(GetMatches(content, *entry), ElementsAre("𐀂𐀃")); - EXPECT_THAT(GetSubMatches(content, *entry), ElementsAre("𐀂")); // Ensure that the utf-16 values are also as expected EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(5)); EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(4)); - EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(2)); } TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitWindowTest) { @@ -1574,7 +1542,7 @@ TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitWindowTest) { // UTF8 idx: 9 22 // UTF16 idx: 5 12 // UTF32 idx: 3 7 - snippet_spec_.set_max_window_utf32_length(6); + snippet_spec_.set_max_window_bytes(6); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); @@ -1598,117 +1566,6 @@ TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitWindowTest) { EXPECT_THAT(match_proto.window_utf16_length(), Eq(7)); } -TEST_F(SnippetRetrieverTest, SnippettingVerbatimAscii) { - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("verbatimType") - .AddProperty(PropertyConfigBuilder() - .SetName("verbatim") - .SetDataTypeString(MATCH_EXACT, - TOKENIZER_VERBATIM) - .SetCardinality(CARDINALITY_REPEATED))) - .Build(); - ICING_ASSERT_OK(schema_store_->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true)); - ICING_ASSERT_OK_AND_ASSIGN( - snippet_retriever_, - SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), - normalizer_.get())); - - DocumentProto document = DocumentBuilder() - .SetKey("icing", "verbatim/1") - .SetSchema("verbatimType") - .AddStringProperty("verbatim", "Hello, world!") - .Build(); - - SectionIdMask section_mask = 0b00000001; - SectionRestrictQueryTermsMap query_terms{{"", {"Hello, world!"}}}; - - snippet_spec_.set_max_window_utf32_length(13); - SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); - - // There should only be one snippet entry and match, the verbatim token in its - // entirety. - ASSERT_THAT(snippet.entries(), SizeIs(1)); - - const SnippetProto::EntryProto* entry = &snippet.entries(0); - ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); - ASSERT_THAT(entry->property_name(), "verbatim"); - - const SnippetMatchProto& match_proto = entry->snippet_matches(0); - // We expect the match to begin at position 0, and to span the entire token - // which contains 13 characters. - EXPECT_THAT(match_proto.window_byte_position(), Eq(0)); - EXPECT_THAT(match_proto.window_utf16_length(), Eq(13)); - - // We expect the submatch to begin at position 0 of the verbatim token and - // span the length of our query term "Hello, world!", which has utf-16 length - // of 13. The submatch length is equal to the window length as the query the - // snippet is retrieved with an exact term match. 
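// Aside, for contrast with the verbatim behavior asserted here: a
// PLAIN-tokenized property is segmented on whitespace and punctuation (and
// lowercased for matching), so a query for the full string could never
// match it exactly. Roughly:
//   TOKENIZER_PLAIN    "Hello, world!" -> tokens {"Hello", "world"}
//   TOKENIZER_VERBATIM "Hello, world!" -> tokens {"Hello, world!"}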
- EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(0)); - EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(13)); -} - -TEST_F(SnippetRetrieverTest, SnippettingVerbatimCJK) { - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("verbatimType") - .AddProperty(PropertyConfigBuilder() - .SetName("verbatim") - .SetDataTypeString(MATCH_PREFIX, - TOKENIZER_VERBATIM) - .SetCardinality(CARDINALITY_REPEATED))) - .Build(); - ICING_ASSERT_OK(schema_store_->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true)); - ICING_ASSERT_OK_AND_ASSIGN( - snippet_retriever_, - SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), - normalizer_.get())); - - // String: "我每天走路去上班。" - // ^ ^ ^ ^^ - // UTF8 idx: 0 3 9 15 18 - // UTF16 idx: 0 1 3 5 6 - // UTF32 idx: 0 1 3 5 6 - // Breaks into segments: "我", "每天", "走路", "去", "上班" - std::string chinese_string = "我每天走路去上班。"; - DocumentProto document = DocumentBuilder() - .SetKey("icing", "verbatim/1") - .SetSchema("verbatimType") - .AddStringProperty("verbatim", chinese_string) - .Build(); - - SectionIdMask section_mask = 0b00000001; - SectionRestrictQueryTermsMap query_terms{{"", {"我每"}}}; - - snippet_spec_.set_max_window_utf32_length(9); - SnippetProto snippet = snippet_retriever_->RetrieveSnippet( - query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); - - // There should only be one snippet entry and match, the verbatim token in its - // entirety. - ASSERT_THAT(snippet.entries(), SizeIs(1)); - - const SnippetProto::EntryProto* entry = &snippet.entries(0); - ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); - ASSERT_THAT(entry->property_name(), "verbatim"); - - const SnippetMatchProto& match_proto = entry->snippet_matches(0); - // We expect the match to begin at position 0, and to span the entire token - // which has utf-16 length of 9. - EXPECT_THAT(match_proto.window_byte_position(), Eq(0)); - EXPECT_THAT(match_proto.window_utf16_length(), Eq(9)); - - // We expect the submatch to begin at position 0 of the verbatim token and - // span the length of our query term "我每", which has utf-16 length of 2. 
- EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(0)); - EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(2)); -} - } // namespace } // namespace lib diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc index fc50ea6..e9ba654 100644 --- a/icing/schema/schema-store.cc +++ b/icing/schema/schema-store.cc @@ -108,60 +108,27 @@ libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create( ICING_RETURN_ERROR_IF_NULL(filesystem); ICING_RETURN_ERROR_IF_NULL(clock); - if (!filesystem->DirectoryExists(base_dir.c_str())) { - return absl_ports::FailedPreconditionError( - "Schema store base directory does not exist!"); - } std::unique_ptr<SchemaStore> schema_store = std::unique_ptr<SchemaStore>( new SchemaStore(filesystem, base_dir, clock)); ICING_RETURN_IF_ERROR(schema_store->Initialize(initialize_stats)); return schema_store; } -libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create( - const Filesystem* filesystem, const std::string& base_dir, - const Clock* clock, SchemaProto schema) { - ICING_RETURN_ERROR_IF_NULL(filesystem); - ICING_RETURN_ERROR_IF_NULL(clock); - - if (!filesystem->DirectoryExists(base_dir.c_str())) { - return absl_ports::FailedPreconditionError( - "Schema store base directory does not exist!"); - } - std::unique_ptr<SchemaStore> schema_store = std::unique_ptr<SchemaStore>( - new SchemaStore(filesystem, base_dir, clock)); - ICING_RETURN_IF_ERROR(schema_store->Initialize(std::move(schema))); - return schema_store; -} - SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir, const Clock* clock) - : filesystem_(filesystem), + : filesystem_(*filesystem), base_dir_(std::move(base_dir)), - clock_(clock), - schema_file_(std::make_unique<FileBackedProto<SchemaProto>>( - *filesystem, MakeSchemaFilename(base_dir_))) {} + clock_(*clock), + schema_file_(*filesystem, MakeSchemaFilename(base_dir_)) {} SchemaStore::~SchemaStore() { - if (has_schema_successfully_set_ && schema_file_ != nullptr && - schema_type_mapper_ != nullptr && section_manager_ != nullptr) { + if (has_schema_successfully_set_) { if (!PersistToDisk().ok()) { ICING_LOG(ERROR) << "Error persisting to disk in SchemaStore destructor"; } } } -libtextclassifier3::Status SchemaStore::Initialize(SchemaProto new_schema) { - if (!absl_ports::IsNotFound(GetSchema().status())) { - return absl_ports::FailedPreconditionError( - "Incorrectly tried to initialize schema store with a new schema, when " - "one is already set!"); - } - ICING_RETURN_IF_ERROR(schema_file_->Write( - std::make_unique<SchemaProto>(std::move(new_schema)))); - return InitializeInternal(/*initialize_stats=*/nullptr); -} - libtextclassifier3::Status SchemaStore::Initialize( InitializeStatsProto* initialize_stats) { auto schema_proto_or = GetSchema(); @@ -172,16 +139,13 @@ libtextclassifier3::Status SchemaStore::Initialize( // Real error when trying to read the existing schema return schema_proto_or.status(); } - return InitializeInternal(initialize_stats); -} + has_schema_successfully_set_ = true; -libtextclassifier3::Status SchemaStore::InitializeInternal( - InitializeStatsProto* initialize_stats) { if (!InitializeDerivedFiles().ok()) { ICING_VLOG(3) << "Couldn't find derived files or failed to initialize them, " "regenerating derived files for SchemaStore."; - std::unique_ptr<Timer> regenerate_timer = clock_->GetNewTimer(); + std::unique_ptr<Timer> regenerate_timer = clock_.GetNewTimer(); if (initialize_stats != nullptr) { initialize_stats->set_schema_store_recovery_cause( 
InitializeStatsProto::IO_ERROR); @@ -197,7 +161,6 @@ libtextclassifier3::Status SchemaStore::InitializeInternal( initialize_stats->set_num_schema_types(type_config_map_.size()); } - has_schema_successfully_set_ = true; return libtextclassifier3::Status::OK; } @@ -209,8 +172,8 @@ libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() { } SchemaStore::Header header; - if (!filesystem_->Read(MakeHeaderFilename(base_dir_).c_str(), &header, - sizeof(header))) { + if (!filesystem_.Read(MakeHeaderFilename(base_dir_).c_str(), &header, + sizeof(header))) { return absl_ports::InternalError( absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_))); } @@ -222,7 +185,7 @@ libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() { ICING_ASSIGN_OR_RETURN( schema_type_mapper_, - KeyMapper<SchemaTypeId>::Create(*filesystem_, + KeyMapper<SchemaTypeId>::Create(filesystem_, MakeSchemaTypeMapperFilename(base_dir_), kSchemaTypeMapperMaxSize)); @@ -273,12 +236,12 @@ libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles() { } bool SchemaStore::HeaderExists() { - if (!filesystem_->FileExists(MakeHeaderFilename(base_dir_).c_str())) { + if (!filesystem_.FileExists(MakeHeaderFilename(base_dir_).c_str())) { return false; } int64_t file_size = - filesystem_->GetFileSize(MakeHeaderFilename(base_dir_).c_str()); + filesystem_.GetFileSize(MakeHeaderFilename(base_dir_).c_str()); // If it's been truncated to size 0 before, we consider it to be a new file return file_size != 0 && file_size != Filesystem::kBadFileSize; @@ -291,11 +254,11 @@ libtextclassifier3::Status SchemaStore::UpdateHeader(const Crc32& checksum) { header.checksum = checksum.Get(); ScopedFd scoped_fd( - filesystem_->OpenForWrite(MakeHeaderFilename(base_dir_).c_str())); + filesystem_.OpenForWrite(MakeHeaderFilename(base_dir_).c_str())); // This should overwrite the header. if (!scoped_fd.is_valid() || - !filesystem_->Write(scoped_fd.get(), &header, sizeof(header)) || - !filesystem_->DataSync(scoped_fd.get())) { + !filesystem_.Write(scoped_fd.get(), &header, sizeof(header)) || + !filesystem_.DataSync(scoped_fd.get())) { return absl_ports::InternalError(absl_ports::StrCat( "Failed to write SchemaStore header: ", MakeHeaderFilename(base_dir_))); } @@ -305,10 +268,10 @@ libtextclassifier3::Status SchemaStore::UpdateHeader(const Crc32& checksum) { libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() { // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset(). schema_type_mapper_.reset(); - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. 
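// Aside: UpdateHeader above is the classic open/write/sync pattern. A
// minimal POSIX analogue (WriteHeaderDurably is a hypothetical name; the
// real code goes through Filesystem and ScopedFd):
#include <fcntl.h>
#include <unistd.h>
#include <cstddef>

bool WriteHeaderDurably(const char* path, const void* header, size_t size) {
  int fd = open(path, O_WRONLY | O_CREAT, 0644);  // overwrite in place
  if (fd < 0) return false;
  bool ok = write(fd, header, size) == static_cast<ssize_t>(size) &&
            fdatasync(fd) == 0;  // like DataSync(): flush before returning
  close(fd);
  return ok;
}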
libtextclassifier3::Status status = KeyMapper<SchemaTypeId>::Delete( - *filesystem_, MakeSchemaTypeMapperFilename(base_dir_)); + filesystem_, MakeSchemaTypeMapperFilename(base_dir_)); if (!status.ok()) { ICING_LOG(ERROR) << status.error_message() << "Failed to delete old schema_type mapper"; @@ -316,7 +279,7 @@ libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() { } ICING_ASSIGN_OR_RETURN( schema_type_mapper_, - KeyMapper<SchemaTypeId>::Create(*filesystem_, + KeyMapper<SchemaTypeId>::Create(filesystem_, MakeSchemaTypeMapperFilename(base_dir_), kSchemaTypeMapperMaxSize)); @@ -324,17 +287,17 @@ libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() { } libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const { - auto schema_proto_or = GetSchema(); - if (absl_ports::IsNotFound(schema_proto_or.status())) { - return Crc32(); + Crc32 total_checksum; + if (!has_schema_successfully_set_) { + // Nothing to checksum + return total_checksum; } - ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, schema_proto_or); + ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema()); Crc32 schema_checksum; schema_checksum.Append(schema_proto->SerializeAsString()); Crc32 schema_type_mapper_checksum = schema_type_mapper_->ComputeChecksum(); - Crc32 total_checksum; total_checksum.Append(std::to_string(schema_checksum.Get())); total_checksum.Append(std::to_string(schema_type_mapper_checksum.Get())); @@ -343,7 +306,7 @@ libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const { libtextclassifier3::StatusOr<const SchemaProto*> SchemaStore::GetSchema() const { - return schema_file_->Read(); + return schema_file_.Read(); } // TODO(cassiewang): Consider removing this definition of SetSchema if it's not @@ -368,9 +331,6 @@ SchemaStore::SetSchema(SchemaProto&& new_schema, if (absl_ports::IsNotFound(schema_proto_or.status())) { // We don't have a pre-existing schema, so anything is valid. result.success = true; - for (const SchemaTypeConfigProto& type_config : new_schema.types()) { - result.schema_types_new_by_name.insert(type_config.schema_type()); - } } else if (!schema_proto_or.ok()) { // Real error return schema_proto_or.status(); @@ -391,11 +351,8 @@ SchemaStore::SetSchema(SchemaProto&& new_schema, SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, new_dependency_map); - result.schema_types_new_by_name = std::move(schema_delta.schema_types_new); - result.schema_types_changed_fully_compatible_by_name = - std::move(schema_delta.schema_types_changed_fully_compatible); - result.schema_types_index_incompatible_by_name = - std::move(schema_delta.schema_types_index_incompatible); + // An incompatible index is fine, we can just reindex + result.index_incompatible = schema_delta.index_incompatible; for (const auto& schema_type : schema_delta.schema_types_deleted) { // We currently don't support deletions, so mark this as not possible. 
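
The reworked ComputeChecksum above folds each subcomponent checksum into one total by appending the stringified values to a running CRC, and returns the default checksum when no schema has been set. A standalone sketch of that composition, substituting a toy bitwise CRC-32 for icing's Crc32 class:

#include <cstdint>
#include <string>

// Toy reflected CRC-32 (polynomial 0xEDB88320), written in append style so
// checksums can be chained; icing's Crc32 is assumed to behave similarly.
uint32_t Crc32Append(uint32_t crc, const std::string& data) {
  crc = ~crc;
  for (unsigned char c : data) {
    crc ^= c;
    for (int i = 0; i < 8; ++i) {
      crc = (crc >> 1) ^ ((crc & 1u) ? 0xEDB88320u : 0u);
    }
  }
  return ~crc;
}

// Mirrors the composition in ComputeChecksum(): stringify each component
// checksum and fold it into the total, so a change in any component is
// meant to change the total as well.
uint32_t TotalChecksum(const std::string& serialized_schema,
                       uint32_t mapper_checksum) {
  uint32_t schema_crc = Crc32Append(0, serialized_schema);
  uint32_t total = 0;
  total = Crc32Append(total, std::to_string(schema_crc));
  total = Crc32Append(total, std::to_string(mapper_checksum));
  return total;
}
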
@@ -430,78 +387,15 @@ SchemaStore::SetSchema(SchemaProto&& new_schema, result.success = result.success || ignore_errors_and_delete_documents; if (result.success) { - ICING_RETURN_IF_ERROR(ApplySchemaChange(std::move(new_schema))); + // Write the schema (and potentially overwrite a previous schema) + ICING_RETURN_IF_ERROR( + schema_file_.Write(std::make_unique<SchemaProto>(new_schema))); has_schema_successfully_set_ = true; - } - return result; -} - -libtextclassifier3::Status SchemaStore::ApplySchemaChange( - SchemaProto new_schema) { - // We need to ensure that we either 1) successfully set the schema and - // update all derived data structures or 2) fail and leave the schema store - // unchanged. - // So, first, we create an empty temporary directory to build a new schema - // store in. - std::string temp_schema_store_dir_path = base_dir_ + "_temp"; - if (!filesystem_->DeleteDirectoryRecursively( - temp_schema_store_dir_path.c_str())) { - ICING_LOG(WARNING) << "Failed to recursively delete " - << temp_schema_store_dir_path.c_str(); - return absl_ports::InternalError( - "Unable to delete temp directory to prepare to build new schema " - "store."); - } - - if (!filesystem_->CreateDirectoryRecursively( - temp_schema_store_dir_path.c_str())) { - return absl_ports::InternalError( - "Unable to create temp directory to build new schema store."); - } - - // Then we create our new schema store with the new schema. - auto new_schema_store_or = - SchemaStore::Create(filesystem_, temp_schema_store_dir_path, clock_, - std::move(new_schema)); - if (!new_schema_store_or.ok()) { - // Attempt to clean up the temp directory. - if (!filesystem_->DeleteDirectoryRecursively( - temp_schema_store_dir_path.c_str())) { - // Nothing to do here. Just log an error. - ICING_LOG(WARNING) << "Failed to recursively delete " - << temp_schema_store_dir_path.c_str(); - } - return new_schema_store_or.status(); - } - std::unique_ptr<SchemaStore> new_schema_store = - std::move(new_schema_store_or).ValueOrDie(); - - // Then we swap the new schema file + new derived files with the old files. - if (!filesystem_->SwapFiles(base_dir_.c_str(), - temp_schema_store_dir_path.c_str())) { - // Attempt to clean up the temp directory. - if (!filesystem_->DeleteDirectoryRecursively( - temp_schema_store_dir_path.c_str())) { - // Nothing to do here. Just log an error. - ICING_LOG(WARNING) << "Failed to recursively delete " - << temp_schema_store_dir_path.c_str(); - } - return absl_ports::InternalError( - "Unable to apply new schema due to failed swap!"); + ICING_RETURN_IF_ERROR(RegenerateDerivedFiles()); } - std::string old_base_dir = std::move(base_dir_); - *this = std::move(*new_schema_store); - - // After the std::move, the filepaths saved in this instance and in the - // schema_file_ instance will still be the one from temp_schema_store_dir - // even though they now point to files that are within old_base_dir. - // Manually set them to the correct paths. 
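
The ApplySchemaChange removed above implemented a build-then-swap transaction: construct the new store in a scratch directory, then swap it with the live one, so a failure partway through never leaves a half-updated store on disk. A generic sketch of that shape with std::filesystem; note that std::filesystem has no atomic directory swap, so the two renames below are a weaker stand-in for the SwapFiles call in the removed code:

#include <filesystem>
#include <string>

namespace fs = std::filesystem;

// BuildNewStore is a hypothetical callback that writes the new state into
// the scratch directory; if it throws, the live directory is untouched.
template <typename BuildFn>
void BuildThenSwap(const std::string& live_dir, BuildFn BuildNewStore) {
  const fs::path temp_dir = live_dir + "_temp";
  fs::remove_all(temp_dir);          // start from a clean scratch area
  fs::create_directories(temp_dir);
  BuildNewStore(temp_dir.string());
  const fs::path old_dir = live_dir + "_old";
  fs::rename(live_dir, old_dir);     // two renames, not one atomic swap
  fs::rename(temp_dir, live_dir);
  fs::remove_all(old_dir);           // discard the superseded state
}

A caller would invoke it as BuildThenSwap(base_dir, [&](const std::string& dir) { /* write the new schema store into dir */ });.
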
- base_dir_ = std::move(old_base_dir); - schema_file_->SetSwappedFilepath(MakeSchemaFilename(base_dir_)); - - return libtextclassifier3::Status::OK; + return result; } libtextclassifier3::StatusOr<const SchemaTypeConfigProto*> @@ -563,9 +457,12 @@ libtextclassifier3::Status SchemaStore::PersistToDisk() { SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const { SchemaStoreStorageInfoProto storage_info; - int64_t directory_size = filesystem_->GetDiskUsage(base_dir_.c_str()); - storage_info.set_schema_store_size( - Filesystem::SanitizeFileSize(directory_size)); + int64_t directory_size = filesystem_.GetDiskUsage(base_dir_.c_str()); + if (directory_size != Filesystem::kBadFileSize) { + storage_info.set_schema_store_size(directory_size); + } else { + storage_info.set_schema_store_size(-1); + } ICING_ASSIGN_OR_RETURN(const SchemaProto* schema, GetSchema(), storage_info); storage_info.set_num_schema_types(schema->types_size()); int total_sections = 0; @@ -588,22 +485,5 @@ SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const { return storage_info; } -libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*> -SchemaStore::GetSectionMetadata(const std::string& schema_type) const { - return section_manager_->GetMetadataList(schema_type); -} - -libtextclassifier3::StatusOr<SchemaDebugInfoProto> SchemaStore::GetDebugInfo() - const { - SchemaDebugInfoProto debug_info; - if (has_schema_successfully_set_) { - ICING_ASSIGN_OR_RETURN(const SchemaProto* schema, GetSchema()); - *debug_info.mutable_schema() = *schema; - } - ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum()); - debug_info.set_crc(crc.Get()); - return debug_info; -} - } // namespace lib } // namespace icing diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h index 58e5477..dd1edb8 100644 --- a/icing/schema/schema-store.h +++ b/icing/schema/schema-store.h @@ -26,7 +26,6 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/file/file-backed-proto.h" #include "icing/file/filesystem.h" -#include "icing/proto/debug.pb.h" #include "icing/proto/document.pb.h" #include "icing/proto/logging.pb.h" #include "icing/proto/schema.pb.h" @@ -69,6 +68,9 @@ class SchemaStore { // to file. bool success = false; + // Whether the new schema changes invalidate the index. + bool index_incompatible = false; + // SchemaTypeIds of schema types can be reassigned new SchemaTypeIds if: // 1. Schema types are added in the middle of the SchemaProto // 2. Schema types are removed from the middle of the SchemaProto @@ -98,21 +100,6 @@ class SchemaStore { // SchemaUtil::ComputeCompatibilityDelta. Represented by the SchemaTypeId // assigned to this SchemaTypeConfigProto in the *old* schema. std::unordered_set<SchemaTypeId> schema_types_incompatible_by_id; - - // Schema types that were added in the new schema. Represented by the - // `schema_type` field in the SchemaTypeConfigProto. - std::unordered_set<std::string> schema_types_new_by_name; - - // Schema types that were changed in a way that was backwards compatible and - // didn't invalidate the index. Represented by the `schema_type` field in - // the SchemaTypeConfigProto. - std::unordered_set<std::string> - schema_types_changed_fully_compatible_by_name; - - // Schema types that were changed in a way that was backwards compatible, - // but invalidated the index. Represented by the `schema_type` field in the - // SchemaTypeConfigProto. 
- std::unordered_set<std::string> schema_types_index_incompatible_by_name; }; // Factory function to create a SchemaStore which does not take ownership @@ -130,17 +117,17 @@ class SchemaStore { static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create( const Filesystem* filesystem, const std::string& base_dir, const Clock* clock, InitializeStatsProto* initialize_stats = nullptr); - - SchemaStore(SchemaStore&&) = default; - SchemaStore& operator=(SchemaStore&&) = default; + // Not copyable SchemaStore(const SchemaStore&) = delete; SchemaStore& operator=(const SchemaStore&) = delete; // Persists and updates checksum of subcomponents. ~SchemaStore(); - // Retrieve the current schema if it exists. + // Retrieve the current schema if it exists. Caller does not get ownership of + // the schema proto and modifying the returned pointer does not affect the + // underlying schema proto. // // Returns: // SchemaProto* if exists @@ -247,70 +234,23 @@ class SchemaStore { // INTERNAL_ERROR on compute error libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const; - // Returns: - // - On success, the section metadata list for the specified schema type - // - NOT_FOUND if the schema type is not present in the schema - libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*> - GetSectionMetadata(const std::string& schema_type) const; - // Calculates the StorageInfo for the Schema Store. // // If an IO error occurs while trying to calculate the value for a field, then // that field will be set to -1. SchemaStoreStorageInfoProto GetStorageInfo() const; - // Get debug information for the schema store. - // - // Returns: - // SchemaDebugInfoProto on success - // INTERNAL_ERROR on IO errors, crc compute error - libtextclassifier3::StatusOr<SchemaDebugInfoProto> GetDebugInfo() const; - private: - // Factory function to create a SchemaStore and set its schema. The created - // instance does not take ownership of any input components and all pointers - // must refer to valid objects that outlive the created SchemaStore instance. - // The base_dir must already exist. No schema must have set in base_dir prior - // to this. - // - // Returns: - // A SchemaStore on success - // FAILED_PRECONDITION on any null pointer input or if there has already - // been a schema set for this path. - // INTERNAL_ERROR on any IO errors - static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create( - const Filesystem* filesystem, const std::string& base_dir, - const Clock* clock, SchemaProto schema); - - // Use SchemaStore::Create instead. explicit SchemaStore(const Filesystem* filesystem, std::string base_dir, const Clock* clock); - // Verifies that there is no error retrieving a previously set schema. Then - // initializes like normal. - // - // Returns: - // OK on success - // INTERNAL_ERROR on IO error - libtextclassifier3::Status Initialize(InitializeStatsProto* initialize_stats); - - // First, blindly writes new_schema to the schema_file. Then initializes like - // normal. - // - // Returns: - // OK on success - // INTERNAL_ERROR on IO error - // FAILED_PRECONDITION if there is already a schema set for the schema_file. - libtextclassifier3::Status Initialize(SchemaProto new_schema); - // Handles initializing the SchemaStore and regenerating any data if needed. 
// // Returns: // OK on success // INTERNAL_ERROR on IO error - libtextclassifier3::Status InitializeInternal( - InitializeStatsProto* initialize_stats); + libtextclassifier3::Status Initialize(InitializeStatsProto* initialize_stats); // Creates sub-components and verifies the integrity of each sub-component. // @@ -346,25 +286,15 @@ class SchemaStore { // Returns any IO errors. libtextclassifier3::Status ResetSchemaTypeMapper(); - // Creates a new schema store with new_schema and then swaps that new schema - // store with the existing one. This function guarantees that either: this - // instance will be fully updated to the new schema or no changes will take - // effect. - // - // Returns: - // OK on success - // INTERNAL on I/O error. - libtextclassifier3::Status ApplySchemaChange(SchemaProto new_schema); - libtextclassifier3::Status CheckSchemaSet() const { return has_schema_successfully_set_ ? libtextclassifier3::Status::OK : absl_ports::FailedPreconditionError("Schema not set yet."); } - const Filesystem* filesystem_; - std::string base_dir_; - const Clock* clock_; + const Filesystem& filesystem_; + const std::string base_dir_; + const Clock& clock_; // Used internally to indicate whether the class has been successfully // initialized with a valid schema. Will be false if Initialize failed or no @@ -372,7 +302,7 @@ class SchemaStore { bool has_schema_successfully_set_ = false; // Cached schema - std::unique_ptr<FileBackedProto<SchemaProto>> schema_file_; + FileBackedProto<SchemaProto> schema_file_; // A hash map of (type config name -> type config), allows faster lookup of // type config in schema. The O(1) type config access makes schema-related and diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc index 3fd41c4..5ef2dea 100644 --- a/icing/schema/schema-store_test.cc +++ b/icing/schema/schema-store_test.cc @@ -21,9 +21,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/absl_ports/str_cat.h" -#include "icing/document-builder.h" #include "icing/file/filesystem.h" -#include "icing/file/mock-filesystem.h" #include "icing/portable/equals-proto.h" #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" @@ -35,7 +33,6 @@ #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" #include "icing/testing/tmp-directory.h" -#include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/util/crc32.h" namespace icing { @@ -47,35 +44,28 @@ using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::ElementsAre; using ::testing::Eq; using ::testing::Ge; -using ::testing::Gt; -using ::testing::HasSubstr; using ::testing::Not; using ::testing::Pointee; -using ::testing::Return; -using ::testing::SizeIs; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = - PropertyConfigProto::Cardinality::REPEATED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; -constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; -constexpr 
TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; -constexpr PropertyConfigProto::DataType::Code TYPE_STRING = - PropertyConfigProto::DataType::STRING; -constexpr PropertyConfigProto::DataType::Code TYPE_DOUBLE = - PropertyConfigProto::DataType::DOUBLE; +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; +constexpr PropertyConfigProto_DataType_Code TYPE_DOUBLE = + PropertyConfigProto_DataType_Code_DOUBLE; class SchemaStoreTest : public ::testing::Test { protected: - void SetUp() override { - temp_dir_ = GetTestTempDir() + "/icing"; - schema_store_dir_ = temp_dir_ + "/schema_store"; - filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()); + SchemaStoreTest() : test_dir_(GetTestTempDir() + "/icing") { + filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); schema_ = SchemaBuilder() @@ -89,117 +79,30 @@ class SchemaStoreTest : public ::testing::Test { } void TearDown() override { - // Check that the schema store directory is the *only* directory in the - // schema_store_dir_. IOW, ensure that all temporary directories have been - // properly cleaned up. - std::vector<std::string> sub_dirs; - ASSERT_TRUE(filesystem_.ListDirectory(temp_dir_.c_str(), &sub_dirs)); - ASSERT_THAT(sub_dirs, ElementsAre("schema_store")); - - // Finally, clean everything up. - ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(temp_dir_.c_str())); + filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); } - Filesystem filesystem_; - std::string temp_dir_; - std::string schema_store_dir_; + const Filesystem filesystem_; + const std::string test_dir_; SchemaProto schema_; - FakeClock fake_clock_; + const FakeClock fake_clock_; }; TEST_F(SchemaStoreTest, CreationWithNullPointerShouldFail) { EXPECT_THAT( - SchemaStore::Create(/*filesystem=*/nullptr, schema_store_dir_, &fake_clock_), + SchemaStore::Create(/*filesystem=*/nullptr, test_dir_, &fake_clock_), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } -TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) { - // Create an instance of SchemaStore. - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty( - PropertyConfigBuilder() - .SetName("prop1") - .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - - ICING_ASSERT_OK(schema_store->SetSchema(schema)); - ICING_ASSERT_OK_AND_ASSIGN(Crc32 expected_checksum, - schema_store->ComputeChecksum()); - - // Move construct an instance of SchemaStore - SchemaStore move_constructed_schema_store(std::move(*schema_store)); - EXPECT_THAT(move_constructed_schema_store.GetSchema(), - IsOkAndHolds(Pointee(EqualsProto(schema)))); - EXPECT_THAT(move_constructed_schema_store.ComputeChecksum(), - IsOkAndHolds(Eq(expected_checksum))); - SectionMetadata expected_metadata(/*id_in=*/0, MATCH_EXACT, TOKENIZER_PLAIN, - "prop1"); - EXPECT_THAT(move_constructed_schema_store.GetSectionMetadata("TypeA"), - IsOkAndHolds(Pointee(ElementsAre(expected_metadata)))); -} - -TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) { - // Create an instance of SchemaStore. 
- SchemaProto schema1 = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty( - PropertyConfigBuilder() - .SetName("prop1") - .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - - ICING_ASSERT_OK(schema_store->SetSchema(schema1)); - ICING_ASSERT_OK_AND_ASSIGN(Crc32 expected_checksum, - schema_store->ComputeChecksum()); - - // Construct another instance of SchemaStore - SchemaProto schema2 = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("TypeB").AddProperty( - PropertyConfigBuilder() - .SetName("prop2") - .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> move_assigned_schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - ICING_ASSERT_OK(schema_store->SetSchema(schema2)); - - // Move assign the first instance into the second one. - *move_assigned_schema_store = std::move(*schema_store); - EXPECT_THAT(move_assigned_schema_store->GetSchema(), - IsOkAndHolds(Pointee(EqualsProto(schema1)))); - EXPECT_THAT(move_assigned_schema_store->ComputeChecksum(), - IsOkAndHolds(Eq(expected_checksum))); - SectionMetadata expected_metadata(/*id_in=*/0, MATCH_EXACT, TOKENIZER_PLAIN, - "prop1"); - EXPECT_THAT(move_assigned_schema_store->GetSectionMetadata("TypeA"), - IsOkAndHolds(Pointee(ElementsAre(expected_metadata)))); -} - TEST_F(SchemaStoreTest, CorruptSchemaError) { { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -215,14 +118,14 @@ TEST_F(SchemaStoreTest, CorruptSchemaError) { .AddType(SchemaTypeConfigBuilder().SetType("corrupted")) .Build(); - const std::string schema_file = absl_ports::StrCat(schema_store_dir_, "/schema.pb"); + const std::string schema_file = absl_ports::StrCat(test_dir_, "/schema.pb"); const std::string serialized_schema = corrupt_schema.SerializeAsString(); filesystem_.Write(schema_file.c_str(), serialized_schema.data(), serialized_schema.size()); // If ground truth was corrupted, we won't know what to do - EXPECT_THAT(SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_), + EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_), StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } @@ -230,12 +133,11 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) { { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -250,12 +152,12 @@ 
TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) { // regenerated from ground truth const std::string schema_type_mapper_dir = - absl_ports::StrCat(schema_store_dir_, "/schema_type_mapper"); + absl_ports::StrCat(test_dir_, "/schema_type_mapper"); filesystem_.DeleteDirectoryRecursively(schema_type_mapper_dir.c_str()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Everything looks fine, ground truth and derived data ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -268,12 +170,11 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) { { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -287,7 +188,7 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) { // the recalculated checksum on initialization. This will force a regeneration // of derived files from ground truth. const std::string header_file = - absl_ports::StrCat(schema_store_dir_, "/schema_store_header"); + absl_ports::StrCat(test_dir_, "/schema_store_header"); SchemaStore::Header header; header.magic = SchemaStore::Header::kMagic; header.checksum = 10; // Arbitrary garbage checksum @@ -296,7 +197,7 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Everything looks fine, ground truth and derived data ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -308,7 +209,7 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) { TEST_F(SchemaStoreTest, CreateNoPreviousSchemaOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // The apis to retrieve information about the schema should fail gracefully. 
EXPECT_THAT(store->GetSchema(), @@ -341,16 +242,15 @@ TEST_F(SchemaStoreTest, CreateNoPreviousSchemaOk) { TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); schema_store.reset(); - EXPECT_THAT(SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_), + EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_), IsOk()); } @@ -363,11 +263,10 @@ TEST_F(SchemaStoreTest, MultipleCreateOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); @@ -383,7 +282,7 @@ TEST_F(SchemaStoreTest, MultipleCreateOk) { schema_store.reset(); ICING_ASSERT_OK_AND_ASSIGN( - schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Verify that our in-memory structures are ok EXPECT_THAT(schema_store->GetSchemaTypeConfig("email"), @@ -399,12 +298,11 @@ TEST_F(SchemaStoreTest, MultipleCreateOk) { TEST_F(SchemaStoreTest, SetNewSchemaOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -415,12 +313,11 @@ TEST_F(SchemaStoreTest, SetNewSchemaOk) { TEST_F(SchemaStoreTest, SetSameSchemaOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -428,8 +325,6 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) { EXPECT_THAT(*actual_schema, EqualsProto(schema_)); // And one more for fun - result = SchemaStore::SetSchemaResult(); - result.success = true; EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -439,12 +334,11 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) { TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + 
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -455,7 +349,6 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) { schema_.clear_types(); // Set the incompatible schema - result = SchemaStore::SetSchemaResult(); result.success = false; result.schema_types_deleted_by_name.emplace("email"); result.schema_types_deleted_by_id.emplace(0); @@ -466,7 +359,7 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) { TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) @@ -475,7 +368,6 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -488,9 +380,6 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) { .Build(); // Set the compatible schema - result = SchemaStore::SetSchemaResult(); - result.success = true; - result.schema_types_new_by_name.insert("new_type"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -500,7 +389,7 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) { TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); SchemaProto schema = SchemaBuilder() @@ -511,8 +400,6 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert("email"); - result.schema_types_new_by_name.insert("message"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -558,7 +445,7 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) { TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); SchemaProto schema = SchemaBuilder() @@ -569,8 +456,6 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert("email"); - result.schema_types_new_by_name.insert("message"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -585,8 +470,6 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { // Since we assign SchemaTypeIds based on order in 
the SchemaProto, this will // cause SchemaTypeIds to change - result = SchemaStore::SetSchemaResult(); - result.success = true; result.old_schema_type_ids_changed.emplace(0); // Old SchemaTypeId of "email" result.old_schema_type_ids_changed.emplace( 1); // Old SchemaTypeId of "message" @@ -598,10 +481,10 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { EXPECT_THAT(*actual_schema, EqualsProto(schema)); } -TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) { +TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); SchemaProto schema = SchemaBuilder() @@ -616,7 +499,6 @@ TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -632,10 +514,10 @@ TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) { .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); + // With a new indexed property, we'll need to reindex + result.index_incompatible = true; + // Set the compatible schema - result = SchemaStore::SetSchemaResult(); - result.success = true; - result.schema_types_index_incompatible_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -645,7 +527,7 @@ TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) { TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Make two schemas. One that sets index_nested_properties to false and one // that sets it to true. @@ -682,8 +564,6 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { // Set schema with index_nested_properties=false to start. SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert("email"); - result.schema_types_new_by_name.insert("person"); EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -694,7 +574,7 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { // 'person' is index incompatible. result = SchemaStore::SetSchemaResult(); result.success = true; - result.schema_types_index_incompatible_by_name.insert("person"); + result.index_incompatible = true; EXPECT_THAT(schema_store->SetSchema(nested_index_schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -704,7 +584,7 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { // to 'person' is index incompatible. 
result = SchemaStore::SetSchemaResult(); result.success = true; - result.schema_types_index_incompatible_by_name.insert("person"); + result.index_incompatible = true; EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); @@ -714,7 +594,7 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); SchemaProto schema = SchemaBuilder() @@ -729,7 +609,6 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert("email"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -773,185 +652,10 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) { EXPECT_THAT(*actual_schema, EqualsProto(schema)); } -TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleNestedTypesOk) { - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - - // 1. Create a ContactPoint type with a repeated property and set that schema - SchemaTypeConfigBuilder contact_point_repeated_label = - SchemaTypeConfigBuilder() - .SetType("ContactPoint") - .AddProperty(PropertyConfigBuilder() - .SetName("label") - .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REPEATED)); - SchemaProto old_schema = - SchemaBuilder().AddType(contact_point_repeated_label).Build(); - ICING_EXPECT_OK(schema_store->SetSchema(old_schema)); - ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_contact_point_type_id, - schema_store->GetSchemaTypeId("ContactPoint")); - - // 2. Create a type that references the ContactPoint type and make a backwards - // incompatible change to ContactPoint - SchemaTypeConfigBuilder contact_point_optional_label = - SchemaTypeConfigBuilder() - .SetType("ContactPoint") - .AddProperty(PropertyConfigBuilder() - .SetName("label") - .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)); - SchemaTypeConfigBuilder person = - SchemaTypeConfigBuilder().SetType("Person").AddProperty( - PropertyConfigBuilder() - .SetName("contactPoints") - .SetDataTypeDocument("ContactPoint", - /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_REPEATED)); - SchemaProto new_schema = SchemaBuilder() - .AddType(contact_point_optional_label) - .AddType(person) - .Build(); - - // 3. 
SetSchema should fail with ignore_errors_and_delete_documents=false and - // the old schema should remain - SchemaStore::SetSchemaResult expected_result; - expected_result.success = false; - expected_result.schema_types_incompatible_by_name.insert("ContactPoint"); - expected_result.schema_types_incompatible_by_id.insert( - old_contact_point_type_id); - expected_result.schema_types_new_by_name.insert("Person"); - EXPECT_THAT( - schema_store->SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/false), - IsOkAndHolds(EqualsSetSchemaResult(expected_result))); - ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, - schema_store->GetSchema()); - EXPECT_THAT(*actual_schema, EqualsProto(old_schema)); - - // 4. SetSchema should succeed with ignore_errors_and_delete_documents=true - // and the new schema should be set - expected_result.success = true; - EXPECT_THAT( - schema_store->SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/true), - IsOkAndHolds(EqualsSetSchemaResult(expected_result))); - ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); - EXPECT_THAT(*actual_schema, EqualsProto(new_schema)); -} - -TEST_F(SchemaStoreTest, SetSchemaWithIndexIncompatibleNestedTypesOk) { - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - - // 1. Create a ContactPoint type with label that matches prefix and set that - // schema - SchemaTypeConfigBuilder contact_point_prefix_label = - SchemaTypeConfigBuilder() - .SetType("ContactPoint") - .AddProperty(PropertyConfigBuilder() - .SetName("label") - .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REPEATED)); - SchemaProto old_schema = - SchemaBuilder().AddType(contact_point_prefix_label).Build(); - ICING_EXPECT_OK(schema_store->SetSchema(old_schema)); - - // 2. Create a type that references the ContactPoint type and make a index - // backwards incompatible change to ContactPoint - SchemaTypeConfigBuilder contact_point_exact_label = - SchemaTypeConfigBuilder() - .SetType("ContactPoint") - .AddProperty(PropertyConfigBuilder() - .SetName("label") - .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REPEATED)); - SchemaTypeConfigBuilder person = - SchemaTypeConfigBuilder().SetType("Person").AddProperty( - PropertyConfigBuilder() - .SetName("contactPoints") - .SetDataTypeDocument("ContactPoint", - /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_REPEATED)); - SchemaProto new_schema = SchemaBuilder() - .AddType(contact_point_exact_label) - .AddType(person) - .Build(); - - // SetSchema should succeed, and only ContactPoint should be in - // schema_types_index_incompatible_by_name. - SchemaStore::SetSchemaResult expected_result; - expected_result.success = true; - expected_result.schema_types_index_incompatible_by_name.insert( - "ContactPoint"); - expected_result.schema_types_new_by_name.insert("Person"); - EXPECT_THAT( - schema_store->SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/false), - IsOkAndHolds(EqualsSetSchemaResult(expected_result))); - ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, - schema_store->GetSchema()); - EXPECT_THAT(*actual_schema, EqualsProto(new_schema)); -} - -TEST_F(SchemaStoreTest, SetSchemaWithCompatibleNestedTypesOk) { - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - - // 1. 
Create a ContactPoint type with a optional property and set that schema - SchemaTypeConfigBuilder contact_point_optional_label = - SchemaTypeConfigBuilder() - .SetType("ContactPoint") - .AddProperty(PropertyConfigBuilder() - .SetName("label") - .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)); - SchemaProto old_schema = - SchemaBuilder().AddType(contact_point_optional_label).Build(); - ICING_EXPECT_OK(schema_store->SetSchema(old_schema)); - - // 2. Create a type that references the ContactPoint type and make a backwards - // compatible change to ContactPoint - SchemaTypeConfigBuilder contact_point_repeated_label = - SchemaTypeConfigBuilder() - .SetType("ContactPoint") - .AddProperty(PropertyConfigBuilder() - .SetName("label") - .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REPEATED)); - SchemaTypeConfigBuilder person = - SchemaTypeConfigBuilder().SetType("Person").AddProperty( - PropertyConfigBuilder() - .SetName("contactPoints") - .SetDataTypeDocument("ContactPoint", - /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_REPEATED)); - SchemaProto new_schema = SchemaBuilder() - .AddType(contact_point_repeated_label) - .AddType(person) - .Build(); - - // 3. SetSchema should succeed, and only ContactPoint should be in - // schema_types_changed_fully_compatible_by_name. - SchemaStore::SetSchemaResult expected_result; - expected_result.success = true; - expected_result.schema_types_changed_fully_compatible_by_name.insert( - "ContactPoint"); - expected_result.schema_types_new_by_name.insert("Person"); - EXPECT_THAT(schema_store->SetSchema( - new_schema, /*ignore_errors_and_delete_documents=*/false), - IsOkAndHolds(EqualsSetSchemaResult(expected_result))); - ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, - schema_store->GetSchema()); - EXPECT_THAT(*actual_schema, EqualsProto(new_schema)); -} - TEST_F(SchemaStoreTest, GetSchemaTypeId) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); schema_.clear_types(); @@ -967,8 +671,6 @@ TEST_F(SchemaStoreTest, GetSchemaTypeId) { // Set it for the first time SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert(first_type); - result.schema_types_new_by_name.insert(second_type); EXPECT_THAT(schema_store->SetSchema(schema_), IsOkAndHolds(EqualsSetSchemaResult(result))); @@ -979,7 +681,7 @@ TEST_F(SchemaStoreTest, GetSchemaTypeId) { TEST_F(SchemaStoreTest, ComputeChecksumDefaultOnEmptySchemaStore) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); Crc32 default_checksum; EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(default_checksum)); @@ -988,7 +690,7 @@ TEST_F(SchemaStoreTest, ComputeChecksumDefaultOnEmptySchemaStore) { TEST_F(SchemaStoreTest, ComputeChecksumSameBetweenCalls) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); SchemaProto foo_schema = SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); @@ -1004,7 +706,7 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameBetweenCalls) { TEST_F(SchemaStoreTest, 
ComputeChecksumSameAcrossInstances) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); SchemaProto foo_schema = SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); @@ -1017,14 +719,14 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameAcrossInstances) { schema_store.reset(); ICING_ASSERT_OK_AND_ASSIGN( - schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(checksum)); } TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); SchemaProto foo_schema = SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); @@ -1048,7 +750,7 @@ TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) { TEST_F(SchemaStoreTest, PersistToDiskFineForEmptySchemaStore) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Persisting is fine and shouldn't affect anything ICING_EXPECT_OK(schema_store->PersistToDisk()); @@ -1057,7 +759,7 @@ TEST_F(SchemaStoreTest, PersistToDiskFineForEmptySchemaStore) { TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); SchemaProto schema = SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); @@ -1082,7 +784,7 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) { // And we get the same schema back on reinitialization ICING_ASSERT_OK_AND_ASSIGN( - schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(schema)); } @@ -1090,7 +792,7 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) { TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); // Create a schema with two types: one simple type and one type that uses all // 16 sections. 
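
With the per-type compatibility sets gone from SetSchemaResult, the result only reports whether the index survived at all, so callers can no longer reindex selectively. A small illustration of how a caller might act on the simplified result; the struct mirrors only the fields visible in this change, and the enum and function are hypothetical:

#include <string>
#include <unordered_set>

// Subset of SchemaStore::SetSchemaResult as it stands after this change;
// the real struct lives in icing/schema/schema-store.h.
struct SetSchemaResult {
  bool success = false;
  bool index_incompatible = false;
  std::unordered_set<std::string> schema_types_deleted_by_name;
};

enum class IndexAction { kNone, kRebuildAll };

// Without per-type breakdowns, any index-incompatible change maps to a
// full index rebuild; a rejected schema requires no index work at all.
IndexAction PlanIndexWork(const SetSchemaResult& result) {
  if (!result.success) return IndexAction::kNone;
  return result.index_incompatible ? IndexAction::kRebuildAll
                                   : IndexAction::kNone;
}
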
@@ -1127,8 +829,6 @@ TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) { SchemaStore::SetSchemaResult result; result.success = true; - result.schema_types_new_by_name.insert("email"); - result.schema_types_new_by_name.insert("fullSectionsType"); EXPECT_THAT(schema_store->SetSchema(schema), IsOkAndHolds(EqualsSetSchemaResult(result))); @@ -1139,114 +839,6 @@ TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) { EXPECT_THAT(storage_info.num_schema_types_sections_exhausted(), Eq(1)); } -TEST_F(SchemaStoreTest, GetDebugInfo) { - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - - // Set schema - ASSERT_THAT( - schema_store->SetSchema(schema_), - IsOkAndHolds(EqualsSetSchemaResult(SchemaStore::SetSchemaResult{ - .success = true, - .schema_types_new_by_name = {schema_.types(0).schema_type()}}))); - - // Check debug info - ICING_ASSERT_OK_AND_ASSIGN(SchemaDebugInfoProto out, - schema_store->GetDebugInfo()); - EXPECT_THAT(out.schema(), EqualsProto(schema_)); - EXPECT_THAT(out.crc(), Gt(0)); -} - -TEST_F(SchemaStoreTest, GetDebugInfoForEmptySchemaStore) { - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - - // Check debug info before setting a schema - ICING_ASSERT_OK_AND_ASSIGN(SchemaDebugInfoProto out, - schema_store->GetDebugInfo()); - SchemaDebugInfoProto expected_out; - expected_out.set_crc(0); - EXPECT_THAT(out, EqualsProto(expected_out)); -} - -TEST_F(SchemaStoreTest, InitializeRegenerateDerivedFilesFailure) { - // This test covers the first point that RegenerateDerivedFiles could fail. - // This should simply result in SetSchema::Create returning an INTERNAL error. - - { - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - SchemaProto schema = SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("Type")) - .Build(); - ICING_ASSERT_OK(schema_store->SetSchema(std::move(schema))); - } - - auto mock_filesystem = std::make_unique<MockFilesystem>(); - ON_CALL(*mock_filesystem, - CreateDirectoryRecursively(HasSubstr("key_mapper_dir"))) - .WillByDefault(Return(false)); - { - EXPECT_THAT(SchemaStore::Create(mock_filesystem.get(), schema_store_dir_, - &fake_clock_), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); - } -} - -TEST_F(SchemaStoreTest, SetSchemaRegenerateDerivedFilesFailure) { - // This test covers the second point that RegenerateDerivedFiles could fail. - // If handled correctly, the schema store and section manager should still be - // in the original, valid state. 
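
The tests being removed here exercise that guarantee by injecting an I/O failure through a mocked filesystem and asserting the store surfaces INTERNAL instead of corrupting state. A self-contained version of the injection pattern, with a toy interface standing in for icing's Filesystem and GoogleMock forcing the failure:

#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"

// Just enough surface to demonstrate ON_CALL failure injection.
class FilesystemIface {
 public:
  virtual ~FilesystemIface() = default;
  virtual bool CreateDirectoryRecursively(const std::string& path) = 0;
};

class MockFs : public FilesystemIface {
 public:
  MOCK_METHOD(bool, CreateDirectoryRecursively, (const std::string& path),
              (override));
};

// Hypothetical code under test: it must report the failure, not crash.
bool InitDerivedFiles(FilesystemIface& fs) {
  return fs.CreateDirectoryRecursively("/store/key_mapper_dir");
}

TEST(FailureInjectionSketch, SurfacesIoError) {
  ::testing::NiceMock<MockFs> fs;
  ON_CALL(fs, CreateDirectoryRecursively(::testing::HasSubstr("key_mapper_dir")))
      .WillByDefault(::testing::Return(false));
  EXPECT_FALSE(InitDerivedFiles(fs));
}
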
- SchemaTypeConfigProto type = - SchemaTypeConfigBuilder() - .SetType("Type") - .AddProperty(PropertyConfigBuilder() - .SetName("prop1") - .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .Build(); - { - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - SchemaProto schema = SchemaBuilder().AddType(type).Build(); - ICING_ASSERT_OK(schema_store->SetSchema(std::move(schema))); - } - - { - auto mock_filesystem = std::make_unique<MockFilesystem>(); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(mock_filesystem.get(), schema_store_dir_, - &fake_clock_)); - - ON_CALL(*mock_filesystem, - CreateDirectoryRecursively(HasSubstr("key_mapper_dir"))) - .WillByDefault(Return(false)); - SchemaProto schema = - SchemaBuilder() - .AddType(type) - .AddType(SchemaTypeConfigBuilder().SetType("Type2")) - .Build(); - EXPECT_THAT(schema_store->SetSchema(std::move(schema)), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); - DocumentProto document = DocumentBuilder() - .SetSchema("Type") - .AddStringProperty("prop1", "foo bar baz") - .Build(); - SectionMetadata expected_metadata(/*id_in=*/0, MATCH_EXACT, TOKENIZER_PLAIN, - "prop1"); - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Section> sections, - schema_store->ExtractSections(document)); - ASSERT_THAT(sections, SizeIs(1)); - EXPECT_THAT(sections.at(0).metadata, Eq(expected_metadata)); - EXPECT_THAT(sections.at(0).content, ElementsAre("foo bar baz")); - } -} - } // namespace } // namespace lib diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc index 88b6946..cabe76d 100644 --- a/icing/schema/schema-util.cc +++ b/icing/schema/schema-util.cc @@ -37,20 +37,6 @@ namespace lib { namespace { -bool ArePropertiesEqual(const PropertyConfigProto& old_property, - const PropertyConfigProto& new_property) { - return old_property.property_name() == new_property.property_name() && - old_property.data_type() == new_property.data_type() && - old_property.schema_type() == new_property.schema_type() && - old_property.cardinality() == new_property.cardinality() && - old_property.string_indexing_config().term_match_type() == - new_property.string_indexing_config().term_match_type() && - old_property.string_indexing_config().tokenizer_type() == - new_property.string_indexing_config().tokenizer_type() && - old_property.document_indexing_config().index_nested_properties() == - new_property.document_indexing_config().index_nested_properties(); -} - bool IsCardinalityCompatible(const PropertyConfigProto& old_property, const PropertyConfigProto& new_property) { if (old_property.cardinality() < new_property.cardinality()) { @@ -107,33 +93,6 @@ bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed, old_indexed.tokenizer_type() == new_indexed.tokenizer_type(); } -void AddIncompatibleChangeToDelta( - std::unordered_set<std::string>& incompatible_delta, - const SchemaTypeConfigProto& old_type_config, - const SchemaUtil::DependencyMap& new_schema_dependency_map, - const SchemaUtil::TypeConfigMap& old_type_config_map, - const SchemaUtil::TypeConfigMap& new_type_config_map) { - // If this type is incompatible, then every type that depends on it might - // also be incompatible. Use the dependency map to mark those ones as - // incompatible too. 
- incompatible_delta.insert(old_type_config.schema_type()); - auto parent_types_itr = - new_schema_dependency_map.find(old_type_config.schema_type()); - if (parent_types_itr != new_schema_dependency_map.end()) { - for (std::string_view parent_type : parent_types_itr->second) { - // The types from new_schema that depend on the current - // old_type_config may not present in old_schema. - // Those types will be listed at schema_delta.schema_types_new - // instead. - std::string parent_type_str(parent_type); - if (old_type_config_map.find(parent_type_str) != - old_type_config_map.end()) { - incompatible_delta.insert(std::move(parent_type_str)); - } - } - } -} - } // namespace libtextclassifier3::Status ExpandTranstiveDependencies( @@ -473,9 +432,9 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( const SchemaProto& old_schema, const SchemaProto& new_schema, const DependencyMap& new_schema_dependency_map) { SchemaDelta schema_delta; + schema_delta.index_incompatible = false; - TypeConfigMap old_type_config_map, new_type_config_map; - BuildTypeConfigMap(old_schema, &old_type_config_map); + TypeConfigMap new_type_config_map; BuildTypeConfigMap(new_schema, &new_type_config_map); // Iterate through and check each field of the old schema @@ -504,9 +463,6 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( // If there is a different number of properties, then there must have been a // change. - bool has_property_changed = - old_type_config.properties_size() != - new_schema_type_and_config->second.properties_size(); bool is_incompatible = false; bool is_index_incompatible = false; for (const auto& old_property_config : old_type_config.properties()) { @@ -542,11 +498,6 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( const PropertyConfigProto* new_property_config = new_property_name_and_config->second; - if (!has_property_changed && - !ArePropertiesEqual(old_property_config, *new_property_config)) { - // Finally found a property that changed. - has_property_changed = true; - } if (!IsPropertyCompatible(old_property_config, *new_property_config)) { ICING_VLOG(1) << absl_ports::StrCat( @@ -594,33 +545,26 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( } if (is_incompatible) { - AddIncompatibleChangeToDelta(schema_delta.schema_types_incompatible, - old_type_config, new_schema_dependency_map, - old_type_config_map, new_type_config_map); + // If this type is incompatible, then every type that depends on it might + // also be incompatible. Use the dependency map to mark those ones as + // incompatible too. 
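
Standalone, the inline propagation that replaces AddIncompatibleChangeToDelta works like this: the dependency map (already expanded transitively by ExpandTranstiveDependencies) maps a type to every type that embeds it, so one lookup marks all affected parents. A std-only sketch with illustrative names:

#include <string>
#include <unordered_map>
#include <unordered_set>

// Stand-in for SchemaUtil::DependencyMap: map[type] holds every type that
// transitively depends on `type` via nested documents.
using DependencyMap =
    std::unordered_map<std::string, std::unordered_set<std::string>>;

// Mark `type` incompatible along with everything that embeds it; a single
// lookup suffices because the map is already transitive.
void MarkIncompatible(const std::string& type, const DependencyMap& dependents,
                      std::unordered_set<std::string>& incompatible) {
  incompatible.insert(type);
  auto it = dependents.find(type);
  if (it != dependents.end()) {
    incompatible.insert(it->second.begin(), it->second.end());
  }
}
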
+ schema_delta.schema_types_incompatible.insert( + old_type_config.schema_type()); + auto parent_types_itr = + new_schema_dependency_map.find(old_type_config.schema_type()); + if (parent_types_itr != new_schema_dependency_map.end()) { + schema_delta.schema_types_incompatible.reserve( + schema_delta.schema_types_incompatible.size() + + parent_types_itr->second.size()); + schema_delta.schema_types_incompatible.insert( + parent_types_itr->second.begin(), parent_types_itr->second.end()); + } } if (is_index_incompatible) { - AddIncompatibleChangeToDelta(schema_delta.schema_types_index_incompatible, - old_type_config, new_schema_dependency_map, - old_type_config_map, new_type_config_map); + schema_delta.index_incompatible = true; } - if (!is_incompatible && !is_index_incompatible && has_property_changed) { - schema_delta.schema_types_changed_fully_compatible.insert( - old_type_config.schema_type()); - } - - // Lastly, remove this type from the map. We know that this type can't - // come up in future iterations through the old schema types because the old - // type config has unique types. - new_type_config_map.erase(old_type_config.schema_type()); - } - - // Any types that are still present in the new_type_config_map are newly added - // types. - schema_delta.schema_types_new.reserve(new_type_config_map.size()); - for (auto& kvp : new_type_config_map) { - schema_delta.schema_types_new.insert(std::move(kvp.first)); } return schema_delta; diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h index fa80b15..abbc55d 100644 --- a/icing/schema/schema-util.h +++ b/icing/schema/schema-util.h @@ -41,6 +41,12 @@ class SchemaUtil { std::unordered_set<std::string_view>>; struct SchemaDelta { + // Whether an indexing config has changed, requiring the index to be + // regenerated. We don't list out all the types that make the index + // incompatible because our index isn't optimized for that. It's much easier + // to reset the entire index and reindex every document. + bool index_incompatible = false; + // Which schema types were present in the old schema, but were deleted from // the new schema. std::unordered_set<std::string> schema_types_deleted; @@ -49,28 +55,10 @@ class SchemaUtil { // could invalidate existing Documents of that schema type. std::unordered_set<std::string> schema_types_incompatible; - // Schema types that were added in the new schema. Represented by the - // `schema_type` field in the SchemaTypeConfigProto. - std::unordered_set<std::string> schema_types_new; - - // Schema types that were changed in a way that was backwards compatible and - // didn't invalidate the index. Represented by the `schema_type` field in - // the SchemaTypeConfigProto. - std::unordered_set<std::string> schema_types_changed_fully_compatible; - - // Schema types that were changed in a way that was backwards compatible, - // but invalidated the index. Represented by the `schema_type` field in the - // SchemaTypeConfigProto. 
- std::unordered_set<std::string> schema_types_index_incompatible; - bool operator==(const SchemaDelta& other) const { - return schema_types_deleted == other.schema_types_deleted && - schema_types_incompatible == other.schema_types_incompatible && - schema_types_new == other.schema_types_new && - schema_types_changed_fully_compatible == - other.schema_types_changed_fully_compatible && - schema_types_index_incompatible == - other.schema_types_index_incompatible; + return index_incompatible == other.index_incompatible && + schema_types_deleted == other.schema_types_deleted && + schema_types_incompatible == other.schema_types_incompatible; } }; diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc index f28a2f8..049dd79 100644 --- a/icing/schema/schema-util_test.cc +++ b/icing/schema/schema-util_test.cc @@ -38,32 +38,32 @@ constexpr char kEmailType[] = "EmailMessage"; constexpr char kMessageType[] = "Text"; constexpr char kPersonType[] = "Person"; -constexpr PropertyConfigProto::DataType::Code TYPE_DOCUMENT = - PropertyConfigProto::DataType::DOCUMENT; -constexpr PropertyConfigProto::DataType::Code TYPE_STRING = - PropertyConfigProto::DataType::STRING; -constexpr PropertyConfigProto::DataType::Code TYPE_INT = - PropertyConfigProto::DataType::INT64; -constexpr PropertyConfigProto::DataType::Code TYPE_DOUBLE = - PropertyConfigProto::DataType::DOUBLE; - -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_UNKNOWN = - PropertyConfigProto::Cardinality::UNKNOWN; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = - PropertyConfigProto::Cardinality::REQUIRED; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = - PropertyConfigProto::Cardinality::REPEATED; - -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_NONE = - StringIndexingConfig::TokenizerType::NONE; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; - -constexpr TermMatchType::Code MATCH_UNKNOWN = TermMatchType::UNKNOWN; -constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; -constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; +constexpr PropertyConfigProto_DataType_Code TYPE_DOCUMENT = + PropertyConfigProto_DataType_Code_DOCUMENT; +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; +constexpr PropertyConfigProto_DataType_Code TYPE_INT = + PropertyConfigProto_DataType_Code_INT64; +constexpr PropertyConfigProto_DataType_Code TYPE_DOUBLE = + PropertyConfigProto_DataType_Code_DOUBLE; + +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_UNKNOWN = + PropertyConfigProto_Cardinality_Code_UNKNOWN; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; + +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE = + StringIndexingConfig_TokenizerType_Code_NONE; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; + +constexpr TermMatchType_Code MATCH_UNKNOWN = TermMatchType_Code_UNKNOWN; +constexpr TermMatchType_Code 
MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) { // Create a schema with the following dependencies: @@ -705,7 +705,6 @@ TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) { .Build(); SchemaUtil::SchemaDelta schema_delta; - schema_delta.schema_types_changed_fully_compatible.insert(kEmailType); SchemaUtil::DependencyMap no_dependencies_map; EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( old_schema, new_schema_with_optional, no_dependencies_map), @@ -818,8 +817,6 @@ TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) { // We can have the new schema be less restrictive, OPTIONAL->REPEATED; SchemaUtil::SchemaDelta compatible_schema_delta; - compatible_schema_delta.schema_types_changed_fully_compatible.insert( - kEmailType); EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( /*old_schema=*/more_restrictive_schema, /*new_schema=*/less_restrictive_schema, no_dependencies_map), @@ -915,6 +912,7 @@ TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) { SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( old_schema, new_schema, dependencies_map); EXPECT_THAT(actual, Eq(schema_delta)); + EXPECT_THAT(actual.index_incompatible, testing::IsFalse()); EXPECT_THAT(actual.schema_types_incompatible, testing::ElementsAre(kEmailType)); EXPECT_THAT(actual.schema_types_deleted, testing::IsEmpty()); @@ -946,7 +944,7 @@ TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) { .Build(); SchemaUtil::SchemaDelta schema_delta; - schema_delta.schema_types_index_incompatible.insert(kPersonType); + schema_delta.index_incompatible = true; // New schema gained a new indexed property. SchemaUtil::DependencyMap no_dependencies_map; @@ -993,7 +991,7 @@ TEST(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) { .Build(); SchemaUtil::SchemaDelta schema_delta; - schema_delta.schema_types_index_incompatible.insert(kPersonType); + schema_delta.index_incompatible = true; SchemaUtil::DependencyMap no_dependencies_map; EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, no_dependencies_map), @@ -1033,7 +1031,6 @@ TEST(SchemaUtilTest, AddingTypeIsCompatible) { .Build(); SchemaUtil::SchemaDelta schema_delta; - schema_delta.schema_types_new.insert(kEmailType); SchemaUtil::DependencyMap no_dependencies_map; EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, no_dependencies_map), @@ -1112,7 +1109,7 @@ TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) { SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_incompatible.emplace(kEmailType); - schema_delta.schema_types_index_incompatible.emplace(kEmailType); + schema_delta.index_incompatible = true; SchemaUtil::DependencyMap no_dependencies_map; SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( old_schema, new_schema, no_dependencies_map); @@ -1160,7 +1157,7 @@ TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) { // should make kPersonType index_incompatible. kEmailType should be // unaffected. 
SchemaUtil::SchemaDelta schema_delta; - schema_delta.schema_types_index_incompatible.emplace(kPersonType); + schema_delta.index_incompatible = true; SchemaUtil::DependencyMap dependencies_map = {{kEmailType, {kPersonType}}}; SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( no_nested_index_schema, nested_index_schema, dependencies_map); diff --git a/icing/schema/section.h b/icing/schema/section.h index 8b2ba55..40e623a 100644 --- a/icing/schema/section.h +++ b/icing/schema/section.h @@ -77,11 +77,6 @@ struct SectionMetadata { id(id_in), tokenizer(tokenizer), term_match_type(term_match_type_in) {} - - bool operator==(const SectionMetadata& rhs) const { - return path == rhs.path && id == rhs.id && tokenizer == rhs.tokenizer && - term_match_type == rhs.term_match_type; - } }; // Section is an icing internal concept similar to document property but with diff --git a/icing/scoring/bm25f-calculator.cc b/icing/scoring/bm25f-calculator.cc index 28d385e..4822d7f 100644 --- a/icing/scoring/bm25f-calculator.cc +++ b/icing/scoring/bm25f-calculator.cc @@ -26,7 +26,6 @@ #include "icing/store/corpus-associated-scoring-data.h" #include "icing/store/corpus-id.h" #include "icing/store/document-associated-score-data.h" -#include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" namespace icing { @@ -43,11 +42,8 @@ constexpr float k1_ = 1.2f; constexpr float b_ = 0.7f; // TODO(b/158603900): add tests for Bm25fCalculator -Bm25fCalculator::Bm25fCalculator( - const DocumentStore* document_store, - std::unique_ptr<SectionWeights> section_weights) - : document_store_(document_store), - section_weights_(std::move(section_weights)) {} +Bm25fCalculator::Bm25fCalculator(const DocumentStore* document_store) + : document_store_(document_store) {} // During initialization, Bm25fCalculator iterates through // hit-iterators for each query term to pre-compute n(q_i) for each corpus under @@ -125,9 +121,9 @@ float Bm25fCalculator::ComputeScore(const DocHitInfoIterator* query_it, // Compute inverse document frequency (IDF) weight for query term in the given // corpus, and cache it in the map. // -// N - n(q_i) + 0.5 -// IDF(q_i) = ln(1 + ------------------) -// n(q_i) + 0.5 +// N - n(q_i) + 0.5 +// IDF(q_i) = log(1 + ------------------) +// n(q_i) + 0.5 // // where N is the number of documents in the corpus, and n(q_i) is the number // of documents in the corpus containing the query term q_i. @@ -153,7 +149,7 @@ float Bm25fCalculator::GetCorpusIdfWeightForTerm(std::string_view term, uint32_t num_docs = csdata.num_docs(); uint32_t nqi = corpus_nqi_map_[corpus_term_info.value]; float idf = - nqi != 0 ? log(1.0f + (num_docs - nqi + 0.5f) / (nqi + 0.5f)) : 0.0f; + nqi != 0 ? log(1.0f + (num_docs - nqi + 0.5f) / (nqi - 0.5f)) : 0.0f; corpus_idf_map_.insert({corpus_term_info.value, idf}); ICING_VLOG(1) << IcingStringUtil::StringPrintf( "corpus_id:%d term:%s N:%d nqi:%d idf:%f", corpus_id, @@ -162,11 +158,6 @@ float Bm25fCalculator::GetCorpusIdfWeightForTerm(std::string_view term, } // Get per corpus average document length and cache the result in the map. 
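// A worked example of the IDF formula above, under an assumed corpus of
// N = 100 documents of which n(q_i) = 10 contain the term (the numbers are
// hypothetical; log here is the natural log, as in the code):
//
//   float num_docs = 100.0f, nqi = 10.0f;
//   float idf = log(1.0f + (num_docs - nqi + 0.5f) / (nqi + 0.5f));
//   // (100 - 10 + 0.5) / (10 + 0.5) = 8.619, so idf = log(9.619) ~= 2.26
//
// Rarer terms push n(q_i) down and the IDF up, while a term present in nearly
// every document drives the ratio, and hence its weight, toward zero. The
// sketch follows the commented formula, which adds 0.5 in the denominator.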
-// The average doc length is calculated as: -// -// total_tokens_in_corpus -// Avg Doc Length = ------------------------- -// num_docs_in_corpus + 1 float Bm25fCalculator::GetCorpusAvgDocLength(CorpusId corpus_id) { auto iter = corpus_avgdl_map_.find(corpus_id); if (iter != corpus_avgdl_map_.end()) { @@ -200,8 +191,8 @@ float Bm25fCalculator::ComputedNormalizedTermFrequency( const DocumentAssociatedScoreData& data) { uint32_t dl = data.length_in_tokens(); float avgdl = GetCorpusAvgDocLength(data.corpus_id()); - float f_q = ComputeTermFrequencyForMatchedSections( - data.corpus_id(), term_match_info, hit_info.document_id()); + float f_q = + ComputeTermFrequencyForMatchedSections(data.corpus_id(), term_match_info); float normalized_tf = f_q * (k1_ + 1) / (f_q + k1_ * (1 - b_ + b_ * dl / avgdl)); @@ -211,41 +202,23 @@ float Bm25fCalculator::ComputedNormalizedTermFrequency( return normalized_tf; } +// Note: once we support section weights, we should update this function to +// compute the weighted term frequency. float Bm25fCalculator::ComputeTermFrequencyForMatchedSections( - CorpusId corpus_id, const TermMatchInfo& term_match_info, - DocumentId document_id) const { + CorpusId corpus_id, const TermMatchInfo& term_match_info) const { float sum = 0.0f; SectionIdMask sections = term_match_info.section_ids_mask; - SchemaTypeId schema_type_id = GetSchemaTypeId(document_id); - while (sections != 0) { SectionId section_id = __builtin_ctz(sections); sections &= ~(1u << section_id); Hit::TermFrequency tf = term_match_info.term_frequencies[section_id]; - double weighted_tf = tf * section_weights_->GetNormalizedSectionWeight( - schema_type_id, section_id); if (tf != Hit::kNoTermFrequency) { - sum += weighted_tf; + sum += tf; } } return sum; } -SchemaTypeId Bm25fCalculator::GetSchemaTypeId(DocumentId document_id) const { - auto filter_data_or = document_store_->GetDocumentFilterData(document_id); - if (!filter_data_or.ok()) { - // This should never happen. The only failure case for - // GetDocumentFilterData is if the document_id is outside of the range of - // allocated document_ids, which shouldn't be possible since we're getting - // this document_id from the posting lists. - ICING_LOG(WARNING) << IcingStringUtil::StringPrintf( - "No document filter data for document [%d]", document_id); - return kInvalidSchemaTypeId; - } - DocumentFilterData data = filter_data_or.ValueOrDie(); - return data.schema_type_id(); -} - } // namespace lib } // namespace icing diff --git a/icing/scoring/bm25f-calculator.h b/icing/scoring/bm25f-calculator.h index 05009d8..91b4f24 100644 --- a/icing/scoring/bm25f-calculator.h +++ b/icing/scoring/bm25f-calculator.h @@ -22,7 +22,6 @@ #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/legacy/index/icing-bit-util.h" -#include "icing/scoring/section-weights.h" #include "icing/store/corpus-id.h" #include "icing/store/document-store.h" @@ -63,8 +62,7 @@ namespace lib { // see: glossary/bm25 class Bm25fCalculator { public: - explicit Bm25fCalculator(const DocumentStore *document_store_, - std::unique_ptr<SectionWeights> section_weights_); + explicit Bm25fCalculator(const DocumentStore *document_store_); // Precompute and cache statistics relevant to BM25F. // Populates term_id_map_ and corpus_nqi_map_ for use while scoring other @@ -110,43 +108,18 @@ class Bm25fCalculator { } }; - // Returns idf weight for the term and provided corpus. 
float GetCorpusIdfWeightForTerm(std::string_view term, CorpusId corpus_id); - - // Returns the average document length for the corpus. The average is - // calculated as the sum of tokens in the corpus' documents over the total - // number of documents plus one. float GetCorpusAvgDocLength(CorpusId corpus_id); - - // Returns the normalized term frequency for the term match and document hit. - // This normalizes the term frequency by applying smoothing parameters and - // factoring document length. float ComputedNormalizedTermFrequency( const TermMatchInfo &term_match_info, const DocHitInfo &hit_info, const DocumentAssociatedScoreData &data); - - // Returns the weighted term frequency for the term match and document. For - // each section the term is present, we scale the term frequency by its - // section weight. We return the sum of the weighted term frequencies over all - // sections. float ComputeTermFrequencyForMatchedSections( - CorpusId corpus_id, const TermMatchInfo &term_match_info, - DocumentId document_id) const; + CorpusId corpus_id, const TermMatchInfo &term_match_info) const; - // Returns the schema type id for the document by retrieving it from the - // DocumentFilterData. - SchemaTypeId GetSchemaTypeId(DocumentId document_id) const; - - // Clears cached scoring data and prepares the calculator for a new scoring - // run. void Clear(); const DocumentStore *document_store_; // Does not own. - // Used for accessing normalized section weights when computing the weighted - // term frequency. - std::unique_ptr<SectionWeights> section_weights_; - // Map from query term to compact term ID. // Necessary as a key to the other maps. // The use of the string_view as key here means that the query_term_iterators @@ -157,6 +130,7 @@ class Bm25fCalculator { // Necessary to calculate the normalized term frequency. // This information is cached in the DocumentStore::CorpusScoreCache std::unordered_map<CorpusId, float> corpus_avgdl_map_; + // Map from <corpus ID, term ID> to number of documents containing term q_i, // called n(q_i). // Necessary to calculate IDF(q_i) (inverse document frequency). diff --git a/icing/scoring/ranker.cc b/icing/scoring/ranker.cc index 117f44c..fecee82 100644 --- a/icing/scoring/ranker.cc +++ b/icing/scoring/ranker.cc @@ -32,7 +32,6 @@ namespace { // Helper function to wrap the heapify algorithm, it heapifies the target // subtree node in place. -// TODO(b/152934343) refactor the heapify function and making it into a class. void Heapify( std::vector<ScoredDocumentHit>* scored_document_hits, int target_subtree_root_index, @@ -72,80 +71,6 @@ void Heapify( } } -// Heapify the given term vector from top to bottom. Call it after add or -// replace an element at the front of the vector. -void HeapifyTermDown(std::vector<TermMetadata>& scored_terms, - int target_subtree_root_index) { - int heap_size = scored_terms.size(); - if (target_subtree_root_index >= heap_size) { - return; - } - - // Initializes subtree root as the current minimum node. - int min = target_subtree_root_index; - // If we represent a heap in an array/vector, indices of left and right - // children can be calculated as such. - const int left = target_subtree_root_index * 2 + 1; - const int right = target_subtree_root_index * 2 + 2; - - // If left child is smaller than current minimum. - if (left < heap_size && - scored_terms.at(left).hit_count < scored_terms.at(min).hit_count) { - min = left; - } - - // If right child is smaller than current minimum. 
- if (right < heap_size && - scored_terms.at(right).hit_count < scored_terms.at(min).hit_count) { - min = right; - } - - // If the minimum is not the subtree root, swap and continue heapifying the - // lower level subtree. - if (min != target_subtree_root_index) { - std::swap(scored_terms.at(min), - scored_terms.at(target_subtree_root_index)); - HeapifyTermDown(scored_terms, min); - } -} - -// Heapify the given term vector from bottom to top. Call it after add an -// element at the end of the vector. -void HeapifyTermUp(std::vector<TermMetadata>& scored_terms, - int target_subtree_child_index) { - // If we represent a heap in an array/vector, indices of root can be - // calculated as such. - const int root = (target_subtree_child_index + 1) / 2 - 1; - - // If the current child is smaller than the root, swap and continue heapifying - // the upper level subtree - if (root >= 0 && scored_terms.at(target_subtree_child_index).hit_count < - scored_terms.at(root).hit_count) { - std::swap(scored_terms.at(root), - scored_terms.at(target_subtree_child_index)); - HeapifyTermUp(scored_terms, root); - } -} - -TermMetadata PopRootTerm(std::vector<TermMetadata>& scored_terms) { - if (scored_terms.empty()) { - // Return an invalid TermMetadata as a sentinel value. - return TermMetadata(/*content_in=*/"", /*hit_count_in=*/-1); - } - - // Steps to extract root from heap: - // 1. copy out root - TermMetadata root = scored_terms.at(0); - const size_t last_node_index = scored_terms.size() - 1; - // 2. swap root and the last node - std::swap(scored_terms.at(0), scored_terms.at(last_node_index)); - // 3. remove last node - scored_terms.pop_back(); - // 4. heapify root - HeapifyTermDown(scored_terms, /*target_subtree_root_index=*/0); - return root; -} - // Helper function to extract the root from the heap. The heap structure will be // maintained. // @@ -190,19 +115,6 @@ void BuildHeapInPlace( } } -void PushToTermHeap(TermMetadata term, int number_to_return, - std::vector<TermMetadata>& scored_terms_heap) { - if (scored_terms_heap.size() < number_to_return) { - scored_terms_heap.push_back(std::move(term)); - // We insert at end, so we should heapify bottom up. - HeapifyTermUp(scored_terms_heap, scored_terms_heap.size() - 1); - } else if (scored_terms_heap.at(0).hit_count < term.hit_count) { - scored_terms_heap.at(0) = std::move(term); - // We insert at root, so we should heapify top down. - HeapifyTermDown(scored_terms_heap, /*target_subtree_root_index=*/0); - } -} - std::vector<ScoredDocumentHit> PopTopResultsFromHeap( std::vector<ScoredDocumentHit>* scored_document_hits_heap, int num_results, const ScoredDocumentHitComparator& scored_document_hit_comparator) { @@ -222,15 +134,5 @@ std::vector<ScoredDocumentHit> PopTopResultsFromHeap( return scored_document_hit_result; } -std::vector<TermMetadata> PopAllTermsFromHeap( - std::vector<TermMetadata>& scored_terms_heap) { - std::vector<TermMetadata> top_term_result; - top_term_result.reserve(scored_terms_heap.size()); - while (!scored_terms_heap.empty()) { - top_term_result.push_back(PopRootTerm(scored_terms_heap)); - } - return top_term_result; -} - } // namespace lib } // namespace icing diff --git a/icing/scoring/ranker.h b/icing/scoring/ranker.h index 81838f3..785c133 100644 --- a/icing/scoring/ranker.h +++ b/icing/scoring/ranker.h @@ -17,7 +17,6 @@ #include <vector> -#include "icing/index/term-metadata.h" #include "icing/scoring/scored-document-hit.h" // Provides functionality to get the top N results from an unsorted vector. 
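// A self-contained sketch of the min-heap top-K strategy these helpers
// implement, using std::priority_queue for brevity (plain int scores stand in
// for TermMetadata hit counts; this is not the actual Icing implementation):
//
//   #include <functional>
//   #include <queue>
//   #include <vector>
//
//   std::vector<int> TopK(const std::vector<int>& scores, size_t k) {
//     // std::greater makes this a min-heap, so top() is the smallest score kept.
//     std::priority_queue<int, std::vector<int>, std::greater<int>> heap;
//     for (int score : scores) {
//       if (heap.size() < k) {
//         heap.push(score);  // O(log k) per push
//       } else if (score > heap.top()) {
//         heap.pop();        // evict the current minimum
//         heap.push(score);
//       }
//     }
//     std::vector<int> result;  // drains in ascending order, as the heap helpers note
//     while (!heap.empty()) {
//       result.push_back(heap.top());
//       heap.pop();
//     }
//     return result;
//   }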
@@ -40,18 +39,6 @@ std::vector<ScoredDocumentHit> PopTopResultsFromHeap( std::vector<ScoredDocumentHit>* scored_document_hits_heap, int num_results, const ScoredDocumentHitComparator& scored_document_hit_comparator); -// The heap is a min-heap. So that we can avoid some push operations by -// comparing to the root term, and only pushing if greater than root. The time -// complexity for a single push is O(lgK) which K is the number_to_return. -// REQUIRED: scored_terms_heap is not null. -void PushToTermHeap(TermMetadata term, int number_to_return, - std::vector<TermMetadata>& scored_terms_heap); - -// Return all terms from the given terms heap. And since the heap is a min-heap, -// the output vector will be increasing order. -// REQUIRED: scored_terms_heap is not null. -std::vector<TermMetadata> PopAllTermsFromHeap( - std::vector<TermMetadata>& scored_terms_heap); } // namespace lib } // namespace icing diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc index cc1d995..e940e98 100644 --- a/icing/scoring/score-and-rank_benchmark.cc +++ b/icing/scoring/score-and-rank_benchmark.cc @@ -117,8 +117,7 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) { scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(scoring_spec, document_store.get(), - schema_store.get())); + ScoringProcessor::Create(scoring_spec, document_store.get())); int num_to_score = state.range(0); int num_of_documents = state.range(1); @@ -221,8 +220,7 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) { ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(scoring_spec, document_store.get(), - schema_store.get())); + ScoringProcessor::Create(scoring_spec, document_store.get())); int num_to_score = state.range(0); int num_of_documents = state.range(1); @@ -324,8 +322,7 @@ void BM_ScoreAndRankDocumentHitsNoScoring(benchmark::State& state) { scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(scoring_spec, document_store.get(), - schema_store.get())); + ScoringProcessor::Create(scoring_spec, document_store.get())); int num_to_score = state.range(0); int num_of_documents = state.range(1); @@ -393,122 +390,6 @@ BENCHMARK(BM_ScoreAndRankDocumentHitsNoScoring) ->ArgPair(10000, 18000) ->ArgPair(10000, 20000); -void BM_ScoreAndRankDocumentHitsByRelevanceScoring(benchmark::State& state) { - const std::string base_dir = GetTestTempDir() + "/score_and_rank_benchmark"; - const std::string document_store_dir = base_dir + "/document_store"; - const std::string schema_store_dir = base_dir + "/schema_store"; - - // Creates file directories - Filesystem filesystem; - filesystem.DeleteDirectoryRecursively(base_dir.c_str()); - filesystem.CreateDirectoryRecursively(document_store_dir.c_str()); - filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()); - - Clock clock; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem, base_dir, &clock)); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); - std::unique_ptr<DocumentStore> 
document_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType())); - - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(scoring_spec, document_store.get(), - schema_store.get())); - - int num_to_score = state.range(0); - int num_of_documents = state.range(1); - - std::mt19937 random_generator; - std::uniform_int_distribution<int> distribution( - 1, std::numeric_limits<int>::max()); - - SectionId section_id = 0; - SectionIdMask section_id_mask = 1U << section_id; - - // Puts documents into document store - std::vector<DocHitInfo> doc_hit_infos; - for (int i = 0; i < num_of_documents; i++) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentId document_id, - document_store->Put(CreateEmailDocument( - /*id=*/i, /*document_score=*/1, - /*creation_timestamp_ms=*/1), - /*num_tokens=*/10)); - DocHitInfo doc_hit = DocHitInfo(document_id, section_id_mask); - // Set five matches for term "foo" for each document hit. - doc_hit.UpdateSection(section_id, /*hit_term_frequency=*/5); - doc_hit_infos.push_back(doc_hit); - } - - ScoredDocumentHitComparator scored_document_hit_comparator( - /*is_descending=*/true); - - for (auto _ : state) { - // Creates a dummy DocHitInfoIterator with results, we need to pause the - // timer here so that the cost of copying test data is not included. - state.PauseTiming(); - std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - // Create a query term iterator that assigns the document hits to term - // "foo". - std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> - query_term_iterators; - query_term_iterators["foo"] = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - state.ResumeTiming(); - - std::vector<ScoredDocumentHit> scored_document_hits = - scoring_processor->Score(std::move(doc_hit_info_iterator), num_to_score, - &query_term_iterators); - - BuildHeapInPlace(&scored_document_hits, scored_document_hit_comparator); - // Ranks and gets the first page, 20 is a common page size - std::vector<ScoredDocumentHit> results = - PopTopResultsFromHeap(&scored_document_hits, /*num_results=*/20, - scored_document_hit_comparator); - } - - // Clean up - document_store.reset(); - schema_store.reset(); - filesystem.DeleteDirectoryRecursively(base_dir.c_str()); -} -BENCHMARK(BM_ScoreAndRankDocumentHitsByRelevanceScoring) - // num_to_score, num_of_documents in document store - ->ArgPair(1000, 30000) - ->ArgPair(3000, 30000) - ->ArgPair(5000, 30000) - ->ArgPair(7000, 30000) - ->ArgPair(9000, 30000) - ->ArgPair(11000, 30000) - ->ArgPair(13000, 30000) - ->ArgPair(15000, 30000) - ->ArgPair(17000, 30000) - ->ArgPair(19000, 30000) - ->ArgPair(21000, 30000) - ->ArgPair(23000, 30000) - ->ArgPair(25000, 30000) - ->ArgPair(27000, 30000) - ->ArgPair(29000, 30000) - // Starting from this line, we're trying to see if num_of_documents affects - // performance - ->ArgPair(10000, 10000) - ->ArgPair(10000, 12000) - ->ArgPair(10000, 14000) - ->ArgPair(10000, 16000) - ->ArgPair(10000, 18000) - ->ArgPair(10000, 20000); - } // namespace } // namespace lib diff --git a/icing/scoring/scorer.cc b/icing/scoring/scorer.cc index 5f33e66..a4734b4 100644 --- a/icing/scoring/scorer.cc +++ b/icing/scoring/scorer.cc @@ -22,7 +22,6 @@ #include 
"icing/index/iterator/doc-hit-info-iterator.h" #include "icing/proto/scoring.pb.h" #include "icing/scoring/bm25f-calculator.h" -#include "icing/scoring/section-weights.h" #include "icing/store/document-id.h" #include "icing/store/document-store.h" #include "icing/util/status-macros.h" @@ -157,12 +156,11 @@ class NoScorer : public Scorer { }; libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Scorer::Create( - const ScoringSpecProto& scoring_spec, double default_score, - const DocumentStore* document_store, const SchemaStore* schema_store) { + ScoringSpecProto::RankingStrategy::Code rank_by, double default_score, + const DocumentStore* document_store) { ICING_RETURN_ERROR_IF_NULL(document_store); - ICING_RETURN_ERROR_IF_NULL(schema_store); - switch (scoring_spec.rank_by()) { + switch (rank_by) { case ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE: return std::make_unique<DocumentScoreScorer>(document_store, default_score); @@ -170,12 +168,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Scorer::Create( return std::make_unique<DocumentCreationTimestampScorer>(document_store, default_score); case ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE: { - ICING_ASSIGN_OR_RETURN( - std::unique_ptr<SectionWeights> section_weights, - SectionWeights::Create(schema_store, scoring_spec)); - - auto bm25f_calculator = std::make_unique<Bm25fCalculator>( - document_store, std::move(section_weights)); + auto bm25f_calculator = std::make_unique<Bm25fCalculator>(document_store); return std::make_unique<RelevanceScoreScorer>(std::move(bm25f_calculator), default_score); } @@ -190,8 +183,8 @@ libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Scorer::Create( case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP: [[fallthrough]]; case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP: - return std::make_unique<UsageScorer>( - document_store, scoring_spec.rank_by(), default_score); + return std::make_unique<UsageScorer>(document_store, rank_by, + default_score); case ScoringSpecProto::RankingStrategy::NONE: return std::make_unique<NoScorer>(default_score); } diff --git a/icing/scoring/scorer.h b/icing/scoring/scorer.h index abdd5ca..a22db0f 100644 --- a/icing/scoring/scorer.h +++ b/icing/scoring/scorer.h @@ -43,8 +43,8 @@ class Scorer { // FAILED_PRECONDITION on any null pointer input // INVALID_ARGUMENT if fails to create an instance static libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create( - const ScoringSpecProto& scoring_spec, double default_score, - const DocumentStore* document_store, const SchemaStore* schema_store); + ScoringSpecProto::RankingStrategy::Code rank_by, double default_score, + const DocumentStore* document_store); // Returns a non-negative score of a document. 
The score can be a // document-associated score which comes from the DocumentProto directly, an diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc index fef612d..8b89514 100644 --- a/icing/scoring/scorer_test.cc +++ b/icing/scoring/scorer_test.cc @@ -27,7 +27,6 @@ #include "icing/proto/scoring.pb.h" #include "icing/schema-builder.h" #include "icing/schema/schema-store.h" -#include "icing/scoring/section-weights.h" #include "icing/store/document-id.h" #include "icing/store/document-store.h" #include "icing/testing/common-matchers.h" @@ -40,11 +39,11 @@ namespace lib { namespace { using ::testing::Eq; -constexpr PropertyConfigProto::DataType::Code TYPE_STRING = - PropertyConfigProto::DataType::STRING; +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = - PropertyConfigProto::Cardinality::REQUIRED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; class ScorerTest : public testing::Test { protected: @@ -92,8 +91,6 @@ class ScorerTest : public testing::Test { DocumentStore* document_store() { return document_store_.get(); } - SchemaStore* schema_store() { return schema_store_.get(); } - const FakeClock& fake_clock1() { return fake_clock1_; } const FakeClock& fake_clock2() { return fake_clock2_; } @@ -124,37 +121,17 @@ UsageReport CreateUsageReport(std::string name_space, std::string uri, return usage_report; } -ScoringSpecProto CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::Code ranking_strategy) { - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by(ranking_strategy); - return scoring_spec; -} - -TEST_F(ScorerTest, CreationWithNullDocumentStoreShouldFail) { - EXPECT_THAT( - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE), - /*default_score=*/0, /*document_store=*/nullptr, - schema_store()), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); -} - -TEST_F(ScorerTest, CreationWithNullSchemaStoreShouldFail) { - EXPECT_THAT( - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE), - /*default_score=*/0, document_store(), - /*schema_store=*/nullptr), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); +TEST_F(ScorerTest, CreationWithNullPointerShouldFail) { + EXPECT_THAT(Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, + /*default_score=*/0, /*document_store=*/nullptr), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } TEST_F(ScorerTest, ShouldGetDefaultScoreIfDocumentDoesntExist) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE), - /*default_score=*/10, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, + /*default_score=*/10, document_store())); // Non existent document id DocHitInfo docHitInfo = DocHitInfo(/*document_id_in=*/1); @@ -176,9 +153,8 @@ TEST_F(ScorerTest, ShouldGetDefaultScoreIfDocumentIsDeleted) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE), - /*default_score=*/10, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, + 
/*default_score=*/10, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); @@ -209,9 +185,8 @@ TEST_F(ScorerTest, ShouldGetDefaultScoreIfDocumentIsExpired) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE), - /*default_score=*/10, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, + /*default_score=*/10, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); @@ -238,9 +213,8 @@ TEST_F(ScorerTest, ShouldGetDefaultDocumentScore) { document_store()->Put(test_document)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE), - /*default_score=*/10, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, + /*default_score=*/10, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(0)); @@ -261,9 +235,8 @@ TEST_F(ScorerTest, ShouldGetCorrectDocumentScore) { document_store()->Put(test_document)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, + /*default_score=*/0, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(5)); @@ -286,9 +259,8 @@ TEST_F(ScorerTest, QueryIteratorNullRelevanceScoreShouldReturnDefaultScore) { document_store()->Put(test_document)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE), - /*default_score=*/10, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, + /*default_score=*/10, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10)); @@ -318,9 +290,8 @@ TEST_F(ScorerTest, ShouldGetCorrectCreationTimestampScore) { document_store()->Put(test_document2)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP, + /*default_score=*/0, document_store())); DocHitInfo docHitInfo1 = DocHitInfo(document_id1); DocHitInfo docHitInfo2 = DocHitInfo(document_id2); @@ -345,19 +316,16 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageCountScoreForType1) { // Create 3 scorers for 3 different usage types. 
ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer1, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer2, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer3, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, + /*default_score=*/0, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); @@ -389,19 +357,16 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageCountScoreForType2) { // Create 3 scorers for 3 different usage types. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer1, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer2, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer3, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, + /*default_score=*/0, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); @@ -433,19 +398,16 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageCountScoreForType3) { // Create 3 scorers for 3 different usage types. 
ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer1, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer2, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer3, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, + /*default_score=*/0, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); @@ -477,22 +439,19 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType1) { // Create 3 scorers for 3 different usage types. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer1, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy:: - USAGE_TYPE1_LAST_USED_TIMESTAMP), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create( + ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer2, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy:: - USAGE_TYPE2_LAST_USED_TIMESTAMP), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create( + ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer3, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy:: - USAGE_TYPE3_LAST_USED_TIMESTAMP), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create( + ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP, + /*default_score=*/0, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); @@ -540,22 +499,19 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType2) { // Create 3 scorers for 3 different usage types. 
ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer1, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy:: - USAGE_TYPE1_LAST_USED_TIMESTAMP), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create( + ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer2, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy:: - USAGE_TYPE2_LAST_USED_TIMESTAMP), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create( + ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer3, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy:: - USAGE_TYPE3_LAST_USED_TIMESTAMP), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create( + ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP, + /*default_score=*/0, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); @@ -603,22 +559,19 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) { // Create 3 scorers for 3 different usage types. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer1, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy:: - USAGE_TYPE1_LAST_USED_TIMESTAMP), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create( + ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer2, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy:: - USAGE_TYPE2_LAST_USED_TIMESTAMP), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create( + ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP, + /*default_score=*/0, document_store())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer3, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy:: - USAGE_TYPE3_LAST_USED_TIMESTAMP), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create( + ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP, + /*default_score=*/0, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0)); EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0)); @@ -654,9 +607,8 @@ TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) { TEST_F(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy::NONE), - /*default_score=*/3, document_store(), schema_store())); + Scorer::Create(ScoringSpecProto::RankingStrategy::NONE, + /*default_score=*/3, document_store())); DocHitInfo docHitInfo1 = DocHitInfo(/*document_id_in=*/0); DocHitInfo docHitInfo2 = DocHitInfo(/*document_id_in=*/1); @@ -666,10 +618,8 @@ TEST_F(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) { EXPECT_THAT(scorer->GetScore(docHitInfo3), Eq(3)); ICING_ASSERT_OK_AND_ASSIGN( - scorer, - Scorer::Create(CreateScoringSpecForRankingStrategy( - 
ScoringSpecProto::RankingStrategy::NONE), - /*default_score=*/111, document_store(), schema_store())); + scorer, Scorer::Create(ScoringSpecProto::RankingStrategy::NONE, + /*default_score=*/111, document_store())); docHitInfo1 = DocHitInfo(/*document_id_in=*/4); docHitInfo2 = DocHitInfo(/*document_id_in=*/5); @@ -693,10 +643,9 @@ TEST_F(ScorerTest, ShouldScaleUsageTimestampScoreForMaxTimestamp) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Scorer> scorer1, - Scorer::Create(CreateScoringSpecForRankingStrategy( - ScoringSpecProto::RankingStrategy:: - USAGE_TYPE1_LAST_USED_TIMESTAMP), - /*default_score=*/0, document_store(), schema_store())); + Scorer::Create( + ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP, + /*default_score=*/0, document_store())); DocHitInfo docHitInfo = DocHitInfo(document_id); // Create usage report for the maximum allowable timestamp. diff --git a/icing/scoring/scoring-processor.cc b/icing/scoring/scoring-processor.cc index e36f3bb..24480ef 100644 --- a/icing/scoring/scoring-processor.cc +++ b/icing/scoring/scoring-processor.cc @@ -39,20 +39,19 @@ constexpr double kDefaultScoreInAscendingOrder = libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>> ScoringProcessor::Create(const ScoringSpecProto& scoring_spec, - const DocumentStore* document_store, - const SchemaStore* schema_store) { + const DocumentStore* document_store) { ICING_RETURN_ERROR_IF_NULL(document_store); - ICING_RETURN_ERROR_IF_NULL(schema_store); bool is_descending_order = scoring_spec.order_by() == ScoringSpecProto::Order::DESC; ICING_ASSIGN_OR_RETURN( std::unique_ptr<Scorer> scorer, - Scorer::Create(scoring_spec, + Scorer::Create(scoring_spec.rank_by(), is_descending_order ? kDefaultScoreInDescendingOrder : kDefaultScoreInAscendingOrder, - document_store, schema_store)); + document_store)); + // Using `new` to access a non-public constructor. return std::unique_ptr<ScoringProcessor>( new ScoringProcessor(std::move(scorer))); diff --git a/icing/scoring/scoring-processor.h b/icing/scoring/scoring-processor.h index e7d09b1..2289605 100644 --- a/icing/scoring/scoring-processor.h +++ b/icing/scoring/scoring-processor.h @@ -40,8 +40,8 @@ class ScoringProcessor { // A ScoringProcessor on success // FAILED_PRECONDITION on any null pointer input static libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>> Create( - const ScoringSpecProto& scoring_spec, const DocumentStore* document_store, - const SchemaStore* schema_store); + const ScoringSpecProto& scoring_spec, + const DocumentStore* document_store); // Assigns scores to DocHitInfos from the given DocHitInfoIterator and returns // a vector of ScoredDocumentHits. 
The size of results is no more than diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc index b42ba31..125e2a7 100644 --- a/icing/scoring/scoring-processor_test.cc +++ b/icing/scoring/scoring-processor_test.cc @@ -34,16 +34,14 @@ namespace lib { namespace { using ::testing::ElementsAre; -using ::testing::Eq; -using ::testing::Gt; using ::testing::IsEmpty; using ::testing::SizeIs; -constexpr PropertyConfigProto::DataType::Code TYPE_STRING = - PropertyConfigProto::DataType::STRING; +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; class ScoringProcessorTest : public testing::Test { protected: @@ -60,7 +58,7 @@ class ScoringProcessorTest : public testing::Test { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -71,24 +69,11 @@ class ScoringProcessorTest : public testing::Test { // Creates a simple email schema SchemaProto test_email_schema = SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("email") - .AddProperty( - PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString( - TermMatchType::PREFIX, - StringIndexingConfig::TokenizerType::PLAIN) - .SetDataType(TYPE_STRING) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString( - TermMatchType::PREFIX, - StringIndexingConfig::TokenizerType::PLAIN) - .SetDataType(TYPE_STRING) - .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema)); } @@ -101,8 +86,6 @@ class ScoringProcessorTest : public testing::Test { DocumentStore* document_store() { return document_store_.get(); } - SchemaStore* schema_store() { return schema_store_.get(); } - private: const std::string test_dir_; const std::string doc_store_dir_; @@ -156,46 +139,16 @@ UsageReport CreateUsageReport(std::string name_space, std::string uri, return usage_report; } -TypePropertyWeights CreateTypePropertyWeights( - std::string schema_type, std::vector<PropertyWeight> property_weights) { - TypePropertyWeights type_property_weights; - type_property_weights.set_schema_type(std::move(schema_type)); - type_property_weights.mutable_property_weights()->Reserve( - property_weights.size()); - - for (PropertyWeight& property_weight : property_weights) { - *type_property_weights.add_property_weights() = std::move(property_weight); - } - - return type_property_weights; -} - -PropertyWeight CreatePropertyWeight(std::string path, double weight) { - PropertyWeight property_weight; - property_weight.set_path(std::move(path)); - property_weight.set_weight(weight); - return property_weight; -} - -TEST_F(ScoringProcessorTest, CreationWithNullDocumentStoreShouldFail) { +TEST_F(ScoringProcessorTest, CreationWithNullPointerShouldFail) { ScoringSpecProto spec_proto; - EXPECT_THAT(ScoringProcessor::Create(spec_proto, /*document_store=*/nullptr, - schema_store()), - 
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); -} - -TEST_F(ScoringProcessorTest, CreationWithNullSchemaStoreShouldFail) { - ScoringSpecProto spec_proto; - EXPECT_THAT(ScoringProcessor::Create(spec_proto, document_store(), - /*schema_store=*/nullptr), + EXPECT_THAT(ScoringProcessor::Create(spec_proto, /*document_store=*/nullptr), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } TEST_F(ScoringProcessorTest, ShouldCreateInstance) { ScoringSpecProto spec_proto; spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - ICING_EXPECT_OK( - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ICING_EXPECT_OK(ScoringProcessor::Create(spec_proto, document_store())); } TEST_F(ScoringProcessorTest, ShouldHandleEmptyDocHitIterator) { @@ -210,7 +163,7 @@ TEST_F(ScoringProcessorTest, ShouldHandleEmptyDocHitIterator) { // Creates a ScoringProcessor ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/5), @@ -236,7 +189,7 @@ TEST_F(ScoringProcessorTest, ShouldHandleNonPositiveNumToScore) { // Creates a ScoringProcessor ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/-1), @@ -266,7 +219,7 @@ TEST_F(ScoringProcessorTest, ShouldRespectNumToScore) { // Creates a ScoringProcessor ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/2), @@ -298,7 +251,7 @@ TEST_F(ScoringProcessorTest, ShouldScoreByDocumentScore) { // Creates a ScoringProcessor ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/3), @@ -353,7 +306,7 @@ TEST_F(ScoringProcessorTest, // Creates a ScoringProcessor ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> query_term_iterators; @@ -363,11 +316,11 @@ TEST_F(ScoringProcessorTest, // the document's length determines the final score. Document shorter than the // average corpus length are slightly boosted. 
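// A sketch of the normalization that produces this boost, using the file's
// constants k1 = 1.2 and b = 0.7 (the helper name and the numbers below are
// illustrative only):
//
//   float NormalizedTf(float f_q, float dl, float avgdl) {
//     constexpr float k1 = 1.2f, b = 0.7f;
//     return f_q * (k1 + 1) / (f_q + k1 * (1 - b + b * dl / avgdl));
//   }
//
// For a single hit (f_q = 1), a document at the corpus-average length
// (dl == avgdl) yields 2.2 / 2.2 = 1.0, while one at half the average yields
// 2.2 / (1 + 1.2 * (1 - 0.7 + 0.7 * 0.5)) = 2.2 / 1.78 ~= 1.24, i.e. the
// shorter document is boosted.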
ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask, - /*score=*/0.187114); + /*score=*/0.255482); ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask, - /*score=*/0.084904); + /*score=*/0.115927); ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask, - /*score=*/0.121896); + /*score=*/0.166435); EXPECT_THAT( scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/3, &query_term_iterators), @@ -422,7 +375,7 @@ TEST_F(ScoringProcessorTest, // Creates a ScoringProcessor ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> query_term_iterators; @@ -431,11 +384,11 @@ TEST_F(ScoringProcessorTest, // Since the three documents all contain the query term "foo" exactly once // and they have the same length, they will have the same BM25F score. ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask, - /*score=*/0.118455); + /*score=*/0.16173716); ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask, - /*score=*/0.118455); + /*score=*/0.16173716); ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask, - /*score=*/0.118455); + /*score=*/0.16173716); EXPECT_THAT( scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/3, &query_term_iterators), @@ -495,7 +448,7 @@ TEST_F(ScoringProcessorTest, // Creates a ScoringProcessor ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> query_term_iterators; @@ -504,11 +457,11 @@ TEST_F(ScoringProcessorTest, // Since the three documents all have the same length, the score is decided by // the frequency of the query term "foo".
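// Plugging fixed, equal lengths (norm = 1.0) into the sketch above, the score
// is monotonic but saturating in term frequency: tf / (tf + k1) is roughly
// 0.45, 0.63, and 0.71 at tf = 1, 2, 3 for k1 = 1.2 (illustrative arithmetic,
// not the engine's exact output), so the document with the most "foo" hits
// scores highest in the expectations that follow.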
ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask1, - /*score=*/0.226674); + /*score=*/0.309497); ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask2, - /*score=*/0.118455); + /*score=*/0.16173716); ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask3, - /*score=*/0.196720); + /*score=*/0.268599); EXPECT_THAT( scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/3, &query_term_iterators), @@ -517,351 +470,6 @@ TEST_F(ScoringProcessorTest, EqualsScoredDocumentHit(expected_scored_doc_hit3))); } -TEST_F(ScoringProcessorTest, - ShouldScoreByRelevanceScore_HitTermWithZeroFrequency) { - DocumentProto document1 = - CreateDocument("icing", "email/1", kDefaultScore, - /*creation_timestamp_ms=*/kDefaultCreationTimestampMs); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentId document_id1, - document_store()->Put(document1, /*num_tokens=*/10)); - - // Document 1 contains the term "foo" 0 times in the "subject" property - DocHitInfo doc_hit_info1(document_id1); - doc_hit_info1.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/0); - - // Creates input doc_hit_infos and expected output scored_document_hits - std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1}; - - // Creates a dummy DocHitInfoIterator with 1 result for the query "foo" - std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - ScoringSpecProto spec_proto; - spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - - // Creates a ScoringProcessor - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); - - std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> - query_term_iterators; - query_term_iterators["foo"] = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - SectionIdMask section_id_mask1 = 0b00000001; - - // Since the document hit has zero frequency, expect a score of zero. 
- ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask1, - /*score=*/0.000000); - EXPECT_THAT( - scoring_processor->Score(std::move(doc_hit_info_iterator), - /*num_to_score=*/1, &query_term_iterators), - ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1))); -} - -TEST_F(ScoringProcessorTest, - ShouldScoreByRelevanceScore_SameHitFrequencyDifferentPropertyWeights) { - DocumentProto document1 = - CreateDocument("icing", "email/1", kDefaultScore, - /*creation_timestamp_ms=*/kDefaultCreationTimestampMs); - DocumentProto document2 = - CreateDocument("icing", "email/2", kDefaultScore, - /*creation_timestamp_ms=*/kDefaultCreationTimestampMs); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentId document_id1, - document_store()->Put(document1, /*num_tokens=*/1)); - ICING_ASSERT_OK_AND_ASSIGN( - DocumentId document_id2, - document_store()->Put(document2, /*num_tokens=*/1)); - - // Document 1 contains the term "foo" 1 time in the "body" property - SectionId body_section_id = 0; - DocHitInfo doc_hit_info1(document_id1); - doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1); - - // Document 2 contains the term "foo" 1 time in the "subject" property - SectionId subject_section_id = 1; - DocHitInfo doc_hit_info2(document_id2); - doc_hit_info2.UpdateSection(subject_section_id, /*hit_term_frequency=*/1); - - // Creates input doc_hit_infos and expected output scored_document_hits - std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2}; - - // Creates a dummy DocHitInfoIterator with 2 results for the query "foo" - std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - ScoringSpecProto spec_proto; - spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - - PropertyWeight body_property_weight = - CreatePropertyWeight(/*path=*/"body", /*weight=*/0.5); - PropertyWeight subject_property_weight = - CreatePropertyWeight(/*path=*/"subject", /*weight=*/2.0); - *spec_proto.add_type_property_weights() = CreateTypePropertyWeights( - /*schema_type=*/"email", {body_property_weight, subject_property_weight}); - - // Creates a ScoringProcessor - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); - - std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> - query_term_iterators; - query_term_iterators["foo"] = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - SectionIdMask body_section_id_mask = 1U << body_section_id; - SectionIdMask subject_section_id_mask = 1U << subject_section_id; - - // We expect document 2 to have a higher score than document 1 as it matches - // "foo" in the "subject" property, which is weighed higher than the "body" - // property. Final scores are computed with smoothing applied. 
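// A minimal sketch of how per-section weights are assumed to fold into term
// frequency before BM25F scoring (the real accumulation lives in the bm25f
// scorer and the section-weights code deleted later in this change; the
// helper below is illustrative, not Icing API):
//
//   double WeightedTermFrequency(
//       const std::unordered_map<SectionId, double>& tf_by_section,
//       const SectionWeights& weights, SchemaTypeId schema_type_id) {
//     double weighted_tf = 0.0;
//     for (const auto& [section_id, tf] : tf_by_section) {
//       // Each section's hits count in proportion to its normalized weight.
//       weighted_tf +=
//           tf * weights.GetNormalizedSectionWeight(schema_type_id, section_id);
//     }
//     return weighted_tf;
//   }
//
// With raw weights body=0.5 and subject=2.0 (normalized to 0.25 and 1.0), a
// single "subject" hit outweighs a single "body" hit, as asserted below.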
- ScoredDocumentHit expected_scored_doc_hit1(document_id1, body_section_id_mask, - /*score=*/0.053624); - ScoredDocumentHit expected_scored_doc_hit2(document_id2, - subject_section_id_mask, - /*score=*/0.153094); - EXPECT_THAT( - scoring_processor->Score(std::move(doc_hit_info_iterator), - /*num_to_score=*/2, &query_term_iterators), - ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1), - EqualsScoredDocumentHit(expected_scored_doc_hit2))); -} - -TEST_F(ScoringProcessorTest, - ShouldScoreByRelevanceScore_WithImplicitPropertyWeight) { - DocumentProto document1 = - CreateDocument("icing", "email/1", kDefaultScore, - /*creation_timestamp_ms=*/kDefaultCreationTimestampMs); - DocumentProto document2 = - CreateDocument("icing", "email/2", kDefaultScore, - /*creation_timestamp_ms=*/kDefaultCreationTimestampMs); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentId document_id1, - document_store()->Put(document1, /*num_tokens=*/1)); - ICING_ASSERT_OK_AND_ASSIGN( - DocumentId document_id2, - document_store()->Put(document2, /*num_tokens=*/1)); - - // Document 1 contains the term "foo" 1 time in the "body" property - SectionId body_section_id = 0; - DocHitInfo doc_hit_info1(document_id1); - doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1); - - // Document 2 contains the term "foo" 1 time in the "subject" property - SectionId subject_section_id = 1; - DocHitInfo doc_hit_info2(document_id2); - doc_hit_info2.UpdateSection(subject_section_id, /*hit_term_frequency=*/1); - - // Creates input doc_hit_infos and expected output scored_document_hits - std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2}; - - // Creates a dummy DocHitInfoIterator with 2 results for the query "foo" - std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - ScoringSpecProto spec_proto; - spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - - PropertyWeight body_property_weight = - CreatePropertyWeight(/*path=*/"body", /*weight=*/0.5); - *spec_proto.add_type_property_weights() = CreateTypePropertyWeights( - /*schema_type=*/"email", {body_property_weight}); - - // Creates a ScoringProcessor - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); - - std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> - query_term_iterators; - query_term_iterators["foo"] = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - SectionIdMask body_section_id_mask = 1U << body_section_id; - SectionIdMask subject_section_id_mask = 1U << subject_section_id; - - // We expect document 2 to have a higher score than document 1 as it matches - // "foo" in the "subject" property, which is weighed higher than the "body" - // property. This is because the "subject" property is implicitly given a - weight of 1.0, the default weight value. Final scores are computed with - smoothing applied.
- ScoredDocumentHit expected_scored_doc_hit1(document_id1, body_section_id_mask, - /*score=*/0.094601); - ScoredDocumentHit expected_scored_doc_hit2(document_id2, - subject_section_id_mask, - /*score=*/0.153094); - EXPECT_THAT( - scoring_processor->Score(std::move(doc_hit_info_iterator), - /*num_to_score=*/2, &query_term_iterators), - ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1), - EqualsScoredDocumentHit(expected_scored_doc_hit2))); -} - -TEST_F(ScoringProcessorTest, - ShouldScoreByRelevanceScore_WithDefaultPropertyWeight) { - DocumentProto document1 = - CreateDocument("icing", "email/1", kDefaultScore, - /*creation_timestamp_ms=*/kDefaultCreationTimestampMs); - DocumentProto document2 = - CreateDocument("icing", "email/2", kDefaultScore, - /*creation_timestamp_ms=*/kDefaultCreationTimestampMs); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentId document_id1, - document_store()->Put(document1, /*num_tokens=*/1)); - - // Document 1 contains the term "foo" 1 time in the "body" property - SectionId body_section_id = 0; - DocHitInfo doc_hit_info1(document_id1); - doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1); - - // Creates input doc_hit_infos and expected output scored_document_hits - std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1}; - - // Creates a dummy DocHitInfoIterator with 1 result for the query "foo" - std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - ScoringSpecProto spec_proto; - spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - - *spec_proto.add_type_property_weights() = - CreateTypePropertyWeights(/*schema_type=*/"email", {}); - - // Creates a ScoringProcessor with no explicit weights set. - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); - - ScoringSpecProto spec_proto_with_weights; - spec_proto_with_weights.set_rank_by( - ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - - PropertyWeight body_property_weight = CreatePropertyWeight(/*path=*/"body", - /*weight=*/1.0); - *spec_proto_with_weights.add_type_property_weights() = - CreateTypePropertyWeights(/*schema_type=*/"email", - {body_property_weight}); - - // Creates a ScoringProcessor with default weight set for "body" property. - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ScoringProcessor> scoring_processor_with_weights, - ScoringProcessor::Create(spec_proto_with_weights, document_store(), - schema_store())); - - std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> - query_term_iterators; - query_term_iterators["foo"] = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - // Create a doc hit iterator - std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> - query_term_iterators_scoring_with_weights; - query_term_iterators_scoring_with_weights["foo"] = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - SectionIdMask body_section_id_mask = 1U << body_section_id; - - // We expect document 1 to have the same score whether a weight is explicitly - set to 1.0 or implicitly scored with the default weight. Final scores are - computed with smoothing applied.
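// Why the two processors must agree: a property missing from the spec gets the
// raw default weight kDefaultSectionWeight (1.0), and raw weights are
// normalized by the per-type maximum, so pinning "body" to 1.0 explicitly is a
// no-op. Worked values (assuming the schema's "subject" and "body" sections):
//
//   implicit spec:  raw {subject: 1.0, body: 1.0} -> normalized {1.0, 1.0}
//   explicit spec:  raw {subject: 1.0, body: 1.0} -> normalized {1.0, 1.0}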
- ScoredDocumentHit expected_scored_doc_hit(document_id1, body_section_id_mask, - /*score=*/0.208191); - EXPECT_THAT( - scoring_processor->Score(std::move(doc_hit_info_iterator), - /*num_to_score=*/1, &query_term_iterators), - ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit))); - - // Restore ownership of doc hit iterator and query term iterator to test. - doc_hit_info_iterator = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - query_term_iterators["foo"] = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - EXPECT_THAT(scoring_processor_with_weights->Score( - std::move(doc_hit_info_iterator), - /*num_to_score=*/1, &query_term_iterators), - ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit))); -} - -TEST_F(ScoringProcessorTest, - ShouldScoreByRelevanceScore_WithZeroPropertyWeight) { - DocumentProto document1 = - CreateDocument("icing", "email/1", kDefaultScore, - /*creation_timestamp_ms=*/kDefaultCreationTimestampMs); - DocumentProto document2 = - CreateDocument("icing", "email/2", kDefaultScore, - /*creation_timestamp_ms=*/kDefaultCreationTimestampMs); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentId document_id1, - document_store()->Put(document1, /*num_tokens=*/1)); - ICING_ASSERT_OK_AND_ASSIGN( - DocumentId document_id2, - document_store()->Put(document2, /*num_tokens=*/1)); - - // Document 1 contains the term "foo" 1 time in the "body" property - SectionId body_section_id = 0; - DocHitInfo doc_hit_info1(document_id1); - doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1); - - // Document 2 contains the term "foo" 1 time in the "subject" property - SectionId subject_section_id = 1; - DocHitInfo doc_hit_info2(document_id2); - doc_hit_info2.UpdateSection(subject_section_id, /*hit_term_frequency=*/1); - - // Creates input doc_hit_infos and expected output scored_document_hits - std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2}; - - // Creates a dummy DocHitInfoIterator with 2 results for the query "foo" - std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - ScoringSpecProto spec_proto; - spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - - // Sets property weight for "body" to 0.0. - PropertyWeight body_property_weight = - CreatePropertyWeight(/*path=*/"body", /*weight=*/0.0); - // Sets property weight for "subject" to 1.0. - PropertyWeight subject_property_weight = - CreatePropertyWeight(/*path=*/"subject", /*weight=*/1.0); - *spec_proto.add_type_property_weights() = CreateTypePropertyWeights( - /*schema_type=*/"email", {body_property_weight, subject_property_weight}); - - // Creates a ScoringProcessor - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); - - std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> - query_term_iterators; - query_term_iterators["foo"] = - std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); - - std::vector<ScoredDocumentHit> scored_document_hits = - scoring_processor->Score(std::move(doc_hit_info_iterator), - /*num_to_score=*/2, &query_term_iterators); - - // We expect document1 to have a score of 0.0 as the query term "foo" matches - // in the "body" property which has a weight of 0.0. This is a result of the - // weighted term frequency being scaled down to 0.0 for the hit. 
We expect - // document2 to have a positive score as the query term "foo" matches in the - // "subject" property which has a weight of 1.0. - EXPECT_THAT(scored_document_hits, SizeIs(2)); - EXPECT_THAT(scored_document_hits.at(0).document_id(), Eq(document_id1)); - EXPECT_THAT(scored_document_hits.at(0).score(), Eq(0.0)); - EXPECT_THAT(scored_document_hits.at(1).document_id(), Eq(document_id2)); - EXPECT_THAT(scored_document_hits.at(1).score(), Gt(0.0)); -} - TEST_F(ScoringProcessorTest, ShouldScoreByCreationTimestamp) { DocumentProto document1 = CreateDocument("icing", "email/1", kDefaultScore, @@ -901,7 +509,7 @@ TEST_F(ScoringProcessorTest, ShouldScoreByCreationTimestamp) { // Creates a ScoringProcessor which ranks in descending order ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/3), @@ -961,7 +569,7 @@ TEST_F(ScoringProcessorTest, ShouldScoreByUsageCount) { // Creates a ScoringProcessor which ranks in descending order ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/3), @@ -1021,7 +629,7 @@ TEST_F(ScoringProcessorTest, ShouldScoreByUsageTimestamp) { // Creates a ScoringProcessor which ranks in descending order ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/3), @@ -1057,7 +665,7 @@ TEST_F(ScoringProcessorTest, ShouldHandleNoScores) { // Creates a ScoringProcessor which ranks in descending order ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/4), ElementsAre(EqualsScoredDocumentHit(scored_document_hit_default), @@ -1106,7 +714,7 @@ TEST_F(ScoringProcessorTest, ShouldWrapResultsWhenNoScoring) { // Creates a ScoringProcessor which ranks in descending order ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + ScoringProcessor::Create(spec_proto, document_store())); EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator), /*num_to_score=*/3), diff --git a/icing/scoring/section-weights.cc b/icing/scoring/section-weights.cc deleted file mode 100644 index ed7cd5e..0000000 --- a/icing/scoring/section-weights.cc +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "icing/scoring/section-weights.h" - -#include <cfloat> -#include <unordered_map> -#include <utility> - -#include "icing/proto/scoring.pb.h" -#include "icing/schema/section.h" -#include "icing/util/logging.h" - -namespace icing { -namespace lib { - -namespace { - -// Normalizes all weights in the map to be in range [0.0, 1.0], where the max -// weight is normalized to 1.0. In the case that all weights are equal to 0.0, -// the normalized weight for each will be 0.0. -inline void NormalizeSectionWeights( - double max_weight, std::unordered_map<SectionId, double>& section_weights) { - if (max_weight == 0.0) { - return; - } - for (auto& raw_weight : section_weights) { - raw_weight.second = raw_weight.second / max_weight; - } -} -} // namespace - -libtextclassifier3::StatusOr<std::unique_ptr<SectionWeights>> -SectionWeights::Create(const SchemaStore* schema_store, - const ScoringSpecProto& scoring_spec) { - ICING_RETURN_ERROR_IF_NULL(schema_store); - - std::unordered_map<SchemaTypeId, NormalizedSectionWeights> - schema_property_weight_map; - for (const TypePropertyWeights& type_property_weights : - scoring_spec.type_property_weights()) { - std::string_view schema_type = type_property_weights.schema_type(); - auto schema_type_id_or = schema_store->GetSchemaTypeId(schema_type); - if (!schema_type_id_or.ok()) { - ICING_LOG(WARNING) << "No schema type id found for schema type: " - << schema_type; - continue; - } - SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie(); - auto section_metadata_list_or = - schema_store->GetSectionMetadata(schema_type.data()); - if (!section_metadata_list_or.ok()) { - ICING_LOG(WARNING) << "No metadata found for schema type: " - << schema_type; - continue; - } - - const std::vector<SectionMetadata>* metadata_list = - section_metadata_list_or.ValueOrDie(); - - std::unordered_map<std::string, double> property_paths_weights; - for (const PropertyWeight& property_weight : - type_property_weights.property_weights()) { - double property_path_weight = property_weight.weight(); - - // Return error on negative weights. - if (property_path_weight < 0.0) { - return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( - "Property weight for property path \"%s\" is negative. Negative " - "weights are invalid.", - property_weight.path().c_str())); - } - property_paths_weights.insert( - {property_weight.path(), property_path_weight}); - } - NormalizedSectionWeights normalized_section_weights = - ExtractNormalizedSectionWeights(property_paths_weights, *metadata_list); - - schema_property_weight_map.insert( - {schema_type_id, - {/*section_weights*/ std::move( - normalized_section_weights.section_weights), - /*default_weight*/ normalized_section_weights.default_weight}}); - } - // Using `new` to access a non-public constructor. 
- return std::unique_ptr<SectionWeights>( - new SectionWeights(std::move(schema_property_weight_map))); -} - -double SectionWeights::GetNormalizedSectionWeight(SchemaTypeId schema_type_id, - SectionId section_id) const { - auto schema_type_map = schema_section_weight_map_.find(schema_type_id); - if (schema_type_map == schema_section_weight_map_.end()) { - // Return default weight if the schema type has no weights specified. - return kDefaultSectionWeight; - } - - auto section_weight = - schema_type_map->second.section_weights.find(section_id); - if (section_weight == schema_type_map->second.section_weights.end()) { - // If there is no entry for SectionId, the weight is implicitly the - // normalized default weight. - return schema_type_map->second.default_weight; - } - return section_weight->second; -} - -inline SectionWeights::NormalizedSectionWeights -SectionWeights::ExtractNormalizedSectionWeights( - const std::unordered_map<std::string, double>& raw_weights, - const std::vector<SectionMetadata>& metadata_list) { - double max_weight = -std::numeric_limits<double>::infinity(); - std::unordered_map<SectionId, double> section_weights; - for (const SectionMetadata& section_metadata : metadata_list) { - std::string_view metadata_path = section_metadata.path; - double section_weight = kDefaultSectionWeight; - auto iter = raw_weights.find(metadata_path.data()); - if (iter != raw_weights.end()) { - section_weight = iter->second; - section_weights.insert({section_metadata.id, section_weight}); - } - // Replace max if we see new max weight. - max_weight = std::max(max_weight, section_weight); - } - - NormalizeSectionWeights(max_weight, section_weights); - // Set normalized default weight to 1.0 in case there is no section - // metadata and max_weight is -INF (we should not see this case). - double normalized_default_weight = - max_weight == -std::numeric_limits<double>::infinity() - ? kDefaultSectionWeight - : kDefaultSectionWeight / max_weight; - SectionWeights::NormalizedSectionWeights normalized_section_weights = - SectionWeights::NormalizedSectionWeights(); - normalized_section_weights.section_weights = std::move(section_weights); - normalized_section_weights.default_weight = normalized_default_weight; - return normalized_section_weights; -} -} // namespace lib -} // namespace icing diff --git a/icing/scoring/section-weights.h b/icing/scoring/section-weights.h deleted file mode 100644 index 23a9188..0000000 --- a/icing/scoring/section-weights.h +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef ICING_SCORING_SECTION_WEIGHTS_H_ -#define ICING_SCORING_SECTION_WEIGHTS_H_ - -#include <unordered_map> - -#include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/schema/schema-store.h" -#include "icing/store/document-store.h" - -namespace icing { -namespace lib { - -inline constexpr double kDefaultSectionWeight = 1.0; - -// Provides functions for setting and retrieving section weights for schema -// type properties. Section weights are used to promote and demote term matches -// in sections when scoring results. Section weights are provided by property -// path, and can range from [0, DBL_MAX]. The SectionId is matched to the -// property path by going over the schema type's section metadata. Weights that -// correspond to a valid property path are then normalized against the maximum -// section weight, and put into a map for quick access by scorers. By default, -// a section is given a raw, pre-normalized weight of 1.0. -class SectionWeights { - public: - // SectionWeights instances should not be copied. - SectionWeights(const SectionWeights&) = delete; - SectionWeights& operator=(const SectionWeights&) = delete; - - // Factory function to create a SectionWeights instance. Raw weights are - // provided through the ScoringSpecProto. Provided property paths for weights - // are validated against the schema type's section metadata. If the property - // path doesn't exist, the property weight is ignored. If a weight is - // negative, an invalid argument error is returned. Raw weights are then - // normalized against the maximum weight for that schema type. - // - // Returns: - // A SectionWeights instance on success - // FAILED_PRECONDITION on any null pointer input - // INVALID_ARGUMENT if a provided weight for a property path is negative. - static libtextclassifier3::StatusOr<std::unique_ptr<SectionWeights>> Create( - const SchemaStore* schema_store, const ScoringSpecProto& scoring_spec); - - // Returns the normalized section weight by SchemaTypeId and SectionId. If - // the SchemaTypeId, or the SectionId for a SchemaTypeId, is not found in the - // normalized weights map, the default weight is returned instead. - double GetNormalizedSectionWeight(SchemaTypeId schema_type_id, - SectionId section_id) const; - - private: - // Holds the normalized section weights for a schema type, as well as the - // normalized default weight for sections that have no weight set. - struct NormalizedSectionWeights { - std::unordered_map<SectionId, double> section_weights; - double default_weight; - }; - - explicit SectionWeights( - const std::unordered_map<SchemaTypeId, NormalizedSectionWeights> - schema_section_weight_map) - : schema_section_weight_map_(std::move(schema_section_weight_map)) {} - - // Creates a map of section ids to normalized weights from the raw property - // path weight map and section metadata and calculates the normalized default - // section weight. - static inline SectionWeights::NormalizedSectionWeights - ExtractNormalizedSectionWeights( - const std::unordered_map<std::string, double>& raw_weights, - const std::vector<SectionMetadata>& metadata_list); - - // A map of (SchemaTypeId -> SectionId -> Normalized Weight) that allows for fast - // look up of normalized weights. This is precomputed when creating a - // SectionWeights instance.
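// Worked example of the normalization (values mirror the
// ShouldNormalizeIfAllWeightsBelowOne test removed below): raw weights
// {body: 0.1, sender.name: 0.2, subject: 0.4} have a per-type maximum of 0.4,
// so the precomputed entries become {body: 0.25, sender.name: 0.5,
// subject: 1.0}, and a lookup is then just two hash-map finds.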
- std::unordered_map<SchemaTypeId, NormalizedSectionWeights> - schema_section_weight_map_; -}; - -} // namespace lib -} // namespace icing - -#endif // ICING_SCORING_SECTION_WEIGHTS_H_ diff --git a/icing/scoring/section-weights_test.cc b/icing/scoring/section-weights_test.cc deleted file mode 100644 index 330faee..0000000 --- a/icing/scoring/section-weights_test.cc +++ /dev/null @@ -1,443 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "icing/scoring/section-weights.h" - -#include <cfloat> - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "icing/proto/scoring.pb.h" -#include "icing/schema-builder.h" -#include "icing/testing/common-matchers.h" -#include "icing/testing/fake-clock.h" -#include "icing/testing/tmp-directory.h" - -namespace icing { -namespace lib { - -namespace { -using ::testing::Eq; - -class SectionWeightsTest : public testing::Test { - protected: - SectionWeightsTest() - : test_dir_(GetTestTempDir() + "/icing"), - schema_store_dir_(test_dir_ + "/schema_store") {} - - void SetUp() override { - // Creates file directories - filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); - filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()); - - ICING_ASSERT_OK_AND_ASSIGN( - schema_store_, - SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - - SchemaTypeConfigProto sender_schema = - SchemaTypeConfigBuilder() - .SetType("sender") - .AddProperty( - PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString( - TermMatchType::PREFIX, - StringIndexingConfig::TokenizerType::PLAIN) - .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)) - .Build(); - SchemaTypeConfigProto email_schema = - SchemaTypeConfigBuilder() - .SetType("email") - .AddProperty( - PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString( - TermMatchType::PREFIX, - StringIndexingConfig::TokenizerType::PLAIN) - .SetDataType(PropertyConfigProto::DataType::STRING) - .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString( - TermMatchType::PREFIX, - StringIndexingConfig::TokenizerType::PLAIN) - .SetDataType(PropertyConfigProto::DataType::STRING) - .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument("sender", - /*index_nested_properties=*/true) - .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)) - .Build(); - SchemaProto schema = - SchemaBuilder().AddType(sender_schema).AddType(email_schema).Build(); - - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); - } - - void TearDown() override { - schema_store_.reset(); - filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); - } - - SchemaStore *schema_store() { return schema_store_.get(); } - - private: - const std::string test_dir_; - const std::string schema_store_dir_; - Filesystem filesystem_; - FakeClock fake_clock_; - std::unique_ptr<SchemaStore> 
schema_store_; -}; - -TEST_F(SectionWeightsTest, ShouldNormalizeSinglePropertyWeight) { - ScoringSpecProto spec_proto; - - TypePropertyWeights *type_property_weights = - spec_proto.add_type_property_weights(); - type_property_weights->set_schema_type("sender"); - - PropertyWeight *property_weight = - type_property_weights->add_property_weights(); - property_weight->set_weight(5.0); - property_weight->set_path("name"); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SectionWeights> section_weights, - SectionWeights::Create(schema_store(), spec_proto)); - ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId sender_schema_type_id, - schema_store()->GetSchemaTypeId("sender")); - - // section_id 0 corresponds to property "name". - // We expect 1.0 as there is only one property in the "sender" schema type - // so it should take the max normalized weight of 1.0. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(sender_schema_type_id, - /*section_id=*/0), - Eq(1.0)); -} - -TEST_F(SectionWeightsTest, ShouldAcceptMaxWeightValue) { - ScoringSpecProto spec_proto; - - TypePropertyWeights *type_property_weights = - spec_proto.add_type_property_weights(); - type_property_weights->set_schema_type("sender"); - - PropertyWeight *property_weight = - type_property_weights->add_property_weights(); - property_weight->set_weight(DBL_MAX); - property_weight->set_path("name"); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SectionWeights> section_weights, - SectionWeights::Create(schema_store(), spec_proto)); - ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId sender_schema_type_id, - schema_store()->GetSchemaTypeId("sender")); - - // section_id 0 corresponds to property "name". - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(sender_schema_type_id, - /*section_id=*/0), - Eq(1.0)); -} - -TEST_F(SectionWeightsTest, ShouldFailWithNegativeWeights) { - ScoringSpecProto spec_proto; - - TypePropertyWeights *type_property_weights = - spec_proto.add_type_property_weights(); - type_property_weights->set_schema_type("email"); - - PropertyWeight *body_property_weight = - type_property_weights->add_property_weights(); - body_property_weight->set_weight(-100.0); - body_property_weight->set_path("body"); - - EXPECT_THAT(SectionWeights::Create(schema_store(), spec_proto).status(), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); -} - -TEST_F(SectionWeightsTest, ShouldAcceptZeroWeight) { - ScoringSpecProto spec_proto; - - TypePropertyWeights *type_property_weights = - spec_proto.add_type_property_weights(); - type_property_weights->set_schema_type("email"); - - PropertyWeight *body_property_weight = - type_property_weights->add_property_weights(); - body_property_weight->set_weight(2.0); - body_property_weight->set_path("body"); - - PropertyWeight *subject_property_weight = - type_property_weights->add_property_weights(); - subject_property_weight->set_weight(0.0); - subject_property_weight->set_path("subject"); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SectionWeights> section_weights, - SectionWeights::Create(schema_store(), spec_proto)); - ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, - schema_store()->GetSchemaTypeId("email")); - - // Normalized weight for "body" property. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/0), - Eq(1.0)); - // Normalized weight for "subject" property.
- EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/2), - Eq(0.0)); -} - -TEST_F(SectionWeightsTest, ShouldNormalizeToZeroWhenAllWeightsZero) { - ScoringSpecProto spec_proto; - - TypePropertyWeights *type_property_weights = - spec_proto.add_type_property_weights(); - type_property_weights->set_schema_type("email"); - - PropertyWeight *body_property_weight = - type_property_weights->add_property_weights(); - body_property_weight->set_weight(0.0); - body_property_weight->set_path("body"); - - PropertyWeight *sender_property_weight = - type_property_weights->add_property_weights(); - sender_property_weight->set_weight(0.0); - sender_property_weight->set_path("sender.name"); - - PropertyWeight *subject_property_weight = - type_property_weights->add_property_weights(); - subject_property_weight->set_weight(0.0); - subject_property_weight->set_path("subject"); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SectionWeights> section_weights, - SectionWeights::Create(schema_store(), spec_proto)); - ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, - schema_store()->GetSchemaTypeId("email")); - - // Normalized weight for "body" property. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/0), - Eq(0.0)); - // Normalized weight for "sender.name" property (the nested property). - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/1), - Eq(0.0)); - // Normalized weight for "subject" property. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/2), - Eq(0.0)); -} - -TEST_F(SectionWeightsTest, ShouldReturnDefaultIfTypePropertyWeightsNotSet) { - ScoringSpecProto spec_proto; - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SectionWeights> section_weights, - SectionWeights::Create(schema_store(), spec_proto)); - ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, - schema_store()->GetSchemaTypeId("email")); - - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/0), - Eq(kDefaultSectionWeight)); -} - -TEST_F(SectionWeightsTest, ShouldSetNestedPropertyWeights) { - ScoringSpecProto spec_proto; - - TypePropertyWeights *type_property_weights = - spec_proto.add_type_property_weights(); - type_property_weights->set_schema_type("email"); - - PropertyWeight *body_property_weight = - type_property_weights->add_property_weights(); - body_property_weight->set_weight(1.0); - body_property_weight->set_path("body"); - - PropertyWeight *subject_property_weight = - type_property_weights->add_property_weights(); - subject_property_weight->set_weight(100.0); - subject_property_weight->set_path("subject"); - - PropertyWeight *nested_property_weight = - type_property_weights->add_property_weights(); - nested_property_weight->set_weight(50.0); - nested_property_weight->set_path("sender.name"); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SectionWeights> section_weights, - SectionWeights::Create(schema_store(), spec_proto)); - ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, - schema_store()->GetSchemaTypeId("email")); - - // Normalized weight for "body" property. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/0), - Eq(0.01)); - // Normalized weight for "sender.name" property (the nested property). 
- EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/1), - Eq(0.5)); - // Normalized weight for "subject" property. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/2), - Eq(1.0)); -} - -TEST_F(SectionWeightsTest, ShouldNormalizeIfAllWeightsBelowOne) { - ScoringSpecProto spec_proto; - - TypePropertyWeights *type_property_weights = - spec_proto.add_type_property_weights(); - type_property_weights->set_schema_type("email"); - - PropertyWeight *body_property_weight = - type_property_weights->add_property_weights(); - body_property_weight->set_weight(0.1); - body_property_weight->set_path("body"); - - PropertyWeight *sender_name_weight = - type_property_weights->add_property_weights(); - sender_name_weight->set_weight(0.2); - sender_name_weight->set_path("sender.name"); - - PropertyWeight *subject_property_weight = - type_property_weights->add_property_weights(); - subject_property_weight->set_weight(0.4); - subject_property_weight->set_path("subject"); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SectionWeights> section_weights, - SectionWeights::Create(schema_store(), spec_proto)); - ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, - schema_store()->GetSchemaTypeId("email")); - - // Normalized weight for "body" property. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/0), - Eq(1.0 / 4.0)); - // Normalized weight for "sender.name" property (the nested property). - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/1), - Eq(2.0 / 4.0)); - // Normalized weight for "subject" property. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/2), - Eq(1.0)); -} - -TEST_F(SectionWeightsTest, ShouldSetNestedPropertyWeightSeparatelyForTypes) { - ScoringSpecProto spec_proto; - - TypePropertyWeights *email_type_property_weights = - spec_proto.add_type_property_weights(); - email_type_property_weights->set_schema_type("email"); - - PropertyWeight *body_property_weight = - email_type_property_weights->add_property_weights(); - body_property_weight->set_weight(1.0); - body_property_weight->set_path("body"); - - PropertyWeight *subject_property_weight = - email_type_property_weights->add_property_weights(); - subject_property_weight->set_weight(100.0); - subject_property_weight->set_path("subject"); - - PropertyWeight *sender_name_property_weight = - email_type_property_weights->add_property_weights(); - sender_name_property_weight->set_weight(50.0); - sender_name_property_weight->set_path("sender.name"); - - TypePropertyWeights *sender_type_property_weights = - spec_proto.add_type_property_weights(); - sender_type_property_weights->set_schema_type("sender"); - - PropertyWeight *sender_property_weight = - sender_type_property_weights->add_property_weights(); - sender_property_weight->set_weight(25.0); - sender_property_weight->set_path("sender"); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SectionWeights> section_weights, - SectionWeights::Create(schema_store(), spec_proto)); - ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, - schema_store()->GetSchemaTypeId("email")); - ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId sender_schema_type_id, - schema_store()->GetSchemaTypeId("sender")); - - // Normalized weight for "sender.name" property (the nested property) - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - 
/*section_id=*/1), - Eq(0.5)); - // Normalized weight for "name" property for "sender" schema type. As it is - // the only property of the type, it should take the max normalized weight of - // 1.0. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(sender_schema_type_id, - /*section_id=*/2), - Eq(1.0)); -} - -TEST_F(SectionWeightsTest, ShouldSkipNonExistentPathWhenSettingWeights) { - ScoringSpecProto spec_proto; - - TypePropertyWeights *type_property_weights = - spec_proto.add_type_property_weights(); - type_property_weights->set_schema_type("email"); - - // If this property weight isn't skipped, then the max property weight would - // be set to 100.0 and all weights would be normalized against the max. - PropertyWeight *non_valid_property_weight = - type_property_weights->add_property_weights(); - non_valid_property_weight->set_weight(100.0); - non_valid_property_weight->set_path("sender.organization"); - - PropertyWeight *subject_property_weight = - type_property_weights->add_property_weights(); - subject_property_weight->set_weight(10.0); - subject_property_weight->set_path("subject"); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SectionWeights> section_weights, - SectionWeights::Create(schema_store(), spec_proto)); - ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, - schema_store()->GetSchemaTypeId("email")); - - // Normalized weight for "body" property. Because the weight is not explicitly - // set, it is set to the default of 1.0 before being normalized. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/0), - Eq(0.1)); - // Normalized weight for "sender.name" property (the nested property). Because - // the weight is not explicitly set, it is set to the default of 1.0 before - // being normalized. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/1), - Eq(0.1)); - // Normalized weight for "subject" property. Because the invalid property path - // is skipped when assigning weights, subject takes the max normalized weight - // of 1.0 instead. - EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, - /*section_id=*/2), - Eq(1.0)); -} - -} // namespace - -} // namespace lib -} // namespace icing diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc index 5e23a8e..a035f93 100644 --- a/icing/store/document-log-creator.cc +++ b/icing/store/document-log-creator.cc @@ -69,24 +69,33 @@ DocumentLogCreator::Create(const Filesystem* filesystem, const std::string& base_dir) { bool v0_exists = filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str()); + bool regen_derived_files = false; + +#ifdef ENABLE_V1_MIGRATION bool v1_exists = filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str()); - bool new_file = false; - int preexisting_file_version = kCurrentVersion; if (v0_exists && !v1_exists) { ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir)); // Need to regenerate derived files since documents may be written to a // different file offset in the log. - preexisting_file_version = 0; + regen_derived_files = true; } else if (!v1_exists) { // First time initializing a v1 log. There are no existing derived files at // this point, so we should generate some. "regenerate" here also means // "generate for the first time", i.e. we shouldn't expect there to be any // existing derived files. 
- new_file = true; + regen_derived_files = true; + } +#else // !ENABLE_V1_MIGRATION + if (v0_exists) { + // If migration from v0 to v1 is not enabled, then simply delete the v0 file + // and treat this as if it's our first time initializing a v1 log. + regen_derived_files = true; + filesystem->DeleteFile(MakeDocumentLogFilenameV0(base_dir).c_str()); + } +#endif // ENABLE_V1_MIGRATION ICING_ASSIGN_OR_RETURN( PortableFileBackedProtoLog<DocumentWrapper>::CreateResult @@ -97,7 +106,7 @@ DocumentLogCreator::Create(const Filesystem* filesystem, /*compress_in=*/true))); CreateResult create_result = {std::move(log_create_result), - preexisting_file_version, new_file}; + regen_derived_files}; return create_result; } diff --git a/icing/store/document-log-creator.h b/icing/store/document-log-creator.h index be8feed..51cf497 100644 --- a/icing/store/document-log-creator.h +++ b/icing/store/document-log-creator.h @@ -30,20 +30,14 @@ namespace lib { // be necessary. class DocumentLogCreator { public: - // Version 0 refers to FileBackedProtoLog - // Version 1 refers to PortableFileBackedProtoLog with kFileFormatVersion = 0 - static constexpr int32_t kCurrentVersion = 1; struct CreateResult { // The create result passed up from the PortableFileBackedProtoLog::Create. // Contains the document log. PortableFileBackedProtoLog<DocumentWrapper>::CreateResult log_create_result; - // The version number of the pre-existing document log file. - // If there is no document log file, it will be set to kCurrentVersion. - int preexisting_file_version; - - // Whether the created file is new. - bool new_file; + // Whether the caller needs to also regenerate/generate any derived files + // based off of the initialized document log. + bool regen_derived_files; }; // Creates the document log in the base_dir.
Will create one if it doesn't diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 8c8369c..226a96b 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -164,32 +164,6 @@ int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms, return expiration_timestamp_ms; } -InitializeStatsProto::RecoveryCause GetRecoveryCause( - const DocumentLogCreator::CreateResult& create_result, - bool force_recovery_and_revalidate_documents) { - if (force_recovery_and_revalidate_documents) { - return InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC; - } else if (create_result.log_create_result.has_data_loss()) { - return InitializeStatsProto::DATA_LOSS; - } else if (create_result.preexisting_file_version != - DocumentLogCreator::kCurrentVersion) { - return InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT; - } - return InitializeStatsProto::NONE; -} - -InitializeStatsProto::DocumentStoreDataStatus GetDataStatus( - DataLoss data_loss) { - switch (data_loss) { - case DataLoss::PARTIAL: - return InitializeStatsProto::PARTIAL_LOSS; - case DataLoss::COMPLETE: - return InitializeStatsProto::COMPLETE_LOSS; - case DataLoss::NONE: - return InitializeStatsProto::NO_DATA_LOSS; - } -} - } // namespace DocumentStore::DocumentStore(const Filesystem* filesystem, @@ -262,34 +236,44 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( std::move(create_result_or).ValueOrDie(); document_log_ = std::move(create_result.log_create_result.proto_log); - InitializeStatsProto::RecoveryCause recovery_cause = - GetRecoveryCause(create_result, force_recovery_and_revalidate_documents); - - if (recovery_cause != InitializeStatsProto::NONE || create_result.new_file) { - ICING_LOG(WARNING) << "Starting Document Store Recovery with cause=" - << recovery_cause << ", and create result { new_file=" - << create_result.new_file << ", preexisting_file_version=" - << create_result.preexisting_file_version << ", data_loss=" - << create_result.log_create_result.data_loss << "} and kCurrentVersion=" - << DocumentLogCreator::kCurrentVersion; + + if (create_result.regen_derived_files || + force_recovery_and_revalidate_documents || + create_result.log_create_result.has_data_loss()) { // We can't rely on any existing derived files. Recreate them from scratch. // Currently happens if: // 1) This is a new log and we don't have derived files yet // 2) Client wanted us to force a regeneration. // 3) Log has some data loss, can't rely on existing derived data. + if (create_result.log_create_result.has_data_loss() && + initialize_stats != nullptr) { + ICING_LOG(WARNING) + << "Data loss in document log, regenerating derived files."; + initialize_stats->set_document_store_recovery_cause( + InitializeStatsProto::DATA_LOSS); + + if (create_result.log_create_result.data_loss == DataLoss::PARTIAL) { + // Ground truth is partially lost. + initialize_stats->set_document_store_data_status( + InitializeStatsProto::PARTIAL_LOSS); + } else { + // Ground truth is completely lost.
+ initialize_stats->set_document_store_data_status( + InitializeStatsProto::COMPLETE_LOSS); + } + } + std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer(); libtextclassifier3::Status status = RegenerateDerivedFiles(force_recovery_and_revalidate_documents); if (initialize_stats != nullptr && - recovery_cause != InitializeStatsProto::NONE) { + (force_recovery_and_revalidate_documents || + create_result.log_create_result.has_data_loss())) { // Only consider it a recovery if the client forced a recovery or there // was data loss. Otherwise, this could just be the first time we're // initializing and generating derived files. initialize_stats->set_document_store_recovery_latency_ms( document_recovery_timer->GetElapsedMilliseconds()); - initialize_stats->set_document_store_recovery_cause(recovery_cause); - initialize_stats->set_document_store_data_status( - GetDataStatus(create_result.log_create_result.data_loss)); } if (!status.ok()) { ICING_LOG(ERROR) @@ -298,13 +282,13 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( } } else { if (!InitializeExistingDerivedFiles().ok()) { - ICING_LOG(WARNING) + ICING_VLOG(1) << "Couldn't find derived files or failed to initialize them, " "regenerating derived files for DocumentStore."; std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer(); libtextclassifier3::Status status = RegenerateDerivedFiles( - /*force_recovery_and_revalidate_documents=*/false); - if (initialize_stats != nullptr) { + /*force_recovery_and_revalidate_documents*/ false); + if (initialize_stats != nullptr && num_documents() > 0) { initialize_stats->set_document_store_recovery_cause( InitializeStatsProto::IO_ERROR); initialize_stats->set_document_store_recovery_latency_ms( @@ -431,19 +415,7 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles( // Iterates through document log auto iterator = document_log_->GetIterator(); auto iterator_status = iterator.Advance(); - libtextclassifier3::StatusOr<int64_t> element_size = - document_log_->GetElementsFileSize(); - libtextclassifier3::StatusOr<int64_t> disk_usage = - document_log_->GetDiskUsage(); - if (element_size.ok() && disk_usage.ok()) { - ICING_VLOG(1) << "Starting recovery of document store. Document store " - "elements file size:" - << element_size.ValueOrDie() - << ", disk usage=" << disk_usage.ValueOrDie(); - } while (iterator_status.ok()) { - ICING_VLOG(2) << "Attempting to read document at offset=" - << iterator.GetOffset(); libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or = document_log_->ReadProto(iterator.GetOffset()); @@ -558,7 +530,7 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles( libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() { // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset(). document_key_mapper_.reset(); - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = KeyMapper<DocumentId>::Delete(*filesystem_, base_dir_); @@ -568,7 +540,7 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() { return status; } - // TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN + // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. 
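// One possible shape for the more robust macro these TODOs ask for
// (hypothetical sketch, not an existing Icing or libtextclassifier3 macro):
//
//   #define ICING_RETURN_IF_ERROR_LOGGED(expr)              \
//     do {                                                  \
//       ::libtextclassifier3::Status _status = (expr);      \
//       if (!_status.ok()) {                                \
//         ICING_LOG(ERROR) << _status.error_message();      \
//         return _status;                                   \
//       }                                                   \
//     } while (false)
//
// This would collapse the repeated Delete-then-log-then-return pattern in each
// Reset*() helper into a single statement.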
auto document_key_mapper_or = KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize); @@ -584,7 +556,7 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() { libtextclassifier3::Status DocumentStore::ResetDocumentIdMapper() { // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset(). document_id_mapper_.reset(); - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = FileBackedVector<int64_t>::Delete( *filesystem_, MakeDocumentIdMapperFilename(base_dir_)); @@ -593,7 +565,7 @@ libtextclassifier3::Status DocumentStore::ResetDocumentIdMapper() { << "Failed to delete old document_id mapper"; return status; } - // TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN + // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. auto document_id_mapper_or = FileBackedVector<int64_t>::Create( *filesystem_, MakeDocumentIdMapperFilename(base_dir_), @@ -646,7 +618,7 @@ libtextclassifier3::Status DocumentStore::ResetFilterCache() { libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() { // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset(). namespace_mapper_.reset(); - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = KeyMapper<NamespaceId>::Delete( *filesystem_, MakeNamespaceMapperFilename(base_dir_)); @@ -666,7 +638,7 @@ libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() { libtextclassifier3::Status DocumentStore::ResetCorpusMapper() { // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset(). corpus_mapper_.reset(); - // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = KeyMapper<CorpusId>::Delete( *filesystem_, MakeCorpusMapperFilename(base_dir_)); @@ -1777,63 +1749,5 @@ libtextclassifier3::Status DocumentStore::SetUsageScores( return usage_store_->SetUsageScores(document_id, usage_scores); } -libtextclassifier3::StatusOr< - google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>> -DocumentStore::CollectCorpusInfo() const { - google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo> - corpus_info; - libtextclassifier3::StatusOr<const SchemaProto*> schema_proto_or = - schema_store_->GetSchema(); - if (!schema_proto_or.ok()) { - return corpus_info; - } - // Maps from CorpusId to the corresponding protocol buffer in the result. 
- std::unordered_map<CorpusId, DocumentDebugInfoProto::CorpusInfo*> info_map; - std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace = - namespace_mapper_->GetValuesToKeys(); - const SchemaProto* schema_proto = schema_proto_or.ValueOrDie(); - for (DocumentId document_id = 0; document_id < filter_cache_->num_elements(); - ++document_id) { - if (!InternalDoesDocumentExist(document_id)) { - continue; - } - ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data, - filter_cache_->Get(document_id)); - ICING_ASSIGN_OR_RETURN(const DocumentAssociatedScoreData* score_data, - score_cache_->Get(document_id)); - const std::string& name_space = - namespace_id_to_namespace[filter_data->namespace_id()]; - const std::string& schema = - schema_proto->types()[filter_data->schema_type_id()].schema_type(); - auto iter = info_map.find(score_data->corpus_id()); - if (iter == info_map.end()) { - DocumentDebugInfoProto::CorpusInfo* entry = corpus_info.Add(); - entry->set_namespace_(name_space); - entry->set_schema(schema); - iter = info_map.insert({score_data->corpus_id(), entry}).first; - } - iter->second->set_total_documents(iter->second->total_documents() + 1); - iter->second->set_total_token(iter->second->total_token() + - score_data->length_in_tokens()); - } - return corpus_info; -} - -libtextclassifier3::StatusOr<DocumentDebugInfoProto> -DocumentStore::GetDebugInfo(int verbosity) const { - DocumentDebugInfoProto debug_info; - *debug_info.mutable_document_storage_info() = GetStorageInfo(); - ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum()); - debug_info.set_crc(crc.Get()); - if (verbosity > 0) { - ICING_ASSIGN_OR_RETURN(google::protobuf::RepeatedPtrField< - DocumentDebugInfoProto::CorpusInfo> - corpus_info, - CollectCorpusInfo()); - *debug_info.mutable_corpus_info() = std::move(corpus_info); - } - return debug_info; -} - } // namespace lib } // namespace icing diff --git a/icing/store/document-store.h b/icing/store/document-store.h index e6d2e5c..a60aab1 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -27,7 +27,6 @@ #include "icing/file/file-backed-vector.h" #include "icing/file/filesystem.h" #include "icing/file/portable-file-backed-proto-log.h" -#include "icing/proto/debug.pb.h" #include "icing/proto/document.pb.h" #include "icing/proto/document_wrapper.pb.h" #include "icing/proto/logging.pb.h" @@ -423,17 +422,6 @@ class DocumentStore { // INTERNAL_ERROR on compute error libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const; - // Get debug information for the document store. - // verbosity <= 0, simplest debug information - // verbosity > 0, also return the total number of documents and tokens in each - // (namespace, schema type) pair. - // - // Returns: - // DocumentDebugInfoProto on success - // INTERNAL_ERROR on IO errors, crc compute error - libtextclassifier3::StatusOr<DocumentDebugInfoProto> GetDebugInfo( - int verbosity) const; - private: // Use DocumentStore::Create() to instantiate. DocumentStore(const Filesystem* filesystem, std::string_view base_dir, @@ -509,6 +497,28 @@ class DocumentStore { bool force_recovery_and_revalidate_documents, InitializeStatsProto* initialize_stats); + // Initializes a new DocumentStore and sets up any underlying files. 
+ // + // Returns: + // Data loss status on success, effectively always DataLoss::NONE + // INTERNAL on I/O error + libtextclassifier3::StatusOr<DataLoss> InitializeNewStore( + InitializeStatsProto* initialize_stats); + + // Initializes a DocumentStore over an existing directory of files. + // + // stats will be set if non-null + // + // Returns: + // Data loss status on success + // INTERNAL on I/O error + libtextclassifier3::StatusOr<DataLoss> InitializeExistingStore( + bool force_recovery_and_revalidate_documents, + InitializeStatsProto* initialize_stats); + + libtextclassifier3::StatusOr<DataLoss> MigrateFromV0ToV1( + InitializeStatsProto* initialize_stats); + + // Creates sub-components and verifies the integrity of each sub-component. // This assumes that the underlying files already exist, and will return // an error if it doesn't find what it's expecting. @@ -708,13 +718,6 @@ class DocumentStore { // the document_id_mapper somehow became larger than the filter cache. DocumentStorageInfoProto CalculateDocumentStatusCounts( DocumentStorageInfoProto storage_info) const; - - // Returns: - // - on success, a RepeatedPtrField for CorpusInfo collected. - // - OUT_OF_RANGE, this should never happen. - libtextclassifier3::StatusOr<google::protobuf::RepeatedPtrField< - DocumentDebugInfoProto::CorpusInfo>> - CollectCorpusInfo() const; }; } // namespace lib diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc index fc3fd9d..ce608fc 100644 --- a/icing/store/document-store_benchmark.cc +++ b/icing/store/document-store_benchmark.cc @@ -32,7 +32,6 @@ #include "icing/document-builder.h" #include "icing/file/filesystem.h" #include "icing/proto/document.pb.h" -#include "icing/proto/persist.pb.h" #include "icing/proto/schema.pb.h" #include "icing/schema-builder.h" #include "icing/schema/schema-store.h" @@ -64,13 +63,13 @@ namespace lib { namespace { -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; -constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; class DestructibleDirectory { public: @@ -256,74 +255,6 @@ void BM_Delete(benchmark::State& state) { } BENCHMARK(BM_Delete); -void BM_Create(benchmark::State& state) { - Filesystem filesystem; - Clock clock; - - std::string directory = GetTestTempDir() + "/icing"; - std::string document_store_dir = directory + "/store"; - - std::unique_ptr<SchemaStore> schema_store = - CreateSchemaStore(filesystem, directory, &clock); - - // Create an initial document store and put some data in.
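// One plausible way the InitializeNewStore()/InitializeExistingStore() split
// declared above fits together; this dispatch is a sketch, not the actual
// implementation, and the MakeDocumentLogFilename() helper is hypothetical:
libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
    bool force_recovery_and_revalidate_documents,
    InitializeStatsProto* initialize_stats) {
  const std::string document_log_file = MakeDocumentLogFilename(base_dir_);
  if (!filesystem_->FileExists(document_log_file.c_str())) {
    // Nothing on disk yet: set up fresh files; data loss is effectively NONE.
    return InitializeNewStore(initialize_stats);
  }
  // Reuse existing files, migrating and regenerating derived files as needed;
  // stats are only populated when this amounts to a real recovery.
  return InitializeExistingStore(force_recovery_and_revalidate_documents,
                                 initialize_stats);
}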
- { - DestructibleDirectory ddir(filesystem, directory); - - filesystem.CreateDirectoryRecursively(document_store_dir.data()); - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); - std::unique_ptr<DocumentStore> document_store = - std::move(create_result.document_store); - - DocumentProto document = CreateDocument("namespace", "uri"); - ICING_ASSERT_OK(document_store->Put(document)); - ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::FULL)); - } - - // Recreating it with some content to checksum over. - DestructibleDirectory ddir(filesystem, directory); - - filesystem.CreateDirectoryRecursively(document_store_dir.data()); - - for (auto s : state) { - benchmark::DoNotOptimize(DocumentStore::Create( - &filesystem, document_store_dir, &clock, schema_store.get())); - } -} -BENCHMARK(BM_Create); - -void BM_ComputeChecksum(benchmark::State& state) { - Filesystem filesystem; - Clock clock; - - std::string directory = GetTestTempDir() + "/icing"; - DestructibleDirectory ddir(filesystem, directory); - - std::string document_store_dir = directory + "/store"; - std::unique_ptr<SchemaStore> schema_store = - CreateSchemaStore(filesystem, directory, &clock); - - filesystem.CreateDirectoryRecursively(document_store_dir.data()); - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem, document_store_dir, &clock, - schema_store.get())); - std::unique_ptr<DocumentStore> document_store = - std::move(create_result.document_store); - - DocumentProto document = CreateDocument("namespace", "uri"); - ICING_ASSERT_OK(document_store->Put(document)); - ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::LITE)); - - for (auto s : state) { - benchmark::DoNotOptimize(document_store->ComputeChecksum()); - } -} -BENCHMARK(BM_ComputeChecksum); - } // namespace } // namespace lib diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index a30b4e4..3ed4c4e 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -29,6 +29,7 @@ #include "icing/file/filesystem.h" #include "icing/file/memory-mapped-file.h" #include "icing/file/mock-filesystem.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/equals-proto.h" #include "icing/portable/platform.h" #include "icing/proto/document.pb.h" @@ -44,7 +45,6 @@ #include "icing/store/namespace-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" @@ -85,16 +85,16 @@ const NamespaceStorageInfoProto& GetNamespaceStorageInfo( return std::move(NamespaceStorageInfoProto()); } -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; -constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = - StringIndexingConfig::TokenizerType::PLAIN; +constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = + StringIndexingConfig_TokenizerType_Code_PLAIN; -constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; +constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; -constexpr 
PropertyConfigProto::DataType::Code TYPE_INT = - PropertyConfigProto::DataType::INT64; +constexpr PropertyConfigProto_DataType_Code TYPE_INT = + PropertyConfigProto_DataType_Code_INT64; UsageReport CreateUsageReport(std::string name_space, std::string uri, int64 timestamp_ms, @@ -3170,6 +3170,15 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) { ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE)); } +// TODO(b/185845269) Re-enable this test by copying over a full valid set of +// document store files. Right now this test only includes the score_cache and +// the document store header. +// +// This causes a problem now because this cl changes behavior to not consider an +// InitializeExistingDerivedFiles failure to be a recovery if there is nothing +// to recover because the document store is empty. +#define DISABLE_BACKWARDS_COMPAT_TEST +#ifndef DISABLE_BACKWARDS_COMPAT_TEST TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { // The directory testdata/score_cache_without_length_in_tokens/document_store // contains only the scoring_cache and the document_store_header (holding the @@ -3185,26 +3194,29 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { // Get src files std::string document_store_without_length_in_tokens; - if (IsAndroidArm() || IsIosPlatform()) { + if (IsAndroidPlatform() || IsIosPlatform()) { document_store_without_length_in_tokens = GetTestFilePath( "icing/testdata/score_cache_without_length_in_tokens/" "document_store_android_ios_compatible"); - } else if (IsAndroidX86()) { - document_store_without_length_in_tokens = GetTestFilePath( - "icing/testdata/score_cache_without_length_in_tokens/" - "document_store_android_x86"); } else { document_store_without_length_in_tokens = GetTestFilePath( "icing/testdata/score_cache_without_length_in_tokens/" "document_store"); } + std::vector<std::string> document_store_files; Filesystem filesystem; - ICING_LOG(INFO) << "Copying files " - << document_store_without_length_in_tokens; - ASSERT_THAT( - filesystem.CopyDirectory(document_store_without_length_in_tokens.c_str(), - document_store_dir_.c_str(), /*recursive=*/true), - true); + filesystem.ListDirectory(document_store_without_length_in_tokens.c_str(), - &document_store_files); + + ICING_LOG(INFO) << "Copying files " << document_store_without_length_in_tokens + << ' ' << document_store_files.size(); + for (size_t i = 0; i != document_store_files.size(); i++) { + std::string src = absl_ports::StrCat( + document_store_without_length_in_tokens, "/", document_store_files[i]); + std::string dst = + absl_ports::StrCat(document_store_dir_, "/", document_store_files[i]); + ASSERT_THAT(filesystem_.CopyFile(src.c_str(), dst.c_str()), true); + } InitializeStatsProto initialize_stats; ICING_ASSERT_OK_AND_ASSIGN( @@ -3215,11 +3227,12 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { &initialize_stats)); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); - // The document log is using the legacy v0 format so that a migration is - // needed, which will also trigger regeneration. - EXPECT_EQ(initialize_stats.document_store_recovery_cause(), - InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT); + // The store_cache triggers regeneration because its element size is + // inconsistent: expected 20 (current new size), actual 12 (as per the v0 + // score_cache).
+ EXPECT_TRUE(initialize_stats.has_document_store_recovery_cause()); } +#endif // DISABLE_BACKWARDS_COMPAT_TEST TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) { ICING_ASSERT_OK_AND_ASSIGN( @@ -3409,22 +3422,18 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) { { // Create the document store the second time and force recovery - InitializeStatsProto initialize_stats; ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get(), - /*force_recovery_and_revalidate_documents=*/true, - &initialize_stats)); + DocumentStore::Create( + &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(), + /*force_recovery_and_revalidate_documents=*/true)); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); // Ensure that the type id of the email document has been correctly updated. ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data, doc_store->GetDocumentFilterData(docid)); - EXPECT_THAT(filter_data.schema_type_id(), Eq(1)); - EXPECT_THAT(initialize_stats.document_store_recovery_cause(), - Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); + ASSERT_THAT(filter_data.schema_type_id(), Eq(1)); } } @@ -3547,6 +3556,7 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) { SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + DocumentId docid = kInvalidDocumentId; DocumentProto docWithBody = DocumentBuilder() .SetKey("icing", "email/1") @@ -3579,12 +3589,8 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); - DocumentId docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody)); - ASSERT_NE(docid, kInvalidDocumentId); - docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody)); - ASSERT_NE(docid, kInvalidDocumentId); ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()), IsOkAndHolds(EqualsProto(docWithBody))); @@ -3652,6 +3658,7 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + DocumentId docid = kInvalidDocumentId; DocumentProto docWithBody = DocumentBuilder() .SetKey("icing", "email/1") @@ -3684,12 +3691,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); - DocumentId docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody)); - ASSERT_NE(docid, kInvalidDocumentId); - docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody)); - ASSERT_NE(docid, kInvalidDocumentId); ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()), IsOkAndHolds(EqualsProto(docWithBody))); @@ -3832,8 +3835,7 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) { // Check that we didn't lose anything. A migration also doesn't technically // count as a recovery. 
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); - EXPECT_EQ(initialize_stats.document_store_recovery_cause(), - InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT); + EXPECT_FALSE(initialize_stats.has_document_store_recovery_cause()); // Document 1 and 3 were put normally, and document 2 was deleted in our // testdata files. @@ -3856,164 +3858,6 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) { } #endif // DISABLE_BACKWARDS_COMPAT_TEST -TEST_F(DocumentStoreTest, GetDebugInfo) { - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("email") - .AddProperty( - PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .AddType(SchemaTypeConfigBuilder().SetType("person").AddProperty( - PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - std::string schema_store_dir = schema_store_dir_ + "_custom"; - filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str()); - filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - - ICING_ASSERT_OK(schema_store->SetSchema(schema)); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); - std::unique_ptr<DocumentStore> document_store = - std::move(create_result.document_store); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "email/1") - .SetSchema("email") - .AddStringProperty("subject", "aa bb cc") - .AddStringProperty("body", "dd ee") - .SetCreationTimestampMs(1) - .Build(); - ICING_ASSERT_OK(document_store->Put(document1, 5)); - - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "email/2") - .SetSchema("email") - .AddStringProperty("subject", "aa bb") - .AddStringProperty("body", "cc") - .SetCreationTimestampMs(1) - .Build(); - ICING_ASSERT_OK(document_store->Put(document2, 3)); - - DocumentProto document3 = DocumentBuilder() - .SetKey("namespace2", "email/3") - .SetSchema("email") - .AddStringProperty("subject", "aa") - .AddStringProperty("body", "") - .SetCreationTimestampMs(1) - .Build(); - ICING_ASSERT_OK(document_store->Put(document3, 1)); - - DocumentProto document4 = DocumentBuilder() - .SetKey("namespace1", "person/1") - .SetSchema("person") - .AddStringProperty("name", "test test") - .SetCreationTimestampMs(1) - .Build(); - ICING_ASSERT_OK(document_store->Put(document4, 2)); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out1, - document_store->GetDebugInfo(/*verbosity=*/1)); - EXPECT_THAT(out1.crc(), Gt(0)); - EXPECT_THAT(out1.document_storage_info().num_alive_documents(), Eq(4)); - EXPECT_THAT(out1.document_storage_info().num_deleted_documents(), Eq(0)); - EXPECT_THAT(out1.document_storage_info().num_expired_documents(), Eq(0)); - - DocumentDebugInfoProto::CorpusInfo info1, info2, info3; - info1.set_namespace_("namespace1"); - info1.set_schema("email"); - info1.set_total_documents(1); // document1 - info1.set_total_token(5); - - info2.set_namespace_("namespace2"); - info2.set_schema("email"); - info2.set_total_documents(2); // 
document2 and document3 - info2.set_total_token(4); // 3 + 1 - - info3.set_namespace_("namespace1"); - info3.set_schema("person"); - info3.set_total_documents(1); // document4 - info3.set_total_token(2); - - EXPECT_THAT(out1.corpus_info(), - UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2), - EqualsProto(info3))); - - // Delete document3. - ICING_ASSERT_OK(document_store->Delete("namespace2", "email/3")); - ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out2, - document_store->GetDebugInfo(/*verbosity=*/1)); - EXPECT_THAT(out2.crc(), Gt(0)); - EXPECT_THAT(out2.crc(), Not(Eq(out1.crc()))); - EXPECT_THAT(out2.document_storage_info().num_alive_documents(), Eq(3)); - EXPECT_THAT(out2.document_storage_info().num_deleted_documents(), Eq(1)); - EXPECT_THAT(out2.document_storage_info().num_expired_documents(), Eq(0)); - info2.set_total_documents(1); // document2 - info2.set_total_token(3); - EXPECT_THAT(out2.corpus_info(), - UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2), - EqualsProto(info3))); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out3, - document_store->GetDebugInfo(/*verbosity=*/0)); - EXPECT_THAT(out3.corpus_info(), IsEmpty()); -} - -TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) { - std::string schema_store_dir = schema_store_dir_ + "_custom"; - filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str()); - filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store.get())); - std::unique_ptr<DocumentStore> document_store = - std::move(create_result.document_store); - ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out, - document_store->GetDebugInfo(/*verbosity=*/1)); - EXPECT_THAT(out.crc(), Gt(0)); - EXPECT_THAT(out.document_storage_info().num_alive_documents(), Eq(0)); - EXPECT_THAT(out.document_storage_info().num_deleted_documents(), Eq(0)); - EXPECT_THAT(out.document_storage_info().num_expired_documents(), Eq(0)); - EXPECT_THAT(out.corpus_info(), IsEmpty()); -} - -TEST_F(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> document_store = - std::move(create_result.document_store); - ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out, - document_store->GetDebugInfo(/*verbosity=*/1)); - EXPECT_THAT(out.crc(), Gt(0)); - EXPECT_THAT(out.document_storage_info().num_alive_documents(), Eq(0)); - EXPECT_THAT(out.document_storage_info().num_deleted_documents(), Eq(0)); - EXPECT_THAT(out.document_storage_info().num_expired_documents(), Eq(0)); - EXPECT_THAT(out.corpus_info(), IsEmpty()); -} - } // namespace } // namespace lib diff --git a/icing/store/namespace-checker-impl.h b/icing/store/namespace-checker-impl.h deleted file mode 100644 index bcd0643..0000000 --- a/icing/store/namespace-checker-impl.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ICING_STORE_NAMESPACE_CHECKER_IMPL_H_ -#define ICING_STORE_NAMESPACE_CHECKER_IMPL_H_ - -#include "icing/store/document-id.h" -#include "icing/store/document-store.h" -#include "icing/store/namespace-checker.h" -#include "icing/store/namespace-id.h" - -namespace icing { -namespace lib { - -class NamespaceCheckerImpl : public NamespaceChecker { - public: - explicit NamespaceCheckerImpl( - const DocumentStore* document_store, - std::unordered_set<NamespaceId> target_namespace_ids) - : document_store_(*document_store), - target_namespace_ids_(std::move(target_namespace_ids)) {} - - bool BelongsToTargetNamespaces(DocumentId document_id) const override { - if (target_namespace_ids_.empty()) { - return true; - } - auto document_filter_data_or_ = - document_store_.GetDocumentFilterData(document_id); - return document_filter_data_or_.ok() && - target_namespace_ids_.count( - document_filter_data_or_.ValueOrDie().namespace_id()) > 0; - } - const DocumentStore& document_store_; - std::unordered_set<NamespaceId> target_namespace_ids_; -}; - -} // namespace lib -} // namespace icing - -#endif // ICING_STORE_NAMESPACE_CHECKER_IMPL_H_
\ No newline at end of file diff --git a/icing/store/namespace-checker.h b/icing/store/namespace-checker.h deleted file mode 100644 index 8812ab1..0000000 --- a/icing/store/namespace-checker.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ICING_STORE_NAMESPACE_CHECKER_H_ -#define ICING_STORE_NAMESPACE_CHECKER_H_ - -#include "icing/store/document-id.h" - -namespace icing { -namespace lib { - -class NamespaceChecker { - public: - virtual ~NamespaceChecker() = default; - - // Check whether the given document id belongs to the target namespaces. - // Returns: - // On success, - // - true: the given document id belongs to the target namespaces - // - false: the given document id doesn't belong to the target namespaces - // OUT_OF_RANGE if document_id is negative or exceeds previously seen - // DocumentIds - // NOT_FOUND if the document or the filter data is not found - // INTERNAL_ERROR on all other errors - virtual bool BelongsToTargetNamespaces(DocumentId document_id) const = 0; -}; - -} // namespace lib -} // namespace icing - -#endif // ICING_STORE_NAMESPACE_CHECKER_H_ diff --git a/icing/testing/always-true-namespace-checker-impl.h b/icing/testing/always-true-namespace-checker-impl.h deleted file mode 100644 index f7744b6..0000000 --- a/icing/testing/always-true-namespace-checker-impl.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ICING_TESTING_ALWAYS_TRUE_NAMESPACE_CHECKER_IMPL_H_ -#define ICING_TESTING_ALWAYS_TRUE_NAMESPACE_CHECKER_IMPL_H_ - -#include "icing/store/document-id.h" -#include "icing/store/namespace-checker.h" - -namespace icing { -namespace lib { - -class AlwaysTrueNamespaceCheckerImpl : public NamespaceChecker { - public: - bool BelongsToTargetNamespaces(DocumentId document_id) const override { - return true; - } -}; - -} // namespace lib -} // namespace icing - -#endif // ICING_TESTING_ALWAYS_TRUE_NAMESPACE_CHECKER_IMPL_H_
\ No newline at end of file diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h index f83fe0a..8d8bdf2 100644 --- a/icing/testing/common-matchers.h +++ b/icing/testing/common-matchers.h @@ -121,6 +121,7 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") { const SchemaStore::SetSchemaResult& actual = arg; if (actual.success == expected.success && + actual.index_incompatible == expected.index_incompatible && actual.old_schema_type_ids_changed == expected.old_schema_type_ids_changed && actual.schema_types_deleted_by_name == @@ -130,12 +131,7 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") { actual.schema_types_incompatible_by_name == expected.schema_types_incompatible_by_name && actual.schema_types_incompatible_by_id == - expected.schema_types_incompatible_by_id && - actual.schema_types_new_by_name == expected.schema_types_new_by_name && - actual.schema_types_changed_fully_compatible_by_name == - expected.schema_types_changed_fully_compatible_by_name && - actual.schema_types_index_incompatible_by_name == - expected.schema_types_index_incompatible_by_name) { + expected.schema_types_incompatible_by_id) { return true; } @@ -195,82 +191,37 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") { absl_ports::NumberFormatter()), "]"); - // Format schema_types_new_by_name - std::string actual_schema_types_new_by_name = absl_ports::StrCat( - "[", absl_ports::StrJoin(actual.schema_types_new_by_name, ","), "]"); - - std::string expected_schema_types_new_by_name = absl_ports::StrCat( - "[", absl_ports::StrJoin(expected.schema_types_new_by_name, ","), "]"); - - // Format schema_types_changed_fully_compatible_by_name - std::string actual_schema_types_changed_fully_compatible_by_name = - absl_ports::StrCat( - "[", - absl_ports::StrJoin( - actual.schema_types_changed_fully_compatible_by_name, ","), - "]"); - - std::string expected_schema_types_changed_fully_compatible_by_name = - absl_ports::StrCat( - "[", - absl_ports::StrJoin( - expected.schema_types_changed_fully_compatible_by_name, ","), - "]"); - - // Format schema_types_deleted_by_id - std::string actual_schema_types_index_incompatible_by_name = - absl_ports::StrCat( - "[", - absl_ports::StrJoin(actual.schema_types_index_incompatible_by_name, - ","), - "]"); - - std::string expected_schema_types_index_incompatible_by_name = - absl_ports::StrCat( - "[", - absl_ports::StrJoin(expected.schema_types_index_incompatible_by_name, - ","), - "]"); - *result_listener << IcingStringUtil::StringPrintf( "\nExpected {\n" "\tsuccess=%d,\n" + "\tindex_incompatible=%d,\n" "\told_schema_type_ids_changed=%s,\n" "\tschema_types_deleted_by_name=%s,\n" "\tschema_types_deleted_by_id=%s,\n" "\tschema_types_incompatible_by_name=%s,\n" "\tschema_types_incompatible_by_id=%s\n" - "\tschema_types_new_by_name=%s,\n" - "\tschema_types_index_incompatible_by_name=%s,\n" - "\tschema_types_changed_fully_compatible_by_name=%s\n" "}\n" "Actual {\n" "\tsuccess=%d,\n" + "\tindex_incompatible=%d,\n" "\told_schema_type_ids_changed=%s,\n" "\tschema_types_deleted_by_name=%s,\n" "\tschema_types_deleted_by_id=%s,\n" "\tschema_types_incompatible_by_name=%s,\n" "\tschema_types_incompatible_by_id=%s\n" - "\tschema_types_new_by_name=%s,\n" - "\tschema_types_index_incompatible_by_name=%s,\n" - "\tschema_types_changed_fully_compatible_by_name=%s\n" "}\n", - expected.success, expected_old_schema_type_ids_changed.c_str(), + expected.success, expected.index_incompatible, + expected_old_schema_type_ids_changed.c_str(), expected_schema_types_deleted_by_name.c_str(), 
expected_schema_types_deleted_by_id.c_str(), expected_schema_types_incompatible_by_name.c_str(), - expected_schema_types_incompatible_by_id.c_str(), - expected_schema_types_new_by_name.c_str(), - expected_schema_types_changed_fully_compatible_by_name.c_str(), - expected_schema_types_index_incompatible_by_name.c_str(), actual.success, - actual_old_schema_type_ids_changed.c_str(), + expected_schema_types_incompatible_by_id.c_str(), actual.success, + actual.index_incompatible, actual_old_schema_type_ids_changed.c_str(), actual_schema_types_deleted_by_name.c_str(), actual_schema_types_deleted_by_id.c_str(), actual_schema_types_incompatible_by_name.c_str(), - actual_schema_types_incompatible_by_id.c_str(), - actual_schema_types_new_by_name.c_str(), - actual_schema_types_changed_fully_compatible_by_name.c_str(), - actual_schema_types_index_incompatible_by_name.c_str()); + actual_schema_types_incompatible_by_id.c_str()); + return false; } diff --git a/icing/testing/random-string.cc b/icing/testing/random-string.cc deleted file mode 100644 index 27f83bc..0000000 --- a/icing/testing/random-string.cc +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "icing/testing/random-string.h" - -namespace icing { -namespace lib { - -std::vector<std::string> GenerateUniqueTerms(int num_terms) { - char before_a = 'a' - 1; - std::string term(1, before_a); - std::vector<std::string> terms; - int current_char = 0; - for (int permutation = 0; permutation < num_terms; ++permutation) { - if (term[current_char] != 'z') { - ++term[current_char]; - } else { - if (current_char < term.length() - 1) { - // The string currently looks something like this "zzzaa" - // 1. Find the first char after this one that isn't - current_char = term.find_first_not_of('z', current_char); - if (current_char != std::string::npos) { - // 2. Increment that character - ++term[current_char]; - - // 3. Set every character prior to current_char to 'a' - term.replace(0, current_char, current_char, 'a'); - } else { - // Every character in this string is a 'z'. We need to grow. - term = std::string(term.length() + 1, 'a'); - } - } else { - term = std::string(term.length() + 1, 'a'); - } - current_char = 0; - } - terms.push_back(term); - } - return terms; -} - -} // namespace lib -} // namespace icing diff --git a/icing/testing/random-string.h b/icing/testing/random-string.h index fd8d87b..ac36924 100644 --- a/icing/testing/random-string.h +++ b/icing/testing/random-string.h @@ -15,7 +15,6 @@ #ifndef ICING_TESTING_RANDOM_STRING_H_ #define ICING_TESTING_RANDOM_STRING_H_ -#include <algorithm> #include <random> #include <string> @@ -37,10 +36,6 @@ std::string RandomString(const std::string_view alphabet, size_t len, return result; } -// Returns a vector containing num_terms unique terms. Terms are created in -// non-random order starting with "a" to "z" to "aa" to "zz", etc. 
-std::vector<std::string> GenerateUniqueTerms(int num_terms); - } // namespace lib } // namespace icing diff --git a/icing/testing/random-string_test.cc b/icing/testing/random-string_test.cc deleted file mode 100644 index 759fec0..0000000 --- a/icing/testing/random-string_test.cc +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "icing/testing/random-string.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -using ::testing::ElementsAre; -using ::testing::Eq; -using ::testing::IsEmpty; - -namespace icing { -namespace lib { - -namespace { - -TEST(RandomStringTest, GenerateUniqueTerms) { - EXPECT_THAT(GenerateUniqueTerms(0), IsEmpty()); - EXPECT_THAT(GenerateUniqueTerms(1), ElementsAre("a")); - EXPECT_THAT(GenerateUniqueTerms(4), ElementsAre("a", "b", "c", "d")); - EXPECT_THAT(GenerateUniqueTerms(29), - ElementsAre("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", - "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", - "w", "x", "y", "z", "aa", "ba", "ca")); - EXPECT_THAT(GenerateUniqueTerms(56), - ElementsAre("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", - "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", - "w", "x", "y", "z", "aa", "ba", "ca", "da", "ea", - "fa", "ga", "ha", "ia", "ja", "ka", "la", "ma", "na", - "oa", "pa", "qa", "ra", "sa", "ta", "ua", "va", "wa", - "xa", "ya", "za", "ab", "bb", "cb", "db")); - EXPECT_THAT(GenerateUniqueTerms(56).at(54), Eq("cb")); - EXPECT_THAT(GenerateUniqueTerms(26 * 26 * 26).at(26), Eq("aa")); - EXPECT_THAT(GenerateUniqueTerms(26 * 26 * 26).at(26 * 27), Eq("aaa")); - EXPECT_THAT(GenerateUniqueTerms(26 * 26 * 26).at(26 * 27 - 6), Eq("uz")); - EXPECT_THAT(GenerateUniqueTerms(26 * 26 * 26).at(26 * 27 + 5), Eq("faa")); -} - -} // namespace - -} // namespace lib -} // namespace icing diff --git a/icing/testing/snippet-helpers.cc b/icing/testing/snippet-helpers.cc index 7a71987..cfd20c2 100644 --- a/icing/testing/snippet-helpers.cc +++ b/icing/testing/snippet-helpers.cc @@ -77,16 +77,6 @@ std::vector<std::string_view> GetMatches( return matches; } -std::vector<std::string_view> GetSubMatches( - std::string_view content, const SnippetProto::EntryProto& snippet_proto) { - std::vector<std::string_view> matches; - for (const SnippetMatchProto& match : snippet_proto.snippet_matches()) { - matches.push_back(content.substr(match.exact_match_byte_position(), - match.submatch_byte_length())); - } - return matches; -} - std::string_view GetString(const DocumentProto* document, std::string_view property_path) { std::vector<std::string_view> properties = diff --git a/icing/testing/snippet-helpers.h b/icing/testing/snippet-helpers.h index 73b2ce2..defadeb 100644 --- a/icing/testing/snippet-helpers.h +++ b/icing/testing/snippet-helpers.h @@ -40,10 +40,6 @@ std::vector<std::string_view> GetWindows( std::vector<std::string_view> GetMatches( std::string_view content, const SnippetProto::EntryProto& snippet_proto); -// Retrieves all submatches defined by the 
snippet_proto for the content. -std::vector<std::string_view> GetSubMatches( - std::string_view content, const SnippetProto::EntryProto& snippet_proto); - // Retrieves the string value held in the document corresponding to the // property_path. // Example: diff --git a/icing/tokenization/combined-tokenizer_test.cc b/icing/tokenization/combined-tokenizer_test.cc deleted file mode 100644 index 0212e4f..0000000 --- a/icing/tokenization/combined-tokenizer_test.cc +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (C) 2022 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <string_view> -#include <vector> - -#include "testing/base/public/gmock.h" -#include "testing/base/public/gunit.h" -#include "third_party/icing/portable/platform.h" -#include "third_party/icing/proto/schema_proto_portable.pb.h" -#include "third_party/icing/testing/common-matchers.h" -#include "third_party/icing/testing/icu-data-file-helper.h" -#include "third_party/icing/testing/jni-test-helpers.h" -#include "third_party/icing/testing/test-data.h" -#include "third_party/icing/tokenization/language-segmenter-factory.h" -#include "third_party/icing/tokenization/language-segmenter.h" -#include "third_party/icing/tokenization/tokenizer-factory.h" -#include "third_party/icing/tokenization/tokenizer.h" -#include "third_party/icu/include/unicode/uloc.h" - -namespace icing { -namespace lib { - -namespace { - -using ::testing::ElementsAre; - -// This test exists to ensure that the different tokenizers treat different -// segments of text in the same manner. -class CombinedTokenizerTest : public ::testing::Test { - protected: - void SetUp() override { - if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { - ICING_ASSERT_OK( - // File generated via icu_data_file rule in //third_party/icing/BUILD. - icu_data_file_helper::SetUpICUDataFile( - GetTestFilePath("third_party/icing/icu.dat"))); - } - jni_cache_ = GetTestJniCache(); - - language_segmenter_factory::SegmenterOptions options(ULOC_US, - jni_cache_.get()); - ICING_ASSERT_OK_AND_ASSIGN( - lang_segmenter_, - language_segmenter_factory::Create(std::move(options))); - } - - std::unique_ptr<const JniCache> jni_cache_; - std::unique_ptr<LanguageSegmenter> lang_segmenter_; -}; - -std::vector<std::string> GetTokenTerms(const std::vector<Token>& tokens) { - std::vector<std::string> terms; - terms.reserve(tokens.size()); - for (const Token& token : tokens) { - if (token.type == Token::Type::REGULAR) { - terms.push_back(std::string(token.text)); - } - } - return terms; -} - -} // namespace - -TEST_F(CombinedTokenizerTest, SpecialCharacters) { - const std::string_view kText = "😊 Hello! 
Goodbye?"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> indexing_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get())); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> query_tokenizer, - CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY, - lang_segmenter_.get())); - - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens, - indexing_tokenizer->TokenizeAll(kText)); - std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens); - EXPECT_THAT(indexing_terms, ElementsAre("😊", "Hello", "Goodbye")); - - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens, - query_tokenizer->TokenizeAll(kText)); - std::vector<std::string> query_terms = GetTokenTerms(query_tokens); - EXPECT_THAT(query_terms, ElementsAre("😊", "Hello", "Goodbye")); -} - -TEST_F(CombinedTokenizerTest, Parentheses) { - const std::string_view kText = "((paren1)(paren2) (last paren))"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> indexing_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get())); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> query_tokenizer, - CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY, - lang_segmenter_.get())); - - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens, - indexing_tokenizer->TokenizeAll(kText)); - std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens); - EXPECT_THAT(indexing_terms, ElementsAre("paren1", "paren2", "last", "paren")); - - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens, - query_tokenizer->TokenizeAll(kText)); - std::vector<std::string> query_terms = GetTokenTerms(query_tokens); - EXPECT_THAT(query_terms, ElementsAre("paren1", "paren2", "last", "paren")); -} - -TEST_F(CombinedTokenizerTest, Negation) { - const std::string_view kText = "-foo -bar -baz"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> indexing_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get())); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> query_tokenizer, - CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY, - lang_segmenter_.get())); - - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens, - indexing_tokenizer->TokenizeAll(kText)); - std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens); - EXPECT_THAT(indexing_terms, ElementsAre("foo", "bar", "baz")); - - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens, - query_tokenizer->TokenizeAll(kText)); - std::vector<std::string> query_terms = GetTokenTerms(query_tokens); - EXPECT_THAT(query_terms, ElementsAre("foo", "bar", "baz")); -} - -TEST_F(CombinedTokenizerTest, Colons) { - const std::string_view kText = ":foo: :bar baz:"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> indexing_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get())); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> query_tokenizer, - CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY, - lang_segmenter_.get())); - - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens, - indexing_tokenizer->TokenizeAll(kText)); - std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens); - EXPECT_THAT(indexing_terms, 
ElementsAre("foo", "bar", "baz")); - - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens, - query_tokenizer->TokenizeAll(kText)); - std::vector<std::string> query_terms = GetTokenTerms(query_tokens); - EXPECT_THAT(query_terms, ElementsAre("foo", "bar", "baz")); -} - -TEST_F(CombinedTokenizerTest, ColonsPropertyRestricts) { - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> indexing_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get())); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> query_tokenizer, - CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY, - lang_segmenter_.get())); - - // This is a difference between the two tokenizers. "foo:bar" is a single - // token to the plain tokenizer because ':' is a word connector. But "foo:bar" - // is a property restrict to the query tokenizer - so "foo" is the property - // and "bar" is the only text term. - constexpr std::string_view kText = "foo:bar"; - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens, - indexing_tokenizer->TokenizeAll(kText)); - std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens); - EXPECT_THAT(indexing_terms, ElementsAre("foo:bar")); - - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens, - query_tokenizer->TokenizeAll(kText)); - std::vector<std::string> query_terms = GetTokenTerms(query_tokens); - EXPECT_THAT(query_terms, ElementsAre("bar")); - - // This difference, however, should only apply to the first ':'. A - // second ':' should be treated by both tokenizers as a word connector. - constexpr std::string_view kText2 = "foo:bar:baz"; - ICING_ASSERT_OK_AND_ASSIGN(indexing_tokens, - indexing_tokenizer->TokenizeAll(kText2)); - indexing_terms = GetTokenTerms(indexing_tokens); - EXPECT_THAT(indexing_terms, ElementsAre("foo:bar:baz")); - - ICING_ASSERT_OK_AND_ASSIGN(query_tokens, - query_tokenizer->TokenizeAll(kText2)); - query_terms = GetTokenTerms(query_tokens); - EXPECT_THAT(query_terms, ElementsAre("bar:baz")); -} - -TEST_F(CombinedTokenizerTest, Punctuation) { - const std::string_view kText = "Who? What!? 
Why & How."; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> indexing_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get())); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> query_tokenizer, - CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY, - lang_segmenter_.get())); - - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens, - indexing_tokenizer->TokenizeAll(kText)); - std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens); - EXPECT_THAT(indexing_terms, ElementsAre("Who", "What", "Why", "How")); - - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens, - query_tokenizer->TokenizeAll(kText)); - std::vector<std::string> query_terms = GetTokenTerms(query_tokens); - EXPECT_THAT(query_terms, ElementsAre("Who", "What", "Why", "How")); -} - -} // namespace lib -} // namespace icing diff --git a/icing/tokenization/icu/icu-language-segmenter.cc b/icing/tokenization/icu/icu-language-segmenter.cc index dc7b0a4..cb31441 100644 --- a/icing/tokenization/icu/icu-language-segmenter.cc +++ b/icing/tokenization/icu/icu-language-segmenter.cc @@ -59,7 +59,7 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { ~IcuLanguageSegmenterIterator() { ubrk_close(break_iterator_); - utext_close(u_text_); + utext_close(&u_text_); } // Advances to the next term. Returns false if it has reached the end. @@ -83,6 +83,9 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { return false; } + if (!IsValidSegment()) { + return Advance(); + } return true; } @@ -223,7 +226,8 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { return absl_ports::AbortedError( "Could not retrieve valid utf8 character!"); } - if (term_end_index_exclusive_ > offset_iterator_.utf8_index()) { + if (term_end_index_exclusive_ > offset_iterator_.utf8_index() || + !IsValidSegment()) { return ResetToTermEndingBeforeUtf32(term_start_iterator.utf32_index()); } return term_start_iterator.utf32_index(); @@ -249,7 +253,7 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { : break_iterator_(nullptr), text_(text), locale_(locale), - u_text_(nullptr), + u_text_(UTEXT_INITIALIZER), offset_iterator_(text), term_start_index_(0), term_end_index_exclusive_(0) {} @@ -257,13 +261,10 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { // Returns true on success bool Initialize() { UErrorCode status = U_ZERO_ERROR; - u_text_ = utext_openUTF8(nullptr, text_.data(), text_.length(), &status); - if (u_text_ == nullptr) { - return false; - } + utext_openUTF8(&u_text_, text_.data(), text_.length(), &status); break_iterator_ = ubrk_open(UBRK_WORD, locale_.data(), /*text=*/nullptr, /*textLength=*/0, &status); - ubrk_setUText(break_iterator_, u_text_, &status); + ubrk_setUText(break_iterator_, &u_text_, &status); return !U_FAILURE(status); } @@ -290,6 +291,23 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { term_start_index_ = 0; } + bool IsValidSegment() const { + // Rule 1: all ASCII terms will be returned. + // We know it's a ASCII term by checking the first char. + if (i18n_utils::IsAscii(text_[term_start_index_])) { + return true; + } + + UChar32 uchar32 = i18n_utils::GetUChar32At(text_.data(), text_.length(), + term_start_index_); + // Rule 2: for non-ASCII terms, only the alphabetic terms are returned. + // We know it's an alphabetic term by checking the first unicode character. 
+ if (u_isUAlphabetic(uchar32)) { + return true; + } + return false; + } + // The underlying class that does the segmentation, ubrk_close() must be // called after using. UBreakIterator* break_iterator_; @@ -303,8 +321,8 @@ std::string_view locale_; // A thin wrapper around the input UTF8 text, needed by break_iterator_. - // Allocated by calling utext_openUTF8() and freed by calling utext_close(). - UText* u_text_; + // utext_close() must be called after using. + UText u_text_; // Offset iterator. This iterator is not guaranteed to point to any particular // character, but is guaranteed to point to a valid UTF character sequence. diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc index 4098be5..01eb7d8 100644 --- a/icing/tokenization/icu/icu-language-segmenter_test.cc +++ b/icing/tokenization/icu/icu-language-segmenter_test.cc @@ -21,8 +21,8 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/absl_ports/str_cat.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/icu-i18n-test-utils.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" @@ -191,7 +191,7 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Non_ASCII_Non_Alphabetic) { // Full-width (non-ASCII) punctuation marks and special characters are left // out. EXPECT_THAT(language_segmenter->GetAllTerms("。？·Hello！×"), - IsOkAndHolds(ElementsAre("。", "？", "·", "Hello", "！", "×"))); + IsOkAndHolds(ElementsAre("Hello"))); } TEST_P(IcuLanguageSegmenterAllLocalesTest, Acronym) { @@ -252,9 +252,9 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, WordConnector) { // Connectors don't connect if one side is an invalid term (？) EXPECT_THAT(language_segmenter->GetAllTerms("bar:baz:？"), - IsOkAndHolds(ElementsAre("bar:baz", ":", "？"))); + IsOkAndHolds(ElementsAre("bar:baz", ":"))); EXPECT_THAT(language_segmenter->GetAllTerms("？:bar:baz"), - IsOkAndHolds(ElementsAre("？", ":", "bar:baz"))); + IsOkAndHolds(ElementsAre(":", "bar:baz"))); EXPECT_THAT(language_segmenter->GetAllTerms("3:14"), IsOkAndHolds(ElementsAre("3", ":", "14"))); EXPECT_THAT(language_segmenter->GetAllTerms("私:は"), @@ -372,15 +372,6 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Number) { IsOkAndHolds(ElementsAre("-", "123"))); } -TEST_P(IcuLanguageSegmenterAllLocalesTest, FullWidthNumbers) { - ICING_ASSERT_OK_AND_ASSIGN( - auto language_segmenter, - language_segmenter_factory::Create( - GetSegmenterOptions(GetLocale(), jni_cache_.get()))); - EXPECT_THAT(language_segmenter->GetAllTerms("０１２３４５６７８９"), - IsOkAndHolds(ElementsAre("０１２３４５６７８９"))); -} - TEST_P(IcuLanguageSegmenterAllLocalesTest, ContinuousWhitespaces) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, @@ -417,16 +408,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, CJKT) { // have whitespaces as word delimiter.
// Chinese - EXPECT_THAT( - language_segmenter->GetAllTerms("我每天走路去上班。"), - IsOkAndHolds(ElementsAre("我", "每天", "走路", "去", "上班", "。"))); + EXPECT_THAT(language_segmenter->GetAllTerms("我每天走路去上班。"), + IsOkAndHolds(ElementsAre("我", "每天", "走路", "去", "上班"))); // Japanese EXPECT_THAT(language_segmenter->GetAllTerms("私は毎日仕事に歩いています。"), IsOkAndHolds(ElementsAre("私", "は", "毎日", "仕事", "に", "歩", - "い", "てい", "ます", "。"))); + "い", "てい", "ます"))); // Khmer EXPECT_THAT(language_segmenter->GetAllTerms("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"), - IsOkAndHolds(ElementsAre("ញុំ", "ដើរទៅ", "ធ្វើការ", "រាល់ថ្ងៃ", "។"))); + IsOkAndHolds(ElementsAre("ញុំ", "ដើរទៅ", "ធ្វើការ", "រាល់ថ្ងៃ"))); // Thai EXPECT_THAT( language_segmenter->GetAllTerms("ฉันเดินไปทำงานทุกวัน"), @@ -859,19 +849,16 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kChinese)); // String: "我每天走路去上班。" - // ^ ^ ^ ^^ ^ - // UTF-8 idx: 0 3 9 15 18 24 - // UTF-32 idx: 0 1 3 5 6 8 + // ^ ^ ^ ^^ + // UTF-8 idx: 0 3 9 15 18 + // UTF-32 idx: 0 1 3 5 6 EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1))); EXPECT_THAT(itr->GetTerm(), Eq("每天")); EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq("走路")); - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8))); - EXPECT_THAT(itr->GetTerm(), Eq("。")); - - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(8), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); } @@ -886,21 +873,18 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kJapanese)); // String: "私は毎日仕事に歩いています。" - // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ - // UTF-8 idx: 0 3 6 12 18 21 24 27 33 39 - // UTF-32 idx: 0 1 2 4 6 7 8 9 11 13 + // ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 6 12 18 21 24 27 33 + // UTF-32 idx: 0 1 2 4 6 7 8 9 11 EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1))); EXPECT_THAT(itr->GetTerm(), Eq("は")); - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(13), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(11), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(3), IsOkAndHolds(Eq(4))); EXPECT_THAT(itr->GetTerm(), Eq("仕事")); - - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(13))); - EXPECT_THAT(itr->GetTerm(), Eq("。")); } TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermAfterUtf32) { @@ -912,16 +896,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kKhmer)); // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" - // ^ ^ ^ ^ ^ - // UTF-8 idx: 0 9 24 45 69 - // UTF-32 idx: 0 3 8 15 23 + // ^ ^ ^ ^ + // UTF-8 idx: 0 9 24 45 + // UTF-32 idx: 0 3 8 15 EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq("ដើរទៅ")); - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15), IsOkAndHolds(Eq(23))); - EXPECT_THAT(itr->GetTerm(), Eq("។")); - - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(23), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty());
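// Every expectation change above follows from the IsValidSegment() rules
// added in icu-language-segmenter.cc: ASCII terms always survive, and
// non-ASCII terms survive only when they start with an alphabetic code
// point. A self-contained sketch of that predicate (the free-function form,
// its name, and the header paths are illustrative, not the real API):
#include <string_view>

#include "icing/util/i18n-utils.h"
#include "unicode/uchar.h"

bool SegmentIsKept(std::string_view text, int term_start_index) {
  // Rule 1: terms beginning with an ASCII character are kept, which is why
  // "bar:baz", ":", "3", and "14" still appear in the expectations.
  if (i18n_utils::IsAscii(text[term_start_index])) {
    return true;
  }
  // Rule 2: a non-ASCII term is kept only if its first code point is
  // alphabetic, which drops "。", "។", and other full-width punctuation.
  UChar32 uchar32 =
      i18n_utils::GetUChar32At(text.data(), text.length(), term_start_index);
  return u_isUAlphabetic(uchar32);
}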
diff --git a/icing/tokenization/language-segmenter-iterator_test.cc b/icing/tokenization/language-segmenter-iterator_test.cc index 3aff45c..d293581 100644 --- a/icing/tokenization/language-segmenter-iterator_test.cc +++ b/icing/tokenization/language-segmenter-iterator_test.cc @@ -15,9 +15,9 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/absl_ports/str_cat.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/platform.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" diff --git a/icing/tokenization/language-segmenter_benchmark.cc b/icing/tokenization/language-segmenter_benchmark.cc index 6f7d4df..bd86169 100644 --- a/icing/tokenization/language-segmenter_benchmark.cc +++ b/icing/tokenization/language-segmenter_benchmark.cc @@ -14,8 +14,8 @@ #include "testing/base/public/benchmark.h" #include "gmock/gmock.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/language-segmenter.h" diff --git a/icing/tokenization/plain-tokenizer.cc b/icing/tokenization/plain-tokenizer.cc index 7a1949f..13fe550 100644 --- a/icing/tokenization/plain-tokenizer.cc +++ b/icing/tokenization/plain-tokenizer.cc @@ -66,9 +66,9 @@ class PlainTokenIterator : public Tokenizer::Iterator { Token GetToken() const override { if (current_term_.empty()) { - return Token(Token::Type::INVALID); + return Token(Token::INVALID); } - return Token(Token::Type::REGULAR, current_term_); + return Token(Token::REGULAR, current_term_); } libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenStart() @@ -81,8 +81,8 @@ class PlainTokenIterator : public Tokenizer::Iterator { return base_iterator_->CalculateTermEndExclusive(); } - bool ResetToTokenStartingAfter(int32_t utf32_offset) override { - if (!base_iterator_->ResetToTermStartingAfterUtf32(utf32_offset).ok()) { + bool ResetToTokenAfter(int32_t offset) override { + if (!base_iterator_->ResetToTermStartingAfterUtf32(offset).ok()) { return false; } current_term_ = base_iterator_->GetTerm(); @@ -93,17 +93,15 @@ class PlainTokenIterator : public Tokenizer::Iterator { return true; } - bool ResetToTokenEndingBefore(int32_t utf32_offset) override { + bool ResetToTokenBefore(int32_t offset) override { ICING_ASSIGN_OR_RETURN( - utf32_offset, - base_iterator_->ResetToTermEndingBeforeUtf32(utf32_offset), false); + offset, base_iterator_->ResetToTermEndingBeforeUtf32(offset), false); current_term_ = base_iterator_->GetTerm(); while (!IsValidTerm(current_term_)) { // Haven't found a valid term yet. Retrieve the term prior to this one // from the segmenter. 
ICING_ASSIGN_OR_RETURN( - utf32_offset, - base_iterator_->ResetToTermEndingBeforeUtf32(utf32_offset), false); + offset, base_iterator_->ResetToTermEndingBeforeUtf32(offset), false); current_term_ = base_iterator_->GetTerm(); } return true; diff --git a/icing/tokenization/plain-tokenizer_test.cc b/icing/tokenization/plain-tokenizer_test.cc index c48b51e..7490bfa 100644 --- a/icing/tokenization/plain-tokenizer_test.cc +++ b/icing/tokenization/plain-tokenizer_test.cc @@ -18,9 +18,9 @@ #include "gmock/gmock.h" #include "icing/absl_ports/str_cat.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/platform.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/icu-i18n-test-utils.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" @@ -68,27 +68,26 @@ TEST_F(PlainTokenizerTest, Simple) { EXPECT_THAT(plain_tokenizer->TokenizeAll(""), IsOkAndHolds(IsEmpty())); - EXPECT_THAT( - plain_tokenizer->TokenizeAll("Hello World"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"), - EqualsToken(Token::Type::REGULAR, "World")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll("Hello World"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"), + EqualsToken(Token::REGULAR, "World")))); EXPECT_THAT( plain_tokenizer->TokenizeAll( "Lorem ipsum dolor sit amet, consectetur adipiscing elit. " "Duis efficitur iaculis auctor."), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Lorem"), - EqualsToken(Token::Type::REGULAR, "ipsum"), - EqualsToken(Token::Type::REGULAR, "dolor"), - EqualsToken(Token::Type::REGULAR, "sit"), - EqualsToken(Token::Type::REGULAR, "amet"), - EqualsToken(Token::Type::REGULAR, "consectetur"), - EqualsToken(Token::Type::REGULAR, "adipiscing"), - EqualsToken(Token::Type::REGULAR, "elit"), - EqualsToken(Token::Type::REGULAR, "Duis"), - EqualsToken(Token::Type::REGULAR, "efficitur"), - EqualsToken(Token::Type::REGULAR, "iaculis"), - EqualsToken(Token::Type::REGULAR, "auctor")))); + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Lorem"), + EqualsToken(Token::REGULAR, "ipsum"), + EqualsToken(Token::REGULAR, "dolor"), + EqualsToken(Token::REGULAR, "sit"), + EqualsToken(Token::REGULAR, "amet"), + EqualsToken(Token::REGULAR, "consectetur"), + EqualsToken(Token::REGULAR, "adipiscing"), + EqualsToken(Token::REGULAR, "elit"), + EqualsToken(Token::REGULAR, "Duis"), + EqualsToken(Token::REGULAR, "efficitur"), + EqualsToken(Token::REGULAR, "iaculis"), + EqualsToken(Token::REGULAR, "auctor")))); } TEST_F(PlainTokenizerTest, Whitespace) { @@ -108,18 +107,16 @@ TEST_F(PlainTokenizerTest, Whitespace) { // 0x0009 is horizontal tab, considered as a whitespace std::string text_with_horizontal_tab = absl_ports::StrCat("Hello", UCharToString(0x0009), "World"); - EXPECT_THAT( - plain_tokenizer->TokenizeAll(text_with_horizontal_tab), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"), - EqualsToken(Token::Type::REGULAR, "World")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll(text_with_horizontal_tab), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"), + EqualsToken(Token::REGULAR, "World")))); // 0x000B is vertical tab, considered as a whitespace std::string text_with_vertical_tab = absl_ports::StrCat("Hello", UCharToString(0x000B), "World"); - EXPECT_THAT( - plain_tokenizer->TokenizeAll(text_with_vertical_tab), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"), - EqualsToken(Token::Type::REGULAR, 
"World")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll(text_with_vertical_tab), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"), + EqualsToken(Token::REGULAR, "World")))); } TEST_F(PlainTokenizerTest, Punctuation) { @@ -134,39 +131,38 @@ TEST_F(PlainTokenizerTest, Punctuation) { language_segmenter.get())); // Half-width punctuation marks are filtered out. - EXPECT_THAT( - plain_tokenizer->TokenizeAll( - "Hello, World! Hello: World. \"Hello\" World?"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"), - EqualsToken(Token::Type::REGULAR, "World"), - EqualsToken(Token::Type::REGULAR, "Hello"), - EqualsToken(Token::Type::REGULAR, "World"), - EqualsToken(Token::Type::REGULAR, "Hello"), - EqualsToken(Token::Type::REGULAR, "World")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll( + "Hello, World! Hello: World. \"Hello\" World?"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"), + EqualsToken(Token::REGULAR, "World"), + EqualsToken(Token::REGULAR, "Hello"), + EqualsToken(Token::REGULAR, "World"), + EqualsToken(Token::REGULAR, "Hello"), + EqualsToken(Token::REGULAR, "World")))); // Full-width punctuation marks are filtered out. std::vector<std::string_view> exp_tokens; if (IsCfStringTokenization()) { EXPECT_THAT( plain_tokenizer->TokenizeAll("你好,世界!你好:世界。“你好”世界?"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "你"), - EqualsToken(Token::Type::REGULAR, "好"), - EqualsToken(Token::Type::REGULAR, "世界"), - EqualsToken(Token::Type::REGULAR, "你"), - EqualsToken(Token::Type::REGULAR, "好"), - EqualsToken(Token::Type::REGULAR, "世界"), - EqualsToken(Token::Type::REGULAR, "你"), - EqualsToken(Token::Type::REGULAR, "好"), - EqualsToken(Token::Type::REGULAR, "世界")))); + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "你"), + EqualsToken(Token::REGULAR, "好"), + EqualsToken(Token::REGULAR, "世界"), + EqualsToken(Token::REGULAR, "你"), + EqualsToken(Token::REGULAR, "好"), + EqualsToken(Token::REGULAR, "世界"), + EqualsToken(Token::REGULAR, "你"), + EqualsToken(Token::REGULAR, "好"), + EqualsToken(Token::REGULAR, "世界")))); } else { EXPECT_THAT( plain_tokenizer->TokenizeAll("你好,世界!你好:世界。“你好”世界?"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "你好"), - EqualsToken(Token::Type::REGULAR, "世界"), - EqualsToken(Token::Type::REGULAR, "你好"), - EqualsToken(Token::Type::REGULAR, "世界"), - EqualsToken(Token::Type::REGULAR, "你好"), - EqualsToken(Token::Type::REGULAR, "世界")))); + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "你好"), + EqualsToken(Token::REGULAR, "世界"), + EqualsToken(Token::REGULAR, "你好"), + EqualsToken(Token::REGULAR, "世界"), + EqualsToken(Token::REGULAR, "你好"), + EqualsToken(Token::REGULAR, "世界")))); } } @@ -184,16 +180,14 @@ TEST_F(PlainTokenizerTest, SpecialCharacters) { // Right now we don't have special logic for these characters, just output // them as tokens. 
- EXPECT_THAT( - plain_tokenizer->TokenizeAll("1+1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "1"), - EqualsToken(Token::Type::REGULAR, "+"), - EqualsToken(Token::Type::REGULAR, "1")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll("1+1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "1"), + EqualsToken(Token::REGULAR, "+"), + EqualsToken(Token::REGULAR, "1")))); - EXPECT_THAT( - plain_tokenizer->TokenizeAll("$50"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "$"), - EqualsToken(Token::Type::REGULAR, "50")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll("$50"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "$"), + EqualsToken(Token::REGULAR, "50")))); } TEST_F(PlainTokenizerTest, CJKT) { @@ -209,13 +203,12 @@ TEST_F(PlainTokenizerTest, CJKT) { tokenizer_factory::CreateIndexingTokenizer( StringIndexingConfig::TokenizerType::PLAIN, language_segmenter.get())); - EXPECT_THAT( - plain_tokenizer->TokenizeAll("我每天走路去上班。"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "我"), - EqualsToken(Token::Type::REGULAR, "每天"), - EqualsToken(Token::Type::REGULAR, "走路"), - EqualsToken(Token::Type::REGULAR, "去"), - EqualsToken(Token::Type::REGULAR, "上班")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll("我每天走路去上班。"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "我"), + EqualsToken(Token::REGULAR, "每天"), + EqualsToken(Token::REGULAR, "走路"), + EqualsToken(Token::REGULAR, "去"), + EqualsToken(Token::REGULAR, "上班")))); // Japanese options = language_segmenter_factory::SegmenterOptions(ULOC_JAPANESE, jni_cache_.get()); @@ -227,44 +220,41 @@ TEST_F(PlainTokenizerTest, CJKT) { StringIndexingConfig::TokenizerType::PLAIN, language_segmenter.get())); if (IsCfStringTokenization()) { - EXPECT_THAT( - plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "私"), - EqualsToken(Token::Type::REGULAR, "は"), - EqualsToken(Token::Type::REGULAR, "毎日"), - EqualsToken(Token::Type::REGULAR, "仕事"), - EqualsToken(Token::Type::REGULAR, "に"), - EqualsToken(Token::Type::REGULAR, "歩い"), - EqualsToken(Token::Type::REGULAR, "て"), - EqualsToken(Token::Type::REGULAR, "い"), - EqualsToken(Token::Type::REGULAR, "ます")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "私"), + EqualsToken(Token::REGULAR, "は"), + EqualsToken(Token::REGULAR, "毎日"), + EqualsToken(Token::REGULAR, "仕事"), + EqualsToken(Token::REGULAR, "に"), + EqualsToken(Token::REGULAR, "歩い"), + EqualsToken(Token::REGULAR, "て"), + EqualsToken(Token::REGULAR, "い"), + EqualsToken(Token::REGULAR, "ます")))); } else { - EXPECT_THAT( - plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "私"), - EqualsToken(Token::Type::REGULAR, "は"), - EqualsToken(Token::Type::REGULAR, "毎日"), - EqualsToken(Token::Type::REGULAR, "仕事"), - EqualsToken(Token::Type::REGULAR, "に"), - EqualsToken(Token::Type::REGULAR, "歩"), - EqualsToken(Token::Type::REGULAR, "い"), - EqualsToken(Token::Type::REGULAR, "てい"), - EqualsToken(Token::Type::REGULAR, "ます")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "私"), + EqualsToken(Token::REGULAR, "は"), + EqualsToken(Token::REGULAR, "毎日"), + EqualsToken(Token::REGULAR, "仕事"), + EqualsToken(Token::REGULAR, "に"), + EqualsToken(Token::REGULAR, "歩"), + EqualsToken(Token::REGULAR, "い"), + EqualsToken(Token::REGULAR, "てい"), + EqualsToken(Token::REGULAR, "ます")))); } // 
Khmer - EXPECT_THAT( - plain_tokenizer->TokenizeAll("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "ញុំ"), - EqualsToken(Token::Type::REGULAR, "ដើរទៅ"), - EqualsToken(Token::Type::REGULAR, "ធ្វើការ"), - EqualsToken(Token::Type::REGULAR, "រាល់ថ្ងៃ")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "ញុំ"), + EqualsToken(Token::REGULAR, "ដើរទៅ"), + EqualsToken(Token::REGULAR, "ធ្វើការ"), + EqualsToken(Token::REGULAR, "រាល់ថ្ងៃ")))); // Korean - EXPECT_THAT(plain_tokenizer->TokenizeAll("나는 매일 출근합니다."), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::REGULAR, "나는"), - EqualsToken(Token::Type::REGULAR, "매일"), - EqualsToken(Token::Type::REGULAR, "출근합니다")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll("나는 매일 출근합니다."), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "나는"), + EqualsToken(Token::REGULAR, "매일"), + EqualsToken(Token::REGULAR, "출근합니다")))); // Thai // DIFFERENCE!! Disagreement over how to segment "ทุกวัน" (iOS groups). @@ -274,24 +264,23 @@ TEST_F(PlainTokenizerTest, CJKT) { std::vector<Token> tokens, plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน")); - EXPECT_THAT(tokens, ElementsAre(EqualsToken(Token::Type::REGULAR, "ฉัน"), - EqualsToken(Token::Type::REGULAR, "เดิน"), - EqualsToken(Token::Type::REGULAR, "ไป"), - EqualsToken(Token::Type::REGULAR, "ทำงาน"), - EqualsToken(Token::Type::REGULAR, "ทุกวัน"))); + EXPECT_THAT(tokens, ElementsAre(EqualsToken(Token::REGULAR, "ฉัน"), + EqualsToken(Token::REGULAR, "เดิน"), + EqualsToken(Token::REGULAR, "ไป"), + EqualsToken(Token::REGULAR, "ทำงาน"), + EqualsToken(Token::REGULAR, "ทุกวัน"))); } else { - EXPECT_THAT( - plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "ฉัน"), - EqualsToken(Token::Type::REGULAR, "เดิน"), - EqualsToken(Token::Type::REGULAR, "ไป"), - EqualsToken(Token::Type::REGULAR, "ทำงาน"), - EqualsToken(Token::Type::REGULAR, "ทุก"), - EqualsToken(Token::Type::REGULAR, "วัน")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "ฉัน"), + EqualsToken(Token::REGULAR, "เดิน"), + EqualsToken(Token::REGULAR, "ไป"), + EqualsToken(Token::REGULAR, "ทำงาน"), + EqualsToken(Token::REGULAR, "ทุก"), + EqualsToken(Token::REGULAR, "วัน")))); } } -TEST_F(PlainTokenizerTest, ResetToTokenStartingAfterSimple) { +TEST_F(PlainTokenizerTest, ResetToTokenAfterSimple) { language_segmenter_factory::SegmenterOptions options(ULOC_US, jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( @@ -305,13 +294,13 @@ TEST_F(PlainTokenizerTest, ResetToTokenStartingAfterSimple) { constexpr std::string_view kText = "f b"; auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie(); - EXPECT_TRUE(iterator->ResetToTokenStartingAfter(0)); - EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::Type::REGULAR, "b")); + EXPECT_TRUE(iterator->ResetToTokenAfter(0)); + EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "b")); - EXPECT_FALSE(iterator->ResetToTokenStartingAfter(2)); + EXPECT_FALSE(iterator->ResetToTokenAfter(2)); } -TEST_F(PlainTokenizerTest, ResetToTokenEndingBeforeSimple) { +TEST_F(PlainTokenizerTest, ResetToTokenBeforeSimple) { language_segmenter_factory::SegmenterOptions options(ULOC_US, jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( @@ -325,13 +314,13 @@ TEST_F(PlainTokenizerTest, ResetToTokenEndingBeforeSimple) { constexpr std::string_view kText = "f b"; auto iterator = 
plain_tokenizer->Tokenize(kText).ValueOrDie(); - EXPECT_TRUE(iterator->ResetToTokenEndingBefore(2)); - EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::Type::REGULAR, "f")); + EXPECT_TRUE(iterator->ResetToTokenBefore(2)); + EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "f")); - EXPECT_FALSE(iterator->ResetToTokenEndingBefore(0)); + EXPECT_FALSE(iterator->ResetToTokenBefore(0)); } -TEST_F(PlainTokenizerTest, ResetToTokenStartingAfter) { +TEST_F(PlainTokenizerTest, ResetToTokenAfter) { language_segmenter_factory::SegmenterOptions options(ULOC_US, jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( @@ -343,12 +332,11 @@ TEST_F(PlainTokenizerTest, ResetToTokenStartingAfter) { language_segmenter.get())); constexpr std::string_view kText = " foo . bar baz.. bat "; - EXPECT_THAT( - plain_tokenizer->TokenizeAll(kText), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "foo"), - EqualsToken(Token::Type::REGULAR, "bar"), - EqualsToken(Token::Type::REGULAR, "baz"), - EqualsToken(Token::Type::REGULAR, "bat")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll(kText), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "foo"), + EqualsToken(Token::REGULAR, "bar"), + EqualsToken(Token::REGULAR, "baz"), + EqualsToken(Token::REGULAR, "bat")))); std::vector<std::string> expected_text = { "foo", // 0: " foo . bar" "bar", // 1: "foo . bar " @@ -371,19 +359,19 @@ TEST_F(PlainTokenizerTest, ResetToTokenStartingAfter) { auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie(); EXPECT_TRUE(iterator->Advance()); - EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::Type::REGULAR, "foo")); + EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "foo")); for (int i = 0; i < kText.length(); ++i) { if (i < expected_text.size()) { - EXPECT_TRUE(iterator->ResetToTokenStartingAfter(i)); + EXPECT_TRUE(iterator->ResetToTokenAfter(i)); EXPECT_THAT(iterator->GetToken(), - EqualsToken(Token::Type::REGULAR, expected_text[i])); + EqualsToken(Token::REGULAR, expected_text[i])); } else { - EXPECT_FALSE(iterator->ResetToTokenStartingAfter(i)); + EXPECT_FALSE(iterator->ResetToTokenAfter(i)); } } } -TEST_F(PlainTokenizerTest, ResetToTokenEndingBefore) { +TEST_F(PlainTokenizerTest, ResetToTokenBefore) { language_segmenter_factory::SegmenterOptions options(ULOC_US, jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( @@ -395,12 +383,11 @@ TEST_F(PlainTokenizerTest, ResetToTokenEndingBefore) { language_segmenter.get())); constexpr std::string_view kText = " foo . bar baz.. bat "; - EXPECT_THAT( - plain_tokenizer->TokenizeAll(kText), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "foo"), - EqualsToken(Token::Type::REGULAR, "bar"), - EqualsToken(Token::Type::REGULAR, "baz"), - EqualsToken(Token::Type::REGULAR, "bat")))); + EXPECT_THAT(plain_tokenizer->TokenizeAll(kText), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "foo"), + EqualsToken(Token::REGULAR, "bar"), + EqualsToken(Token::REGULAR, "baz"), + EqualsToken(Token::REGULAR, "bat")))); std::vector<std::string> expected_text = { "bat", // 20: "baz.. bat " "baz", // 19: " baz.. 
bat" @@ -423,16 +410,15 @@ TEST_F(PlainTokenizerTest, ResetToTokenEndingBefore) { auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie(); EXPECT_TRUE(iterator->Advance()); - EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::Type::REGULAR, "foo")); + EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "foo")); for (int i = kText.length() - 1; i >= 0; --i) { int expected_index = kText.length() - 1 - i; if (expected_index < expected_text.size()) { - EXPECT_TRUE(iterator->ResetToTokenEndingBefore(i)); - EXPECT_THAT( - iterator->GetToken(), - EqualsToken(Token::Type::REGULAR, expected_text[expected_index])); + EXPECT_TRUE(iterator->ResetToTokenBefore(i)); + EXPECT_THAT(iterator->GetToken(), + EqualsToken(Token::REGULAR, expected_text[expected_index])); } else { - EXPECT_FALSE(iterator->ResetToTokenEndingBefore(i)); + EXPECT_FALSE(iterator->ResetToTokenBefore(i)); } } } diff --git a/icing/tokenization/raw-query-tokenizer.cc b/icing/tokenization/raw-query-tokenizer.cc index ff449a7..205d3a2 100644 --- a/icing/tokenization/raw-query-tokenizer.cc +++ b/icing/tokenization/raw-query-tokenizer.cc @@ -14,8 +14,9 @@ #include "icing/tokenization/raw-query-tokenizer.h" +#include <stddef.h> + #include <cctype> -#include <cstddef> #include <memory> #include <string> #include <string_view> @@ -102,7 +103,7 @@ enum State { // When seeing right parentheses CLOSING_PARENTHESES = 8, - PROCESSING_NON_ASCII_ALPHANUMERIC_TERM = 9, + PROCESSING_NON_ASCII_ALPHABETIC_TERM = 9, PROCESSING_PROPERTY_TERM_APPENDING = 10, @@ -119,7 +120,7 @@ enum TermType { // A term that consists of unicode alphabetic and numeric characters ASCII_ALPHANUMERIC_TERM = 1, - NON_ASCII_ALPHANUMERIC_TERM = 2, + NON_ASCII_ALPHABETIC_TERM = 2, // "(" LEFT_PARENTHESES = 3, @@ -208,7 +209,7 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) { // PROCESSING_OR = 6 // OPENING_PARENTHESES = 7 // CLOSING_PARENTHESES = 8 -// PROCESSING_NON_ASCII_ALPHANUMERIC_TERM = 9 +// PROCESSING_NON_ASCII_ALPHABETIC_TERM = 9 // PROCESSING_PROPERTY_TERM_APPENDING = 10 // // Actions: @@ -252,40 +253,40 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) { // like "+", "&", "@", "#" in indexing and query tokenizers. 
constexpr State state_transition_rules[STATE_COUNT][TYPE_COUNT] = { /*State: Ready*/ - {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, + {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM, OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, READY, READY}, /*State: PROCESSING_ALPHANUMERIC_TERM*/ - {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, + {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM, OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_RESTRICT, READY}, /*State: PROCESSING_EXCLUSION*/ {READY, PROCESSING_EXCLUSION_TERM, PROCESSING_EXCLUSION_TERM, INVALID, CLOSING_PARENTHESES, PROCESSING_EXCLUSION, INVALID, INVALID, READY}, /*State: PROCESSING_EXCLUSION_TERM*/ - {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, + {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM, OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID, INVALID, READY}, /*State: PROCESSING_PROPERTY_RESTRICT*/ {READY, PROCESSING_PROPERTY_TERM, PROCESSING_PROPERTY_TERM, INVALID, CLOSING_PARENTHESES, INVALID, INVALID, PROCESSING_PROPERTY_RESTRICT, READY}, /*State: PROCESSING_PROPERTY_TERM*/ - {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, + {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM, OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_TERM_APPENDING, READY}, /*State: PROCESSING_OR*/ {READY, INVALID, INVALID, OPENING_PARENTHESES, CLOSING_PARENTHESES, INVALID, INVALID, INVALID, READY}, /*State: OPENING_PARENTHESES*/ - {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, + {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM, OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION, OPENING_PARENTHESES, READY, READY}, /*State: CLOSING_PARENTHESES*/ - {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, + {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM, OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, INVALID, READY}, - /*State: PROCESSING_NON_ASCII_ALPHANUMERIC_TERM*/ - {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, + /*State: PROCESSING_NON_ASCII_ALPHABETIC_TERM*/ + {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM, OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID, INVALID, READY}, /*State: PROCESSING_PROPERTY_TERM_APPENDING*/ {READY, PROCESSING_PROPERTY_TERM_APPENDING, @@ -326,7 +327,7 @@ constexpr ActionOrError action_rules[STATE_COUNT][TYPE_COUNT] = { /*State: CLOSING_PARENTHESES*/ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, ERROR_GROUP_AS_PROPERTY_NAME, OUTPUT}, - /*State: PROCESSING_NON_ASCII_ALPHANUMERIC_TERM*/ + /*State: PROCESSING_NON_ASCII_ALPHABETIC_TERM*/ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR, ERROR_NON_ASCII_AS_PROPERTY_NAME, OUTPUT}, /*State: PROCESSING_PROPERTY_TERM_APPENDING*/ @@ -345,40 +346,6 @@ std::pair<TermType, std::string_view> GetWhitespaceTerm(std::string_view text, return std::make_pair(WHITESPACE, text.substr(pos, cur - pos)); } -TermType GetContentTermType(std::string_view text, size_t pos) { - if (i18n_utils::IsPunctuationAt(text, pos)) { - return OTHER; - } else if (i18n_utils::IsAscii(text[pos])) { - return ASCII_ALPHANUMERIC_TERM; - } - 
return NON_ASCII_ALPHANUMERIC_TERM; -} - -bool IsContentTermType(TermType term_type) { - switch (term_type) { - case ASCII_ALPHANUMERIC_TERM: - [[fallthrough]]; - case NON_ASCII_ALPHANUMERIC_TERM: - [[fallthrough]]; - case OTHER: - return true; - case WHITESPACE: - [[fallthrough]]; - case LEFT_PARENTHESES: - [[fallthrough]]; - case RIGHT_PARENTHESES: - [[fallthrough]]; - case EXCLUSION_OPERATOR: - [[fallthrough]]; - case OR_OPERATOR: - [[fallthrough]]; - case COLON: - [[fallthrough]]; - case TYPE_COUNT: - return false; - } -} - // Determines the length of the potential content term beginning at text[pos] // and returns a pair with the appropriate TermType and a string_view of the // content term.
@@ -391,7 +358,12 @@ std::pair<TermType, std::string_view> GetContentTerm(std::string_view text, size_t pos) { size_t len = 0; // Checks the first char to see if it's an ASCII term - TermType type = GetContentTermType(text, pos); + TermType type = ASCII_ALPHANUMERIC_TERM; + if (!i18n_utils::IsAscii(text[pos])) { + type = NON_ASCII_ALPHABETIC_TERM; + } else if (!std::isalnum(text[pos])) { + type = OTHER; + } for (size_t cur = pos; cur < text.length() && len == 0; ++cur) { switch (text[cur]) { case kLeftParentheses:
@@ -451,7 +423,7 @@ std::pair<TermType, std::string_view> GetTerm(std::string_view text, // and [(cat OR)]. This helps assert extra rule 3: "OR" is ignored if there's no // valid token on its right. void RemoveLastTokenIfOrOperator(std::vector<Token>* tokens) { - if (!tokens->empty() && tokens->back().type == Token::Type::QUERY_OR) { + if (!tokens->empty() && tokens->back().type == Token::QUERY_OR) { tokens->pop_back(); } }
@@ -465,11 +437,11 @@ libtextclassifier3::Status OutputOrOperatorToken(std::vector<Token>* tokens) { } Token::Type last_token_type = tokens->back().type; switch (last_token_type) { - case Token::Type::REGULAR: - case Token::Type::QUERY_RIGHT_PARENTHESES: - tokens->emplace_back(Token::Type::QUERY_OR); + case Token::REGULAR: + case Token::QUERY_RIGHT_PARENTHESES: + tokens->emplace_back(Token::QUERY_OR); break; - case Token::Type::QUERY_OR: + case Token::QUERY_OR: // Ignores "OR" because there's already an "OR", e.g. "term1 OR OR term2" break; default:
@@ -499,7 +471,7 @@ libtextclassifier3::Status OutputToken(State new_state, switch (current_term_type) { case ASCII_ALPHANUMERIC_TERM: [[fallthrough]]; - case NON_ASCII_ALPHANUMERIC_TERM: + case NON_ASCII_ALPHABETIC_TERM: if (new_state == PROCESSING_PROPERTY_TERM) { // Asserts extra rule 1: each property name in the property path is a // valid term.
@@ -510,21 +482,21 @@ libtextclassifier3::Status OutputToken(State new_state, GetErrorMessage(ERROR_NON_ASCII_AS_PROPERTY_NAME)); } } - tokens->emplace_back(Token::Type::QUERY_PROPERTY, current_term); + tokens->emplace_back(Token::QUERY_PROPERTY, current_term); } else { - tokens->emplace_back(Token::Type::REGULAR, current_term); + tokens->emplace_back(Token::REGULAR, current_term); } break; case LEFT_PARENTHESES: - tokens->emplace_back(Token::Type::QUERY_LEFT_PARENTHESES); + tokens->emplace_back(Token::QUERY_LEFT_PARENTHESES); break; case RIGHT_PARENTHESES: // Ignores "OR" if it's followed by right parentheses.
RemoveLastTokenIfOrOperator(tokens); - tokens->emplace_back(Token::Type::QUERY_RIGHT_PARENTHESES); + tokens->emplace_back(Token::QUERY_RIGHT_PARENTHESES); break; case EXCLUSION_OPERATOR: - tokens->emplace_back(Token::Type::QUERY_EXCLUSION); + tokens->emplace_back(Token::QUERY_EXCLUSION); break; case OR_OPERATOR: return OutputOrOperatorToken(tokens);
@@ -569,8 +541,10 @@ libtextclassifier3::Status ProcessTerm( ICING_ASSIGN_OR_RETURN(std::vector<std::string_view> content_terms, language_segmenter->GetAllTerms(*current_term)); for (std::string_view term : content_terms) { - TermType type = GetContentTermType(term, 0); - if (type == OTHER) { + TermType type = ASCII_ALPHANUMERIC_TERM; + if (!i18n_utils::IsAscii(term[0])) { + type = NON_ASCII_ALPHABETIC_TERM; + } else if (!std::isalnum(term[0])) { // Skip OTHER tokens here. continue; }
@@ -616,7 +590,9 @@ libtextclassifier3::StatusOr<std::vector<Token>> ProcessTerms( for (int i = 0; i < prescanned_terms.size(); ++i) { const std::pair<TermType, std::string_view>& prescanned_term = prescanned_terms.at(i); - if (!IsContentTermType(prescanned_term.first)) { + if (prescanned_term.first != ASCII_ALPHANUMERIC_TERM && + prescanned_term.first != NON_ASCII_ALPHABETIC_TERM && + prescanned_term.first != OTHER) { // This can't be a property restrict. Just pass it in. ICING_RETURN_IF_ERROR( ProcessTerm(&current_state, &current_term, &current_term_type,
@@ -628,15 +604,18 @@ libtextclassifier3::StatusOr<std::vector<Token>> ProcessTerms( std::vector<std::string_view> content_terms, language_segmenter->GetAllTerms(prescanned_term.second)); for (std::string_view term : content_terms) { - TermType type = GetContentTermType(term, 0); + TermType type = ASCII_ALPHANUMERIC_TERM; if (term == kOrOperator) { // TODO(tjbarron) Decide whether we should revise this and other // handled syntax. This is used to allow queries like "term1,OR,term2" // to succeed. It's not clear if we should allow this or require // clients to ensure that OR operators are always surrounded by // whitespace. - // Override the type if this is actually an OR operator.
type = OR_OPERATOR; + } else if (!i18n_utils::IsAscii(term[0])) { + type = NON_ASCII_ALPHABETIC_TERM; + } else if (!std::isalnum(term[0])) { + type = OTHER; } ICING_RETURN_IF_ERROR(ProcessTerm(&current_state, &current_term, &current_term_type,
@@ -670,7 +649,7 @@ class RawQueryTokenIterator : public Tokenizer::Iterator { Token GetToken() const override { if (current_ < 0 || current_ >= tokens_.size()) { - return Token(Token::Type::INVALID); + return Token(Token::INVALID); } return tokens_.at(current_); }
diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc index b1dcc73..500efa0 100644 --- a/icing/tokenization/raw-query-tokenizer_test.cc +++ b/icing/tokenization/raw-query-tokenizer_test.cc
@@ -16,9 +16,9 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/platform.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/tokenizer-factory.h"
@@ -59,38 +59,13 @@ TEST_F(RawQueryTokenizerTest, Simple) { tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY, language_segmenter.get())); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("Hello World!"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"), - EqualsToken(Token::Type::REGULAR, "World")))); - - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("hElLo WORLD"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "hElLo"), - EqualsToken(Token::Type::REGULAR, "WORLD")))); -} - -TEST_F(RawQueryTokenizerTest, Emoji) { - language_segmenter_factory::SegmenterOptions options(ULOC_US); - ICING_ASSERT_OK_AND_ASSIGN( - auto language_segmenter, - language_segmenter_factory::Create(std::move(options))); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Tokenizer> raw_query_tokenizer, - tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY, - language_segmenter.get())); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("Hello World!"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"), + EqualsToken(Token::REGULAR, "World")))); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("😊 Hello! Goodbye?"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "😊"), - EqualsToken(Token::Type::REGULAR, "Hello"), - EqualsToken(Token::Type::REGULAR, "Goodbye")))); - - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("Hello😊 !
Goodbye?"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"), - EqualsToken(Token::Type::REGULAR, "😊"), - EqualsToken(Token::Type::REGULAR, "Goodbye")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("hElLo WORLD"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "hElLo"), + EqualsToken(Token::REGULAR, "WORLD")))); } TEST_F(RawQueryTokenizerTest, Parentheses) { @@ -103,96 +78,84 @@ TEST_F(RawQueryTokenizerTest, Parentheses) { tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY, language_segmenter.get())); - ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens, - raw_query_tokenizer->TokenizeAll("()")); - EXPECT_THAT( - query_tokens, - ElementsAre(EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("()"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); - ICING_ASSERT_OK_AND_ASSIGN(query_tokens, - raw_query_tokenizer->TokenizeAll("( )")); - EXPECT_THAT( - query_tokens, - ElementsAre(EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( )"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); - ICING_ASSERT_OK_AND_ASSIGN(query_tokens, - raw_query_tokenizer->TokenizeAll("(term1 term2)")); - EXPECT_THAT( - query_tokens, - ElementsAre(EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::REGULAR, "term2"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 term2)"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::REGULAR, "term2"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("((term1 term2) (term3 term4))"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::REGULAR, "term2"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term3"), + EqualsToken(Token::REGULAR, "term4"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1(term2)"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term2"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); - ICING_ASSERT_OK_AND_ASSIGN( - query_tokens, - raw_query_tokenizer->TokenizeAll("((term1 term2) (term3 term4))")); - EXPECT_THAT( - query_tokens, - ElementsAre(EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::REGULAR, "term2"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term3"), - EqualsToken(Token::Type::REGULAR, "term4"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), - 
EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))); - - ICING_ASSERT_OK_AND_ASSIGN(query_tokens, - raw_query_tokenizer->TokenizeAll("term1(term2)")); EXPECT_THAT( - query_tokens, - ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term2"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))); - - ICING_ASSERT_OK_AND_ASSIGN(query_tokens, - raw_query_tokenizer->TokenizeAll("(term1)term2")); - EXPECT_THAT(query_tokens, - ElementsAre(EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term2"))); + raw_query_tokenizer->TokenizeAll("(term1)term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term2")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)(term2)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term2"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); - - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)-term2"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_EXCLUSION, ""), - EqualsToken(Token::Type::REGULAR, "term2")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term2"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)OR term2"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::REGULAR, "term2")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("(term1)-term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::QUERY_EXCLUSION, ""), + EqualsToken(Token::REGULAR, "term2")))); + + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("(term1)OR term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::REGULAR, "term2")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)OR(term2)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term2"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + 
EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term2"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1):term2"), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, @@ -217,49 +180,44 @@ TEST_F(RawQueryTokenizerTest, Exclustion) { tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY, language_segmenter.get())); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("-term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), - EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), + EqualsToken(Token::REGULAR, "term1")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(-term1)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_EXCLUSION, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::QUERY_EXCLUSION, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); // Exclusion operator is ignored - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("- term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("- term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); // Exclusion operator is ignored - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("term1- term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::REGULAR, "term2")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1- term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::REGULAR, "term2")))); // Exclusion operator is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 -)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); // First exclusion operator is ignored - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("--term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), - EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("--term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), + EqualsToken(Token::REGULAR, "term1")))); // First "-" is exclusion operator, second is not and will be discarded. // In other words, exclusion only applies to the term right after it. 
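// So for "-term1-term2" below, the expected stream is (QUERY_EXCLUSION)
// ("term1") ("term2"): the leading "-" negates only "term1", and the second
// "-" is dropped rather than negating "term2".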
- EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("-term1-term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::REGULAR, "term2")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1-term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::REGULAR, "term2")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-(term1)"), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, @@ -291,75 +249,73 @@ TEST_F(RawQueryTokenizerTest, PropertyRestriction) { tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY, language_segmenter.get())); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), - EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("property1:term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), + EqualsToken(Token::REGULAR, "term1")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(property1:term1)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::QUERY_PROPERTY, "property1"), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); // Colon is ignored - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll(":term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll(":term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); // Colon is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(:term1)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); // Colon is ignored - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("term1:"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1:"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); // property name can be a path EXPECT_THAT(raw_query_tokenizer->TokenizeAll("email.title:hello"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_PROPERTY, "email.title"), - EqualsToken(Token::Type::REGULAR, "hello")))); + IsOkAndHolds( + ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "email.title"), + EqualsToken(Token::REGULAR, "hello")))); // The first colon ":" triggers property restriction, the second colon is used // as a word connector per ICU's rule // (https://unicode.org/reports/tr29/#Word_Boundaries). 
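// (In UAX #29 terms, ":" has Word_Break=MidLetter, so it only holds a word
// together when flanked by alphabetic letters on both sides; that is why
// "foo:bar" stays one term here, while "term1:term2" below splits, because
// the digit "1" before the colon breaks the letter:letter pattern.)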
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property:foo:bar"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_PROPERTY, "property"), - EqualsToken(Token::Type::REGULAR, "foo:bar")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("property:foo:bar"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property"), + EqualsToken(Token::REGULAR, "foo:bar")))); // Property restriction only applies to the term right after it. // Note: "term1:term2" is not a term but 2 terms because word connectors // don't apply to numbers and alphabets. - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1:term2"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::REGULAR, "term2")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("property1:term1:term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::REGULAR, "term2")))); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:今天:天气"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), - EqualsToken(Token::Type::REGULAR, "今天"), - EqualsToken(Token::Type::REGULAR, "天气")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("property1:今天:天气"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), + EqualsToken(Token::REGULAR, "今天"), + EqualsToken(Token::REGULAR, "天气")))); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1-"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), - EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("property1:term1-"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), + EqualsToken(Token::REGULAR, "term1")))); // Multiple continuous colons will still be recognized as a property // restriction operator - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1::term1"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), - EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("property1::term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), + EqualsToken(Token::REGULAR, "term1")))); EXPECT_THAT( raw_query_tokenizer->TokenizeAll("property1:(term1)"), @@ -389,109 +345,105 @@ TEST_F(RawQueryTokenizerTest, OR) { tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY, language_segmenter.get())); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("term1 OR term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::REGULAR, "term2")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::REGULAR, "term2")))); // Two continuous "OR"s are treated as one - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("term1 OR OR term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::REGULAR, "term2")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR OR term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::REGULAR, 
"term2")))); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1) OR term2"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::REGULAR, "term2")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("(term1) OR term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::REGULAR, "term2")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR (term2)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term2"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term2"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("((term1) OR (term2))"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term2"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term2"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); // Only "OR" (all in uppercase) is the operator EXPECT_THAT( raw_query_tokenizer->TokenizeAll("term1 or term2 Or term3 oR term4"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::REGULAR, "or"), - EqualsToken(Token::Type::REGULAR, "term2"), - EqualsToken(Token::Type::REGULAR, "Or"), - EqualsToken(Token::Type::REGULAR, "term3"), - EqualsToken(Token::Type::REGULAR, "oR"), - EqualsToken(Token::Type::REGULAR, "term4")))); + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::REGULAR, "or"), + EqualsToken(Token::REGULAR, "term2"), + EqualsToken(Token::REGULAR, "Or"), + EqualsToken(Token::REGULAR, "term3"), + EqualsToken(Token::REGULAR, "oR"), + EqualsToken(Token::REGULAR, "term4")))); // "OR" is ignored - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("OR term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("OR term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); // "OR" is ignored - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("term1 OR"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR"), + 
IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); // "OR" is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(OR term1)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); // "OR" is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( OR term1)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); // "OR" is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 OR)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); // "OR" is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 OR )"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); // "OR" is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( OR )"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR(term2)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term2"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term2"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT( raw_query_tokenizer->TokenizeAll("term1 OR-term2"), @@ -520,31 +472,31 @@ TEST_F(RawQueryTokenizerTest, CJKT) { if (IsCfStringTokenization()) { EXPECT_THAT( raw_query_tokenizer->TokenizeAll("-今天天气很好"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), - EqualsToken(Token::Type::REGULAR, "今天"), - EqualsToken(Token::Type::REGULAR, "天气"), - EqualsToken(Token::Type::REGULAR, "很"), - EqualsToken(Token::Type::REGULAR, "好")))); + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), + EqualsToken(Token::REGULAR, "今天"), + EqualsToken(Token::REGULAR, "天气"), + EqualsToken(Token::REGULAR, "很"), + EqualsToken(Token::REGULAR, "好")))); } else { EXPECT_THAT( raw_query_tokenizer->TokenizeAll("-今天天气很好"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), - EqualsToken(Token::Type::REGULAR, "今天"), - EqualsToken(Token::Type::REGULAR, "天气"), - EqualsToken(Token::Type::REGULAR, "很好")))); + 
IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), + EqualsToken(Token::REGULAR, "今天"), + EqualsToken(Token::REGULAR, "天气"), + EqualsToken(Token::REGULAR, "很好")))); } if (IsCfStringTokenization()) { EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:你好"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), - EqualsToken(Token::Type::REGULAR, "你"), - EqualsToken(Token::Type::REGULAR, "好")))); + IsOkAndHolds( + ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), + EqualsToken(Token::REGULAR, "你"), + EqualsToken(Token::REGULAR, "好")))); } else { EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:你好"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), - EqualsToken(Token::Type::REGULAR, "你好")))); + IsOkAndHolds( + ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), + EqualsToken(Token::REGULAR, "你好")))); } EXPECT_THAT( @@ -552,11 +504,10 @@ TEST_F(RawQueryTokenizerTest, CJKT) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, HasSubstr("Characters in property name must all be ASCII"))); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("cat OR ねこ"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "cat"), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::REGULAR, "ねこ")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("cat OR ねこ"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "cat"), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::REGULAR, "ねこ")))); EXPECT_THAT( raw_query_tokenizer->TokenizeAll("cat ORねこ"), @@ -592,45 +543,40 @@ TEST_F(RawQueryTokenizerTest, OtherChars) { language_segmenter.get())); // Comma is ignored - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll(",term1, ,"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll(",term1, ,"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(,term1),"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); // Exclusion operator and comma are ignored - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("-,term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-,term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("-term1,"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), - EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1,"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), + EqualsToken(Token::REGULAR, "term1")))); // Colon and comma are ignored - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("property1:,term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "property1"), - EqualsToken(Token::Type::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:,term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "property1"), + EqualsToken(Token::REGULAR, "term1")))); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1,term2"), - IsOkAndHolds(ElementsAre( - 
EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), - EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::REGULAR, "term2")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("property1:term1,term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), + EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::REGULAR, "term2")))); // This is a special case for OR, unknown chars are treated the same as // whitespaces before and after OR. - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("term1,OR,term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::REGULAR, "term2")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1,OR,term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::REGULAR, "term2")))); } TEST_F(RawQueryTokenizerTest, Mix) { @@ -647,38 +593,37 @@ TEST_F(RawQueryTokenizerTest, Mix) { EXPECT_THAT(raw_query_tokenizer->TokenizeAll( "こんにちはgood afternoon, title:今天 OR (ในวันนี้ -B12)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::REGULAR, "こんにちは"), - EqualsToken(Token::Type::REGULAR, "good"), - EqualsToken(Token::Type::REGULAR, "afternoon"), - EqualsToken(Token::Type::QUERY_PROPERTY, "title"), - EqualsToken(Token::Type::REGULAR, "今天"), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "ใน"), - EqualsToken(Token::Type::REGULAR, "วันนี้"), - EqualsToken(Token::Type::QUERY_EXCLUSION, ""), - EqualsToken(Token::Type::REGULAR, "B12"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::REGULAR, "こんにちは"), + EqualsToken(Token::REGULAR, "good"), + EqualsToken(Token::REGULAR, "afternoon"), + EqualsToken(Token::QUERY_PROPERTY, "title"), + EqualsToken(Token::REGULAR, "今天"), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "ใน"), + EqualsToken(Token::REGULAR, "วันนี้"), + EqualsToken(Token::QUERY_EXCLUSION, ""), + EqualsToken(Token::REGULAR, "B12"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); } else { ICING_ASSERT_OK_AND_ASSIGN( std::vector<Token> tokens, raw_query_tokenizer->TokenizeAll( "こんにちはgood afternoon, title:今天 OR (ในวันนี้ -B12)")); - EXPECT_THAT( - tokens, - ElementsAre(EqualsToken(Token::Type::REGULAR, "こんにちは"), - EqualsToken(Token::Type::REGULAR, "good"), - EqualsToken(Token::Type::REGULAR, "afternoon"), - EqualsToken(Token::Type::QUERY_PROPERTY, "title"), - EqualsToken(Token::Type::REGULAR, "今天"), - EqualsToken(Token::Type::QUERY_OR, ""), - EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::Type::REGULAR, "ใน"), - EqualsToken(Token::Type::REGULAR, "วัน"), - EqualsToken(Token::Type::REGULAR, "นี้"), - EqualsToken(Token::Type::QUERY_EXCLUSION, ""), - EqualsToken(Token::Type::REGULAR, "B12"), - EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))); + EXPECT_THAT(tokens, + ElementsAre(EqualsToken(Token::REGULAR, "こんにちは"), + EqualsToken(Token::REGULAR, "good"), + EqualsToken(Token::REGULAR, "afternoon"), + EqualsToken(Token::QUERY_PROPERTY, "title"), + EqualsToken(Token::REGULAR, "今天"), + EqualsToken(Token::QUERY_OR, ""), + EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::REGULAR, "ใน"), + EqualsToken(Token::REGULAR, "วัน"), + EqualsToken(Token::REGULAR, "นี้"), + EqualsToken(Token::QUERY_EXCLUSION, ""), + 
EqualsToken(Token::REGULAR, "B12"), + EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))); } } diff --git a/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc index 8e1e563..6b1cb3a 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc +++ b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc @@ -15,10 +15,10 @@ #include "icing/tokenization/reverse_jni/reverse-jni-break-iterator.h" #include <jni.h> +#include <math.h> #include <cassert> #include <cctype> -#include <cmath> #include <map> #include "icing/jni/jni-cache.h" diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc index e5de6e6..76219b5 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc +++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc @@ -51,9 +51,9 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { if (term_end_exclusive_.utf16_index() == 0) { int first = break_iterator_->First(); if (!term_start_.MoveToUtf16(first)) { - // First is guaranteed to succeed and return a position within bounds. - // So the only possible failure could be an invalid sequence. Mark as - // DONE and return. + // First is guaranteed to succeed and return a position within bounds. So + // the only possible failure could be an invalid sequence. Mark as DONE + // and return. MarkAsDone(); return false; } @@ -74,7 +74,14 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { MarkAsDone(); return false; } - return true; + + // Check if the current term is valid. We consider any term valid if its + // first character is valid. If it's not valid, then we need to advance to + // the next term. + if (IsValidTerm()) { + return true; + } + return Advance(); } // Returns the current term. It can be called only when Advance() returns @@ -237,7 +244,7 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { // 4. The start and end indices point to a segment, but we need to ensure // that this segment is 1) valid and 2) ends before offset. Otherwise, we'll // need a segment prior to this one. - if (term_end_exclusive_.utf32_index() > offset) { + if (term_end_exclusive_.utf32_index() > offset || !IsValidTerm()) { return ResetToTermEndingBeforeUtf32(term_start_.utf32_index()); } return term_start_.utf32_index(); @@ -277,6 +284,21 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { return term_end_exclusive_.utf16_index() == ReverseJniBreakIterator::kDone; } + bool IsValidTerm() const { + // Rule 1: all ASCII terms will be returned. + // We know it's an ASCII term by checking the first char. + if (i18n_utils::IsAscii(text_[term_start_.utf8_index()])) { + return true; + } + + // Rule 2: for non-ASCII terms, only the alphabetic terms are returned. + // We know it's an alphabetic term by checking the first Unicode character. + if (i18n_utils::IsAlphabeticAt(text_, term_start_.utf8_index())) { + return true; + } + return false; + } + // All of ReverseJniBreakIterator's functions return UTF-16 boundaries. So // this class needs to maintain state to convert between UTF-16 and UTF-8.
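The IsValidTerm() hunk above is the behavioral heart of this change: Advance() no longer surfaces every segment the break iterator produces, but skips any segment whose first character is neither ASCII nor alphabetic (full-width punctuation, symbols, and the like), which is why the test expectations below lose their "。"-style entries. A minimal standalone sketch of that filtering rule, with the Unicode check injected as a predicate since icing's i18n_utils helpers are not reproduced here:

    #include <functional>
    #include <string>
    #include <vector>

    // Keeps only the segments the segmenter now considers valid terms.
    // `first_char_is_alphabetic` stands in for i18n_utils::IsAlphabeticAt.
    std::vector<std::string> FilterValidTerms(
        const std::vector<std::string>& segments,
        const std::function<bool(const std::string&)>& first_char_is_alphabetic) {
      std::vector<std::string> terms;
      for (const std::string& segment : segments) {
        if (segment.empty()) continue;
        // Rule 1: ASCII terms are always kept (first byte below 0x80).
        if (static_cast<unsigned char>(segment[0]) < 0x80) {
          terms.push_back(segment);
          continue;
        }
        // Rule 2: non-ASCII terms are kept only if they are alphabetic.
        if (first_char_is_alphabetic(segment)) {
          terms.push_back(segment);
        }
      }
      return terms;
    }

Note also that the real Advance() implements the skip by self-recursion (return Advance();); an equivalent loop would avoid deep recursion over long runs of invalid segments.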
std::unique_ptr<ReverseJniBreakIterator> break_iterator_; diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc index 277ece6..b1a8f72 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc +++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc @@ -185,7 +185,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, Non_ASCII_Non_Alphabetic) { // Full-width (non-ASCII) punctuation marks and special characters are left // out. EXPECT_THAT(language_segmenter->GetAllTerms("。?·Hello!×"), - IsOkAndHolds(ElementsAre("。", "?", "·", "Hello", "!", "×"))); + IsOkAndHolds(ElementsAre("Hello"))); } TEST_P(ReverseJniLanguageSegmenterTest, Acronym) { @@ -246,9 +246,9 @@ TEST_P(ReverseJniLanguageSegmenterTest, WordConnector) { // Connectors don't connect if one side is an invalid term (?) EXPECT_THAT(language_segmenter->GetAllTerms("bar:baz:?"), - IsOkAndHolds(ElementsAre("bar:baz", ":", "?"))); + IsOkAndHolds(ElementsAre("bar:baz", ":"))); EXPECT_THAT(language_segmenter->GetAllTerms("?:bar:baz"), - IsOkAndHolds(ElementsAre("?", ":", "bar:baz"))); + IsOkAndHolds(ElementsAre(":", "bar:baz"))); EXPECT_THAT(language_segmenter->GetAllTerms("3:14"), IsOkAndHolds(ElementsAre("3", ":", "14"))); EXPECT_THAT(language_segmenter->GetAllTerms("私:は"), @@ -366,17 +366,6 @@ TEST_P(ReverseJniLanguageSegmenterTest, Number) { IsOkAndHolds(ElementsAre("-", "123"))); } -TEST_P(ReverseJniLanguageSegmenterTest, FullWidthNumbers) { - ICING_ASSERT_OK_AND_ASSIGN( - auto language_segmenter, - language_segmenter_factory::Create( - GetSegmenterOptions(GetLocale(), jni_cache_.get()))); - - EXPECT_THAT(language_segmenter->GetAllTerms("0123456789"), - IsOkAndHolds(ElementsAre("0", "1", "2", "3", "4", "5", "6", - "7", "8", "9"))); -} - TEST_P(ReverseJniLanguageSegmenterTest, ContinuousWhitespaces) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, @@ -413,17 +402,15 @@ TEST_P(ReverseJniLanguageSegmenterTest, CJKT) { // have whitespaces as word delimiter. 
// Chinese - EXPECT_THAT( - language_segmenter->GetAllTerms("我每天走路去上班。"), - IsOkAndHolds(ElementsAre("我", "每天", "走路", "去", "上班", "。"))); + EXPECT_THAT(language_segmenter->GetAllTerms("我每天走路去上班。"), + IsOkAndHolds(ElementsAre("我", "每天", "走路", "去", "上班"))); // Japanese EXPECT_THAT(language_segmenter->GetAllTerms("私は毎日仕事に歩いています。"), IsOkAndHolds(ElementsAre("私", "は", "毎日", "仕事", "に", "歩", - "い", "てい", "ます", "。"))); + "い", "てい", "ます"))); // Khmer EXPECT_THAT(language_segmenter->GetAllTerms("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"), - IsOkAndHolds(ElementsAre("ញុំ", "ដើរទៅ", "ធ្វើការ", "រាល់ថ្ងៃ", "។"))); - + IsOkAndHolds(ElementsAre("ញុំ", "ដើរទៅ", "ធ្វើការ", "រាល់ថ្ងៃ"))); // Thai EXPECT_THAT( language_segmenter->GetAllTerms("ฉันเดินไปทำงานทุกวัน"), @@ -854,19 +841,16 @@ TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kChinese)); // String: "我每天走路去上班。" - // ^ ^ ^ ^^ ^ - // UTF-8 idx: 0 3 9 15 18 24 - // UTF-32 idx: 0 1 3 5 6 8 + // ^ ^ ^ ^^ + // UTF-8 idx: 0 3 9 15 18 + // UTF-32 idx: 0 1 3 5 6 EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1))); EXPECT_THAT(itr->GetTerm(), Eq("每天")); EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq("走路")); - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8))); - EXPECT_THAT(itr->GetTerm(), Eq("。")); - - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(8), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); } @@ -881,21 +865,18 @@ TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kJapanese)); // String: "私は毎日仕事に歩いています。" - // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ - // UTF-8 idx: 0 3 6 12 18212427 33 39 - // UTF-32 idx: 0 1 2 4 6 7 8 9 11 13 + // ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF-8 idx: 0 3 6 12 18212427 33 + // UTF-32 idx: 0 1 2 4 6 7 8 9 11 EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1))); EXPECT_THAT(itr->GetTerm(), Eq("は")); - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(13), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(11), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(3), IsOkAndHolds(Eq(4))); EXPECT_THAT(itr->GetTerm(), Eq("仕事")); - - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(13))); - EXPECT_THAT(itr->GetTerm(), Eq("。")); } TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermAfterUtf32) { @@ -907,16 +888,13 @@ TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermAfterUtf32) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, language_segmenter->Segment(kKhmer)); // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" - // ^ ^ ^ ^ ^ - // UTF-8 idx: 0 9 24 45 69 - // UTF-32 idx: 0 3 8 15 23 + // ^ ^ ^ ^ + // UTF-8 idx: 0 9 24 45 + // UTF-32 idx: 0 3 8 15 EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3))); EXPECT_THAT(itr->GetTerm(), Eq("ដើរទៅ")); - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15), IsOkAndHolds(Eq(23))); - EXPECT_THAT(itr->GetTerm(), Eq("។")); - - EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(23), + EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(itr->GetTerm(), IsEmpty()); diff --git 
a/icing/tokenization/token.h b/icing/tokenization/token.h index 0c268be..dda9efc 100644 --- a/icing/tokenization/token.h +++ b/icing/tokenization/token.h @@ -21,14 +21,11 @@ namespace icing { namespace lib { struct Token { - enum class Type { + enum Type { // Common types REGULAR, // A token without special meanings, the value of it will be // indexed or searched directly - VERBATIM, // A token that should be indexed and searched without any - // modifications to the raw text - // Types only used in raw query QUERY_OR, // Indicates OR logic between its left and right tokens QUERY_EXCLUSION, // Indicates exclusion operation on next token diff --git a/icing/tokenization/tokenizer-factory.cc b/icing/tokenization/tokenizer-factory.cc index b2508f7..9b59acf 100644 --- a/icing/tokenization/tokenizer-factory.cc +++ b/icing/tokenization/tokenizer-factory.cc @@ -23,7 +23,6 @@ #include "icing/tokenization/plain-tokenizer.h" #include "icing/tokenization/raw-query-tokenizer.h" #include "icing/tokenization/tokenizer.h" -#include "icing/tokenization/verbatim-tokenizer.h" #include "icing/util/status-macros.h" namespace icing { @@ -39,8 +38,6 @@ CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type, switch (type) { case StringIndexingConfig::TokenizerType::PLAIN: return std::make_unique<PlainTokenizer>(lang_segmenter); - case StringIndexingConfig::TokenizerType::VERBATIM: - return std::make_unique<VerbatimTokenizer>(); case StringIndexingConfig::TokenizerType::NONE: [[fallthrough]]; default: diff --git a/icing/tokenization/tokenizer.h b/icing/tokenization/tokenizer.h index 24f8269..b4f0c6e 100644 --- a/icing/tokenization/tokenizer.h +++ b/icing/tokenization/tokenizer.h @@ -40,6 +40,14 @@ class Tokenizer { public: virtual ~Tokenizer() = default; + enum Type { + // Index tokenizers + PLAIN, // Used to tokenize plain text input + + // Query tokenizers + RAW_QUERY, // Used to tokenize raw queries + }; + // An iterator helping to get tokens. // Example usage: // @@ -75,26 +83,22 @@ class Tokenizer { // offset. // Ex. // auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie(); - // iterator.ResetToTokenStartingAfter(4); + // iterator.ResetToTokenAfter(4); // // The first full token starting after position 4 (the 'b' in "bar") is // // "baz". // PrintToken(iterator.GetToken()); // prints "baz" - virtual bool ResetToTokenStartingAfter(int32_t utf32_offset) { - return false; - } + virtual bool ResetToTokenAfter(int32_t offset) { return false; } // Sets the tokenizer to point at the first token that *ends* *before* // offset. Returns false if there are no valid tokens ending // before offset. // Ex. // auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie(); - // iterator.ResetToTokenEndingBefore(4); + // iterator.ResetToTokenBefore(4); // // The first full token ending before position 4 (the 'b' in "bar") is // // "foo". // PrintToken(iterator.GetToken()); // prints "foo" - virtual bool ResetToTokenEndingBefore(int32_t utf32_offset) { - return false; - } + virtual bool ResetToTokenBefore(int32_t offset) { return false; } virtual bool ResetToStart() { return false; } }; diff --git a/icing/tokenization/verbatim-tokenizer.cc b/icing/tokenization/verbatim-tokenizer.cc deleted file mode 100644 index 0d3a320..0000000 --- a/icing/tokenization/verbatim-tokenizer.cc +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
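Most of the churn in the tokenizer tests earlier in this diff follows mechanically from the token.h hunk above: Token's scoped `enum class Type` becomes an unscoped `enum Type`, so call sites shorten from Token::Type::REGULAR to Token::REGULAR (the unscoped form also still accepts the longer spelling in C++11 and later). A quick illustration of the difference, independent of icing:

    struct Token {
      enum Type { REGULAR, QUERY_OR };             // unscoped: enumerators visible as Token::REGULAR
      enum class ScopedType { REGULAR, QUERY_OR }; // scoped: must write Token::ScopedType::REGULAR
    };

    int main() {
      Token::Type a = Token::REGULAR;        // OK for an unscoped enum
      Token::Type b = Token::Type::REGULAR;  // also OK since C++11
      int n = a;                             // unscoped enums convert implicitly to int
      // int m = Token::ScopedType::REGULAR; // error: scoped enums do not convert implicitly
      return n + b;                          // b likewise converts implicitly
    }

The same hunk drops the VERBATIM token type, which is what forces the removal of the verbatim tokenizer and its tests in the hunks that follow.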
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "icing/tokenization/verbatim-tokenizer.h" - -#include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/util/character-iterator.h" -#include "icing/util/status-macros.h" - -namespace icing { -namespace lib { - -class VerbatimTokenIterator : public Tokenizer::Iterator { - public: - explicit VerbatimTokenIterator(std::string_view text) - : term_(std::move(text)) {} - - bool Advance() override { - if (term_.empty() || has_advanced_to_end_) { - return false; - } - - has_advanced_to_end_ = true; - return true; - } - - Token GetToken() const override { - if (term_.empty() || !has_advanced_to_end_) { - return Token(Token::Type::INVALID); - } - - return Token(Token::Type::VERBATIM, term_); - } - - libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenStart() - override { - if (term_.empty()) { - return absl_ports::AbortedError( - "Could not calculate start of empty token."); - } - - return CharacterIterator(term_, 0, 0, 0); - } - - libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenEndExclusive() - override { - if (term_.empty()) { - return absl_ports::AbortedError( - "Could not calculate end of empty token."); - } - - if (token_end_iterator_.utf8_index() >= 0) { - return token_end_iterator_; - } - - bool moved_to_token_end = token_end_iterator_.MoveToUtf8(term_.length()); - if (moved_to_token_end) { - return token_end_iterator_; - } else { - return absl_ports::AbortedError("Could not move to end of token."); - } - } - - bool ResetToTokenStartingAfter(int32_t utf32_offset) override { - // We can only reset to the sole verbatim token, so we must have a negative - // offset for it to be considered the token after. - if (utf32_offset < 0) { - // Because we are now at the sole verbatim token, we should ensure we can - // no longer advance past it. - has_advanced_to_end_ = true; - return true; - } - return false; - } - - bool ResetToTokenEndingBefore(int32_t utf32_offset) override { - // We can only reset to the sole verbatim token, so we must have an offset - // after the end of the token for the reset to be valid. This means the - // provided utf-32 offset must be equal to or greater than the utf-32 length - // of the token. - if (token_end_iterator_.utf8_index() < 0) { - // Moves one index past the end of the term. - bool moved_to_token_end = token_end_iterator_.MoveToUtf8(term_.length()); - if (!moved_to_token_end) { - // We're unable to reset as we failed to move to the end of the term. - return false; - } - } - - if (utf32_offset >= token_end_iterator_.utf32_index()) { - // Because we are now at the sole verbatim token, we should ensure we can - // no longer advance past it. 
- has_advanced_to_end_ = true; - return true; - } - return false; - } - - bool ResetToStart() override { - has_advanced_to_end_ = true; - return true; - } - - private: - std::string_view term_; - CharacterIterator token_end_iterator_ = CharacterIterator(term_, -1, -1, -1); - // Used to determine whether we have advanced on the sole verbatim token - bool has_advanced_to_end_ = false; -}; - -libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> -VerbatimTokenizer::Tokenize(std::string_view text) const { - return std::make_unique<VerbatimTokenIterator>(text); -} - -libtextclassifier3::StatusOr<std::vector<Token>> VerbatimTokenizer::TokenizeAll( - std::string_view text) const { - ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator, - Tokenize(text)); - std::vector<Token> tokens; - while (iterator->Advance()) { - tokens.push_back(iterator->GetToken()); - } - return tokens; -} - -} // namespace lib -} // namespace icing diff --git a/icing/tokenization/verbatim-tokenizer.h b/icing/tokenization/verbatim-tokenizer.h deleted file mode 100644 index 8404cf1..0000000 --- a/icing/tokenization/verbatim-tokenizer.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ICING_TOKENIZATION_VERBATIM_H_ -#define ICING_TOKENIZATION_VERBATIM_H_ - -#include <memory> -#include <string_view> -#include <vector> - -#include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/tokenization/tokenizer.h" - -namespace icing { -namespace lib { - -// Provides verbatim tokenization on input text -class VerbatimTokenizer : public Tokenizer { - public: - libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize( - std::string_view text) const override; - - libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll( - std::string_view text) const override; -}; - -} // namespace lib -} // namespace icing - -#endif // ICING_TOKENIZATION_VERBATIM_H_ diff --git a/icing/tokenization/verbatim-tokenizer_test.cc b/icing/tokenization/verbatim-tokenizer_test.cc deleted file mode 100644 index e38c7aa..0000000 --- a/icing/tokenization/verbatim-tokenizer_test.cc +++ /dev/null @@ -1,209 +0,0 @@ -// Copyright (C) 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
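Stripped of icing's StatusOr and CharacterIterator plumbing, the VerbatimTokenizer being deleted here is a one-shot iterator: Advance() succeeds exactly once and GetToken() then returns the entire input as a single token. A condensed sketch of that pattern (illustrative names, not icing's API):

    #include <string_view>

    // One-shot iterator: yields the whole input as a single token.
    class SingleTokenIterator {
     public:
      explicit SingleTokenIterator(std::string_view text) : text_(text) {}

      // Returns true exactly once for non-empty input, then always false.
      bool Advance() {
        if (text_.empty() || done_) return false;
        done_ = true;
        return true;
      }

      // Meaningful only after Advance() has returned true.
      std::string_view GetToken() const {
        return done_ ? text_ : std::string_view();
      }

     private:
      std::string_view text_;
      bool done_ = false;
    };

The reset methods in the deleted code follow from the same invariant: the sole token "starts after" only a negative offset, and "ends before" only an offset at or past the token's UTF-32 length.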
- -#include <string_view> - -#include "gmock/gmock.h" -#include "icing/portable/platform.h" -#include "icing/testing/common-matchers.h" -#include "icing/testing/icu-data-file-helper.h" -#include "icing/testing/jni-test-helpers.h" -#include "icing/testing/test-data.h" -#include "icing/tokenization/language-segmenter-factory.h" -#include "icing/tokenization/tokenizer-factory.h" -#include "icing/util/character-iterator.h" -#include "unicode/uloc.h" - -namespace icing { -namespace lib { -namespace { -using ::testing::ElementsAre; -using ::testing::Eq; -using ::testing::IsEmpty; - -class VerbatimTokenizerTest : public ::testing::Test { - protected: - void SetUp() override { - if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { - ICING_ASSERT_OK( - // File generated via icu_data_file rule in //icing/BUILD. - icu_data_file_helper::SetUpICUDataFile( - GetTestFilePath("icing/icu.dat"))); - } - - jni_cache_ = GetTestJniCache(); - language_segmenter_factory::SegmenterOptions options(ULOC_US, - jni_cache_.get()); - ICING_ASSERT_OK_AND_ASSIGN( - language_segmenter_, - language_segmenter_factory::Create(std::move(options))); - } - - std::unique_ptr<const JniCache> jni_cache_; - std::unique_ptr<LanguageSegmenter> language_segmenter_; -}; - -TEST_F(VerbatimTokenizerTest, Empty) { - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::VERBATIM, - language_segmenter_.get())); - - EXPECT_THAT(verbatim_tokenizer->TokenizeAll(""), IsOkAndHolds(IsEmpty())); -} - -TEST_F(VerbatimTokenizerTest, Simple) { - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::VERBATIM, - language_segmenter_.get())); - - EXPECT_THAT( - verbatim_tokenizer->TokenizeAll("foo bar"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::VERBATIM, "foo bar")))); -} - -TEST_F(VerbatimTokenizerTest, Punctuation) { - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::VERBATIM, - language_segmenter_.get())); - - EXPECT_THAT(verbatim_tokenizer->TokenizeAll("Hello, world!"), - IsOkAndHolds(ElementsAre( - EqualsToken(Token::Type::VERBATIM, "Hello, world!")))); -} - -TEST_F(VerbatimTokenizerTest, InvalidTokenBeforeAdvancing) { - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::VERBATIM, - language_segmenter_.get())); - - constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); - - // We should get an invalid token if we get the token before advancing. - EXPECT_THAT(token_iterator->GetToken(), - EqualsToken(Token::Type::INVALID, "")); -} - -TEST_F(VerbatimTokenizerTest, ResetToTokenEndingBefore) { - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::VERBATIM, - language_segmenter_.get())); - - constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); - - // Reset to beginning of verbatim of token. We provide an offset of 13 as it - // is larger than the final index (12) of the verbatim token. 
- EXPECT_TRUE(token_iterator->ResetToTokenEndingBefore(13)); - EXPECT_THAT(token_iterator->GetToken(), - EqualsToken(Token::Type::VERBATIM, "Hello, world!")); - - // Ensure our cached character iterator properly maintains the end of the - // verbatim token. - EXPECT_TRUE(token_iterator->ResetToTokenEndingBefore(13)); - EXPECT_THAT(token_iterator->GetToken(), - EqualsToken(Token::Type::VERBATIM, "Hello, world!")); - - // We should not be able to reset with an offset before or within - // the verbatim token's utf-32 length. - EXPECT_FALSE(token_iterator->ResetToTokenEndingBefore(0)); - EXPECT_FALSE(token_iterator->ResetToTokenEndingBefore(12)); -} - -TEST_F(VerbatimTokenizerTest, ResetToTokenStartingAfter) { - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::VERBATIM, - language_segmenter_.get())); - - constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); - - // Get token without resetting - EXPECT_TRUE(token_iterator->Advance()); - EXPECT_THAT(token_iterator->GetToken(), - EqualsToken(Token::Type::VERBATIM, "Hello, world!")); - - // We expect a sole verbatim token, so it's not possible to reset after the - // start of the token. - EXPECT_FALSE(token_iterator->ResetToTokenStartingAfter(1)); - - // We expect to be reset to the sole verbatim token when the offset is - // negative. - EXPECT_TRUE(token_iterator->ResetToTokenStartingAfter(-1)); - EXPECT_THAT(token_iterator->GetToken(), - EqualsToken(Token::Type::VERBATIM, "Hello, world!")); -} - -TEST_F(VerbatimTokenizerTest, ResetToStart) { - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::VERBATIM, - language_segmenter_.get())); - - constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); - - // Get token without resetting - EXPECT_TRUE(token_iterator->Advance()); - EXPECT_THAT(token_iterator->GetToken(), - EqualsToken(Token::Type::VERBATIM, "Hello, world!")); - - // Retrieve token again after resetting to start - EXPECT_TRUE(token_iterator->ResetToStart()); - EXPECT_THAT(token_iterator->GetToken(), - EqualsToken(Token::Type::VERBATIM, "Hello, world!")); -} - -TEST_F(VerbatimTokenizerTest, CalculateTokenStart) { - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::VERBATIM, - language_segmenter_.get())); - - constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); - - ICING_ASSERT_OK_AND_ASSIGN(CharacterIterator start_character_iterator, - token_iterator->CalculateTokenStart()); - - // We should retrieve the character 'H', the first character of the token.
- EXPECT_THAT(start_character_iterator.GetCurrentChar(), Eq('H')); -} - -TEST_F(VerbatimTokenizerTest, CalculateTokenEnd) { - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, - tokenizer_factory::CreateIndexingTokenizer( - StringIndexingConfig::TokenizerType::VERBATIM, - language_segmenter_.get())); - - constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); - - ICING_ASSERT_OK_AND_ASSIGN(CharacterIterator end_character_iterator, - token_iterator->CalculateTokenEndExclusive()); - - // We should retrieve the null character, as the returned character - // iterator will be set one past the end of the token. - EXPECT_THAT(end_character_iterator.GetCurrentChar(), Eq('\0')); -} - -} // namespace -} // namespace lib -} // namespace icing diff --git a/icing/transform/icu/icu-normalizer.cc b/icing/transform/icu/icu-normalizer.cc index aceb11d..eb0eead 100644 --- a/icing/transform/icu/icu-normalizer.cc +++ b/icing/transform/icu/icu-normalizer.cc @@ -29,7 +29,6 @@ #include "icing/util/status-macros.h" #include "unicode/umachine.h" #include "unicode/unorm2.h" -#include "unicode/ustring.h" #include "unicode/utrans.h" namespace icing { @@ -158,18 +157,14 @@ std::string IcuNormalizer::NormalizeLatin(const UNormalizer2* normalizer2, const std::string_view term) const { std::string result; result.reserve(term.length()); - int current_pos = 0; - while (current_pos < term.length()) { - if (i18n_utils::IsAscii(term[current_pos])) { - result.push_back(std::tolower(term[current_pos])); - ++current_pos; - } else { - UChar32 uchar32 = - i18n_utils::GetUChar32At(term.data(), term.length(), current_pos); + for (int i = 0; i < term.length(); i++) { + if (i18n_utils::IsAscii(term[i])) { + result.push_back(std::tolower(term[i])); + } else if (i18n_utils::IsLeadUtf8Byte(term[i])) { + UChar32 uchar32 = i18n_utils::GetUChar32At(term.data(), term.length(), i); if (uchar32 == i18n_utils::kInvalidUChar32) { ICING_LOG(WARNING) << "Unable to get uchar32 from " << term - << " at position " << current_pos; - current_pos += i18n_utils::GetUtf8Length(uchar32); + << " at position " << i; continue; } char ascii_char; @@ -182,9 +177,8 @@ std::string IcuNormalizer::NormalizeLatin(const UNormalizer2* normalizer2, // tokenized. We handle it here in case there's something wrong with // the tokenizers.
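The two sides of this NormalizeLatin hunk differ in how they walk UTF-8: the incoming (+) loop visits every byte, lowercases ASCII bytes in place, decodes only at lead bytes, and lets continuation bytes fall through; the outgoing (-) loop instead advanced its index by each decoded code point's full UTF-8 length. The lead-byte test that makes the byte-wise walk safe is a two-bit check; a sketch of the idea under the usual UTF-8 layout (IsUtf8LeadByte here approximates what a helper like i18n_utils::IsLeadUtf8Byte must compute):

    #include <cstdint>

    // A byte begins a UTF-8 sequence unless it is a continuation byte,
    // i.e. unless its top two bits are 10xxxxxx.
    bool IsUtf8LeadByte(uint8_t b) {
      return (b & 0xC0) != 0x80;  // ASCII (0xxxxxxx) and multi-byte leads (11xxxxxx) pass
    }

    // Counting code points by counting lead bytes shows why skipping
    // continuation bytes never decodes the same character twice.
    int CountCodePoints(const char* s, int len) {
      int count = 0;
      for (int i = 0; i < len; ++i) {
        if (IsUtf8LeadByte(static_cast<uint8_t>(s[i]))) ++count;
      }
      return count;
    }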
int utf8_length = i18n_utils::GetUtf8Length(uchar32); - absl_ports::StrAppend(&result, term.substr(current_pos, utf8_length)); + absl_ports::StrAppend(&result, term.substr(i, utf8_length)); } - current_pos += i18n_utils::GetUtf8Length(uchar32); } } @@ -267,106 +261,5 @@ std::string IcuNormalizer::TermTransformer::Transform( return std::move(utf8_term_or).ValueOrDie(); } -CharacterIterator FindNormalizedLatinMatchEndPosition( - const UNormalizer2* normalizer2, std::string_view term, - CharacterIterator char_itr, std::string_view normalized_term) { - CharacterIterator normalized_char_itr(normalized_term); - char ascii_char; - while (char_itr.utf8_index() < term.length() && - normalized_char_itr.utf8_index() < normalized_term.length()) { - UChar32 c = char_itr.GetCurrentChar(); - if (i18n_utils::IsAscii(c)) { - c = std::tolower(c); - } else if (DiacriticCharToAscii(normalizer2, c, &ascii_char)) { - c = ascii_char; - } - UChar32 normalized_c = normalized_char_itr.GetCurrentChar(); - if (c != normalized_c) { - return char_itr; - } - char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1); - normalized_char_itr.AdvanceToUtf32(normalized_char_itr.utf32_index() + 1); - } - return char_itr; -} - -CharacterIterator -IcuNormalizer::TermTransformer::FindNormalizedNonLatinMatchEndPosition( - std::string_view term, CharacterIterator char_itr, - std::string_view normalized_term) const { - CharacterIterator normalized_char_itr(normalized_term); - UErrorCode status = U_ZERO_ERROR; - - constexpr int kUtf16CharBufferLength = 6; - UChar c16[kUtf16CharBufferLength]; - int32_t c16_length; - int32_t limit; - - constexpr int kCharBufferLength = 3 * 4; - char normalized_buffer[kCharBufferLength]; - int32_t c8_length; - while (char_itr.utf8_index() < term.length() && - normalized_char_itr.utf8_index() < normalized_term.length()) { - UChar32 c = char_itr.GetCurrentChar(); - int c_lenth = i18n_utils::GetUtf8Length(c); - u_strFromUTF8(c16, kUtf16CharBufferLength, &c16_length, - term.data() + char_itr.utf8_index(), - /*srcLength=*/c_lenth, &status); - if (U_FAILURE(status)) { - break; - } - - limit = c16_length; - utrans_transUChars(u_transliterator_, c16, &c16_length, - kUtf16CharBufferLength, - /*start=*/0, &limit, &status); - if (U_FAILURE(status)) { - break; - } - - u_strToUTF8(normalized_buffer, kCharBufferLength, &c8_length, c16, - c16_length, &status); - if (U_FAILURE(status)) { - break; - } - - for (int i = 0; i < c8_length; ++i) { - if (normalized_buffer[i] != - normalized_term[normalized_char_itr.utf8_index() + i]) { - return char_itr; - } - } - normalized_char_itr.AdvanceToUtf8(normalized_char_itr.utf8_index() + - c8_length); - char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1); - } - if (U_FAILURE(status)) { - // Failed to transform, return its original form. 
- ICING_LOG(WARNING) << "Failed to normalize UTF8 term: " << term; - } - return char_itr; -} - -CharacterIterator IcuNormalizer::FindNormalizedMatchEndPosition( - std::string_view term, std::string_view normalized_term) const { - UErrorCode status = U_ZERO_ERROR; - // ICU manages the singleton instance - const UNormalizer2* normalizer2 = unorm2_getNFCInstance(&status); - if (U_FAILURE(status)) { - ICING_LOG(WARNING) << "Failed to create a UNormalizer2 instance"; - } - - CharacterIterator char_itr(term); - UChar32 first_uchar32 = char_itr.GetCurrentChar(); - if (normalizer2 != nullptr && first_uchar32 != i18n_utils::kInvalidUChar32 && - DiacriticCharToAscii(normalizer2, first_uchar32, /*char_out=*/nullptr)) { - return FindNormalizedLatinMatchEndPosition(normalizer2, term, char_itr, - normalized_term); - } else { - return term_transformer_->FindNormalizedNonLatinMatchEndPosition( - term, char_itr, normalized_term); - } -} - } // namespace lib } // namespace icing diff --git a/icing/transform/icu/icu-normalizer.h b/icing/transform/icu/icu-normalizer.h index d4f1ebd..f20a9fb 100644 --- a/icing/transform/icu/icu-normalizer.h +++ b/icing/transform/icu/icu-normalizer.h @@ -21,7 +21,6 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/transform/normalizer.h" -#include "icing/util/character-iterator.h" #include "unicode/unorm2.h" #include "unicode/utrans.h" @@ -57,17 +56,6 @@ class IcuNormalizer : public Normalizer { // result in the non-Latin characters not properly being normalized std::string NormalizeTerm(std::string_view term) const override; - // Returns a CharacterIterator pointing to one past the end of the segment of - // term that (once normalized) matches with normalized_term. - // - // Ex. FindNormalizedMatchEndPosition("YELLOW", "yell") will return - // CharacterIterator(u8:4, u16:4, u32:4). - // - // Ex. FindNormalizedMatchEndPosition("YELLOW", "red") will return - // CharacterIterator(u8:0, u16:0, u32:0). - CharacterIterator FindNormalizedMatchEndPosition( - std::string_view term, std::string_view normalized_term) const override; - private: // A handler class that helps manage the lifecycle of UTransliterator. It's // used in IcuNormalizer to transform terms into the formats we need. @@ -87,12 +75,6 @@ class IcuNormalizer : public Normalizer { // Transforms the text based on our rules described at top of this file std::string Transform(std::string_view term) const; - // Returns a CharacterIterator pointing to one past the end of the segment - // of a non-latin term that (once normalized) matches with normalized_term. 
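The FindNormalizedMatchEndPosition API removed above (its deleted header contract: ("YELLOW", "yell") yields index 4, ("YELLOW", "red") yields 0) reduces to a lock-step walk that normalizes one character at a time and stops at the first mismatch. A simplified sketch restricted to ASCII case folding; the deleted implementations do the same walk over UTF-32 code points via CharacterIterator, consulting the normalizer per character:

    #include <cctype>
    #include <cstddef>
    #include <string_view>

    // Index one past the longest prefix of `term` that, after per-character
    // normalization (here just ASCII tolower), matches a prefix of
    // `normalized_term`.
    size_t FindNormalizedMatchEnd(std::string_view term,
                                  std::string_view normalized_term) {
      size_t i = 0;
      while (i < term.size() && i < normalized_term.size()) {
        unsigned char c = static_cast<unsigned char>(term[i]);
        if (std::tolower(c) !=
            static_cast<unsigned char>(normalized_term[i])) {
          break;  // first mismatch ends the matched prefix
        }
        ++i;
      }
      return i;  // FindNormalizedMatchEnd("YELLOW", "yell") == 4
    }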
- CharacterIterator FindNormalizedNonLatinMatchEndPosition( - std::string_view term, CharacterIterator char_itr, - std::string_view normalized_term) const; - private: explicit TermTransformer(UTransliterator* u_transliterator); diff --git a/icing/transform/icu/icu-normalizer_benchmark.cc b/icing/transform/icu/icu-normalizer_benchmark.cc index fdd4c70..b037538 100644 --- a/icing/transform/icu/icu-normalizer_benchmark.cc +++ b/icing/transform/icu/icu-normalizer_benchmark.cc @@ -14,8 +14,8 @@ #include "testing/base/public/benchmark.h" #include "gmock/gmock.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/transform/normalizer-factory.h" #include "icing/transform/normalizer.h" @@ -161,124 +161,6 @@ BENCHMARK(BM_NormalizeHiragana) ->Arg(2048000) ->Arg(4096000); -void BM_UppercaseSubTokenLength(benchmark::State& state) { - bool run_via_adb = absl::GetFlag(FLAGS_adb); - if (!run_via_adb) { - ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile( - GetTestFilePath("icing/icu.dat"))); - } - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Normalizer> normalizer, - normalizer_factory::Create( - - /*max_term_byte_size=*/std::numeric_limits<int>::max())); - - std::string input_string(state.range(0), 'A'); - std::string normalized_input_string(state.range(0), 'a'); - for (auto _ : state) { - normalizer->FindNormalizedMatchEndPosition(input_string, - normalized_input_string); - } -} -BENCHMARK(BM_UppercaseSubTokenLength) - ->Arg(1000) - ->Arg(2000) - ->Arg(4000) - ->Arg(8000) - ->Arg(16000) - ->Arg(32000) - ->Arg(64000) - ->Arg(128000) - ->Arg(256000) - ->Arg(384000) - ->Arg(512000) - ->Arg(1024000) - ->Arg(2048000) - ->Arg(4096000); - -void BM_AccentSubTokenLength(benchmark::State& state) { - bool run_via_adb = absl::GetFlag(FLAGS_adb); - if (!run_via_adb) { - ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile( - GetTestFilePath("icing/icu.dat"))); - } - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Normalizer> normalizer, - normalizer_factory::Create( - - /*max_term_byte_size=*/std::numeric_limits<int>::max())); - - std::string input_string; - std::string normalized_input_string; - while (input_string.length() < state.range(0)) { - input_string.append("àáâãā"); - normalized_input_string.append("aaaaa"); - } - - for (auto _ : state) { - normalizer->FindNormalizedMatchEndPosition(input_string, - normalized_input_string); - } -} -BENCHMARK(BM_AccentSubTokenLength) - ->Arg(1000) - ->Arg(2000) - ->Arg(4000) - ->Arg(8000) - ->Arg(16000) - ->Arg(32000) - ->Arg(64000) - ->Arg(128000) - ->Arg(256000) - ->Arg(384000) - ->Arg(512000) - ->Arg(1024000) - ->Arg(2048000) - ->Arg(4096000); - -void BM_HiraganaSubTokenLength(benchmark::State& state) { - bool run_via_adb = absl::GetFlag(FLAGS_adb); - if (!run_via_adb) { - ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile( - GetTestFilePath("icing/icu.dat"))); - } - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Normalizer> normalizer, - normalizer_factory::Create( - - /*max_term_byte_size=*/std::numeric_limits<int>::max())); - - std::string input_string; - std::string normalized_input_string; - while (input_string.length() < state.range(0)) { - input_string.append("あいうえお"); - normalized_input_string.append("アイウエオ"); - } - - for (auto _ : state) { - normalizer->FindNormalizedMatchEndPosition(input_string, - normalized_input_string); - } -} -BENCHMARK(BM_HiraganaSubTokenLength) - ->Arg(1000) - ->Arg(2000) - 
->Arg(4000) - ->Arg(8000) - ->Arg(16000) - ->Arg(32000) - ->Arg(64000) - ->Arg(128000) - ->Arg(256000) - ->Arg(384000) - ->Arg(512000) - ->Arg(1024000) - ->Arg(2048000) - ->Arg(4096000); - } // namespace } // namespace lib diff --git a/icing/transform/icu/icu-normalizer_test.cc b/icing/transform/icu/icu-normalizer_test.cc index 143da17..f5d20ff 100644 --- a/icing/transform/icu/icu-normalizer_test.cc +++ b/icing/transform/icu/icu-normalizer_test.cc @@ -16,8 +16,8 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/testing/common-matchers.h" -#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/icu-i18n-test-utils.h" #include "icing/testing/test-data.h" #include "icing/transform/normalizer-factory.h" @@ -231,104 +231,6 @@ TEST_F(IcuNormalizerTest, Truncate) { } } -TEST_F(IcuNormalizerTest, PrefixMatchLength) { - // Verify that FindNormalizedMatchEndPosition will properly find the length of - // the prefix match when given a non-normalized term and a normalized term - // is a prefix of the non-normalized one. - ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create( - /*max_term_byte_size=*/1000)); - - // Upper to lower - std::string term = "MDI"; - CharacterIterator match_end = - normalizer->FindNormalizedMatchEndPosition(term, "md"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("MD")); - - term = "Icing"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "icin"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Icin")); - - // Full-width - term = "525600"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "525"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("525")); - - term = "FULLWIDTH"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "full"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("FULL")); - - // Hiragana to Katakana - term = "あいうえお"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "アイ"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("あい")); - - term = "かきくけこ"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "カ"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("か")); - - // Latin accents - term = "Zürich"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "zur"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Zür")); - - term = "après-midi"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "apre"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("aprè")); - - term = "Buenos días"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "buenos di"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Buenos dí")); -} - -TEST_F(IcuNormalizerTest, SharedPrefixMatchLength) { - // Verify that FindNormalizedMatchEndPosition will properly find the length of - // the prefix match when given a non-normalized term and a normalized term - // that share a common prefix. 
- ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create( - /*max_term_byte_size=*/1000)); - - // Upper to lower - std::string term = "MDI"; - CharacterIterator match_end = - normalizer->FindNormalizedMatchEndPosition(term, "mgm"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("M")); - - term = "Icing"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "icky"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Ic")); - - // Full-width - term = "525600"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "525788"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("525")); - - term = "FULLWIDTH"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "fully"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("FULL")); - - // Hiragana to Katakana - term = "あいうえお"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "アイエオ"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("あい")); - - term = "かきくけこ"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "カケコ"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("か")); - - // Latin accents - term = "Zürich"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "zurg"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Zür")); - - term = "après-midi"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "apreciate"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("aprè")); - - term = "días"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "diamond"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("día")); -} - } // namespace } // namespace lib } // namespace icing diff --git a/icing/transform/map/map-normalizer.cc b/icing/transform/map/map-normalizer.cc index 61fce65..c888551 100644 --- a/icing/transform/map/map-normalizer.cc +++ b/icing/transform/map/map-normalizer.cc @@ -14,7 +14,8 @@ #include "icing/transform/map/map-normalizer.h" -#include <cctype> +#include <ctype.h> + #include <string> #include <string_view> #include <unordered_map> @@ -22,7 +23,6 @@ #include "icing/absl_ports/str_cat.h" #include "icing/transform/map/normalization-map.h" -#include "icing/util/character-iterator.h" #include "icing/util/i18n-utils.h" #include "icing/util/logging.h" #include "unicode/utypes.h" @@ -30,70 +30,48 @@ namespace icing { namespace lib { -namespace { - -UChar32 NormalizeChar(UChar32 c) { - if (i18n_utils::GetUtf16Length(c) > 1) { - // All the characters we need to normalize can be encoded into a - // single char16_t. If this character needs more than 1 char16_t code - // unit, we can skip normalization and append it directly. - return c; - } - - // The original character can be encoded into a single char16_t. - const std::unordered_map<char16_t, char16_t>* normalization_map = - GetNormalizationMap(); - if (normalization_map == nullptr) { - // Normalization map couldn't be properly initialized, append the original - // character. - ICING_LOG(WARNING) << "Unable to get a valid pointer to normalization map!"; - return c; - } - auto iterator = normalization_map->find(static_cast<char16_t>(c)); - if (iterator == normalization_map->end()) { - // Normalization mapping not found, append the original character. - return c; - } - - // Found a normalization mapping. The normalized character (stored in a - // char16_t) can have 1 or 2 bytes. - if (i18n_utils::IsAscii(iterator->second)) { - // The normalized character has 1 byte. It may be an upper-case char. 
- // Lower-case it before returning it. - return std::tolower(static_cast<char>(iterator->second)); - } else { - return iterator->second; - } -} - -} // namespace - std::string MapNormalizer::NormalizeTerm(std::string_view term) const { std::string normalized_text; normalized_text.reserve(term.length()); - int current_pos = 0; - while (current_pos < term.length()) { - if (i18n_utils::IsAscii(term[current_pos])) { - normalized_text.push_back(std::tolower(term[current_pos])); - ++current_pos; - } else { - UChar32 uchar32 = - i18n_utils::GetUChar32At(term.data(), term.length(), current_pos); + for (int i = 0; i < term.length(); ++i) { + if (i18n_utils::IsAscii(term[i])) { + // The original character has 1 byte. + normalized_text.push_back(std::tolower(term[i])); + } else if (i18n_utils::IsLeadUtf8Byte(term[i])) { + UChar32 uchar32 = i18n_utils::GetUChar32At(term.data(), term.length(), i); if (uchar32 == i18n_utils::kInvalidUChar32) { ICING_LOG(WARNING) << "Unable to get uchar32 from " << term - << " at position" << current_pos; - ++current_pos; + << " at position" << i; continue; } - UChar32 normalized_char32 = NormalizeChar(uchar32); - if (i18n_utils::IsAscii(normalized_char32)) { - normalized_text.push_back(normalized_char32); + int utf8_length = i18n_utils::GetUtf8Length(uchar32); + if (i18n_utils::GetUtf16Length(uchar32) > 1) { + // All the characters we need to normalize can be encoded into a + // single char16_t. If this character needs more than 1 char16_t code + // unit, we can skip normalization and append it directly. + absl_ports::StrAppend(&normalized_text, term.substr(i, utf8_length)); + continue; + } + // The original character can be encoded into a single char16_t. + const std::unordered_map<char16_t, char16_t>& normalization_map = + GetNormalizationMap(); + auto iterator = normalization_map.find(static_cast<char16_t>(uchar32)); + if (iterator != normalization_map.end()) { + // Found a normalization mapping. The normalized character (stored in a + // char16_t) can have 1 or 2 bytes. + if (i18n_utils::IsAscii(iterator->second)) { + // The normalized character has 1 byte. + normalized_text.push_back( + std::tolower(static_cast<char>(iterator->second))); + } else { + // The normalized character has 2 bytes. + i18n_utils::AppendUchar32ToUtf8(&normalized_text, iterator->second); + } } else { - // The normalized character has 2 bytes. - i18n_utils::AppendUchar32ToUtf8(&normalized_text, normalized_char32); + // Normalization mapping not found, append the original character. 
+ absl_ports::StrAppend(&normalized_text, term.substr(i, utf8_length)); } - current_pos += i18n_utils::GetUtf8Length(uchar32); } } @@ -104,27 +82,5 @@ std::string MapNormalizer::NormalizeTerm(std::string_view term) const { return normalized_text; } -CharacterIterator MapNormalizer::FindNormalizedMatchEndPosition( - std::string_view term, std::string_view normalized_term) const { - CharacterIterator char_itr(term); - CharacterIterator normalized_char_itr(normalized_term); - while (char_itr.utf8_index() < term.length() && - normalized_char_itr.utf8_index() < normalized_term.length()) { - UChar32 c = char_itr.GetCurrentChar(); - if (i18n_utils::IsAscii(c)) { - c = std::tolower(c); - } else { - c = NormalizeChar(c); - } - UChar32 normalized_c = normalized_char_itr.GetCurrentChar(); - if (c != normalized_c) { - return char_itr; - } - char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1); - normalized_char_itr.AdvanceToUtf32(normalized_char_itr.utf32_index() + 1); - } - return char_itr; -} - } // namespace lib } // namespace icing diff --git a/icing/transform/map/map-normalizer.h b/icing/transform/map/map-normalizer.h index ed996ae..f9c0e42 100644 --- a/icing/transform/map/map-normalizer.h +++ b/icing/transform/map/map-normalizer.h @@ -19,7 +19,6 @@ #include <string_view> #include "icing/transform/normalizer.h" -#include "icing/util/character-iterator.h" namespace icing { namespace lib { @@ -40,17 +39,6 @@ class MapNormalizer : public Normalizer { // Read more mapping details in normalization-map.cc std::string NormalizeTerm(std::string_view term) const override; - // Returns a CharacterIterator pointing to one past the end of the segment of - // term that (once normalized) matches with normalized_term. - // - // Ex. FindNormalizedMatchEndPosition("YELLOW", "yell") will return - // CharacterIterator(u8:4, u16:4, u32:4). - // - // Ex. FindNormalizedMatchEndPosition("YELLOW", "red") will return - // CharacterIterator(u8:0, u16:0, u32:0). - CharacterIterator FindNormalizedMatchEndPosition( - std::string_view term, std::string_view normalized_term) const override; - private: // The maximum term length allowed after normalization. 
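Taken together with the tests and benchmarks elsewhere in this diff, the map normalizer's table drives three visible behaviors: ASCII case folding, full-width-to-ASCII folding, and hiragana-to-katakana mapping. A hypothetical spot-check in the style of the surrounding tests (the factory call matches the one used in map-normalizer_test.cc; the expected strings are taken from test and benchmark data in this change):

    ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create(
                                   /*max_term_byte_size=*/1000));
    EXPECT_THAT(normalizer->NormalizeTerm("MDI"), Eq("mdi"));              // ASCII tolower
    EXPECT_THAT(normalizer->NormalizeTerm("ＦＵＬＬ"), Eq("full"));        // full-width -> ASCII
    EXPECT_THAT(normalizer->NormalizeTerm("あいうえお"), Eq("アイウエオ")); // hiragana -> katakana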
int max_term_byte_size_; diff --git a/icing/transform/map/map-normalizer_benchmark.cc b/icing/transform/map/map-normalizer_benchmark.cc index 8268541..691afc6 100644 --- a/icing/transform/map/map-normalizer_benchmark.cc +++ b/icing/transform/map/map-normalizer_benchmark.cc @@ -143,104 +143,6 @@ BENCHMARK(BM_NormalizeHiragana) ->Arg(2048000) ->Arg(4096000); -void BM_UppercaseSubTokenLength(benchmark::State& state) { - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Normalizer> normalizer, - normalizer_factory::Create( - - /*max_term_byte_size=*/std::numeric_limits<int>::max())); - - std::string input_string(state.range(0), 'A'); - std::string normalized_input_string(state.range(0), 'a'); - for (auto _ : state) { - normalizer->FindNormalizedMatchEndPosition(input_string, - normalized_input_string); - } -} -BENCHMARK(BM_UppercaseSubTokenLength) - ->Arg(1000) - ->Arg(2000) - ->Arg(4000) - ->Arg(8000) - ->Arg(16000) - ->Arg(32000) - ->Arg(64000) - ->Arg(128000) - ->Arg(256000) - ->Arg(384000) - ->Arg(512000) - ->Arg(1024000) - ->Arg(2048000) - ->Arg(4096000); - -void BM_AccentSubTokenLength(benchmark::State& state) { - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Normalizer> normalizer, - normalizer_factory::Create( - /*max_term_byte_size=*/std::numeric_limits<int>::max())); - - std::string input_string; - std::string normalized_input_string; - while (input_string.length() < state.range(0)) { - input_string.append("àáâãā"); - normalized_input_string.append("aaaaa"); - } - - for (auto _ : state) { - normalizer->FindNormalizedMatchEndPosition(input_string, - normalized_input_string); - } -} -BENCHMARK(BM_AccentSubTokenLength) - ->Arg(1000) - ->Arg(2000) - ->Arg(4000) - ->Arg(8000) - ->Arg(16000) - ->Arg(32000) - ->Arg(64000) - ->Arg(128000) - ->Arg(256000) - ->Arg(384000) - ->Arg(512000) - ->Arg(1024000) - ->Arg(2048000) - ->Arg(4096000); - -void BM_HiraganaSubTokenLength(benchmark::State& state) { - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<Normalizer> normalizer, - normalizer_factory::Create( - /*max_term_byte_size=*/std::numeric_limits<int>::max())); - - std::string input_string; - std::string normalized_input_string; - while (input_string.length() < state.range(0)) { - input_string.append("あいうえお"); - normalized_input_string.append("アイウエオ"); - } - - for (auto _ : state) { - normalizer->FindNormalizedMatchEndPosition(input_string, - normalized_input_string); - } -} -BENCHMARK(BM_HiraganaSubTokenLength) - ->Arg(1000) - ->Arg(2000) - ->Arg(4000) - ->Arg(8000) - ->Arg(16000) - ->Arg(32000) - ->Arg(64000) - ->Arg(128000) - ->Arg(256000) - ->Arg(384000) - ->Arg(512000) - ->Arg(1024000) - ->Arg(2048000) - ->Arg(4096000); - } // namespace } // namespace lib diff --git a/icing/transform/map/map-normalizer_test.cc b/icing/transform/map/map-normalizer_test.cc index adc5623..b62ae0e 100644 --- a/icing/transform/map/map-normalizer_test.cc +++ b/icing/transform/map/map-normalizer_test.cc @@ -23,7 +23,6 @@ #include "icing/testing/icu-i18n-test-utils.h" #include "icing/transform/normalizer-factory.h" #include "icing/transform/normalizer.h" -#include "icing/util/character-iterator.h" namespace icing { namespace lib { @@ -200,104 +199,6 @@ TEST(MapNormalizerTest, Truncate) { } } -TEST(MapNormalizerTest, PrefixMatchLength) { - // Verify that FindNormalizedMatchEndPosition will properly find the length of - // the prefix match when given a non-normalized term and a normalized term - // is a prefix of the non-normalized one. 
- ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create( - /*max_term_byte_size=*/1000)); - - // Upper to lower - std::string term = "MDI"; - CharacterIterator match_end = - normalizer->FindNormalizedMatchEndPosition(term, "md"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("MD")); - - term = "Icing"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "icin"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Icin")); - - // Full-width - term = "525600"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "525"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("525")); - - term = "FULLWIDTH"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "full"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("FULL")); - - // Hiragana to Katakana - term = "あいうえお"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "アイ"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("あい")); - - term = "かきくけこ"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "カ"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("か")); - - // Latin accents - term = "Zürich"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "zur"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Zür")); - - term = "après-midi"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "apre"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("aprè")); - - term = "Buenos días"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "buenos di"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Buenos dí")); -} - -TEST(MapNormalizerTest, SharedPrefixMatchLength) { - // Verify that FindNormalizedMatchEndPosition will properly find the length of - // the prefix match when given a non-normalized term and a normalized term - // that share a common prefix. 
- ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create( - /*max_term_byte_size=*/1000)); - - // Upper to lower - std::string term = "MDI"; - CharacterIterator match_end = - normalizer->FindNormalizedMatchEndPosition(term, "mgm"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("M")); - - term = "Icing"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "icky"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Ic")); - - // Full-width - term = "525600"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "525788"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("525")); - - term = "FULLWIDTH"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "fully"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("FULL")); - - // Hiragana to Katakana - term = "あいうえお"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "アイエオ"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("あい")); - - term = "かきくけこ"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "カケコ"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("か")); - - // Latin accents - term = "Zürich"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "zurg"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Zür")); - - term = "après-midi"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "apreciate"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("aprè")); - - term = "días"; - match_end = normalizer->FindNormalizedMatchEndPosition(term, "diamond"); - EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("día")); -} - } // namespace } // namespace lib diff --git a/icing/transform/map/normalization-map.cc b/icing/transform/map/normalization-map.cc index 0994ab8..c318036 100644 --- a/icing/transform/map/normalization-map.cc +++ b/icing/transform/map/normalization-map.cc @@ -691,21 +691,19 @@ constexpr NormalizationPair kNormalizationMappings[] = { } // namespace -const std::unordered_map<char16_t, char16_t> *GetNormalizationMap() { +const std::unordered_map<char16_t, char16_t>& GetNormalizationMap() { // The map is allocated dynamically the first time this function is executed. - static const std::unordered_map<char16_t, char16_t> *const normalization_map = - [] { - auto *map = new std::unordered_map<char16_t, char16_t>(); - // Size of all the mappings is about 2.5 KiB. - constexpr int numMappings = - sizeof(kNormalizationMappings) / sizeof(NormalizationPair); - map->reserve(numMappings); - for (size_t i = 0; i < numMappings; ++i) { - map->emplace(kNormalizationMappings[i].from, - kNormalizationMappings[i].to); - } - return map; - }(); + static const std::unordered_map<char16_t, char16_t> normalization_map = [] { + std::unordered_map<char16_t, char16_t> map; + // Size of all the mappings is about 2.5 KiB. + constexpr int numMappings = + sizeof(kNormalizationMappings) / sizeof(NormalizationPair); + map.reserve(numMappings); + for (size_t i = 0; i < numMappings; ++i) { + map.emplace(kNormalizationMappings[i].from, kNormalizationMappings[i].to); + } + return map; + }(); return normalization_map; } diff --git a/icing/transform/map/normalization-map.h b/icing/transform/map/normalization-map.h index ac7872b..aea85bd 100644 --- a/icing/transform/map/normalization-map.h +++ b/icing/transform/map/normalization-map.h @@ -23,7 +23,7 @@ namespace lib { // Returns a map containing normalization mappings. A mapping (A -> B) means // that we'll transform every character 'A' into 'B'. 
See normalization-map.cc // for mapping details. -const std::unordered_map<char16_t, char16_t>* GetNormalizationMap(); +const std::unordered_map<char16_t, char16_t>& GetNormalizationMap(); } // namespace lib } // namespace icing diff --git a/icing/transform/normalizer.h b/icing/transform/normalizer.h index 2110f0f..4cbfa63 100644 --- a/icing/transform/normalizer.h +++ b/icing/transform/normalizer.h @@ -20,7 +20,6 @@ #include <string_view> #include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/util/character-iterator.h" namespace icing { namespace lib { @@ -40,17 +39,6 @@ class Normalizer { // Normalizes the input term based on rules. See implementation classes for // specific transformation rules. virtual std::string NormalizeTerm(std::string_view term) const = 0; - - // Returns a CharacterIterator pointing to one past the end of the segment of - // term that (once normalized) matches with normalized_term. - // - // Ex. FindNormalizedMatchEndPosition("YELLOW", "yell") will return - // CharacterIterator(u8:4, u16:4, u32:4). - // - // Ex. FindNormalizedMatchEndPosition("YELLOW", "red") will return - // CharacterIterator(u8:0, u16:0, u32:0). - virtual CharacterIterator FindNormalizedMatchEndPosition( - std::string_view term, std::string_view normalized_term) const = 0; }; } // namespace lib diff --git a/icing/transform/simple/none-normalizer-factory.cc b/icing/transform/simple/none-normalizer-factory.cc new file mode 100644 index 0000000..6b35270 --- /dev/null +++ b/icing/transform/simple/none-normalizer-factory.cc @@ -0,0 +1,53 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_FACTORY_H_ +#define ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_FACTORY_H_ + +#include <memory> +#include <string_view> + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/transform/normalizer.h" +#include "icing/transform/simple/none-normalizer.h" + +namespace icing { +namespace lib { + +namespace normalizer_factory { + +// Creates a dummy normalizer. The term is not normalized, but +// the text will be truncated to max_term_byte_size if it exceeds the max size. 
+// +// Returns: +// A normalizer on success +// INVALID_ARGUMENT if max_term_byte_size <= 0 +// INTERNAL_ERROR on errors +libtextclassifier3::StatusOr<std::unique_ptr<Normalizer>> Create( + int max_term_byte_size) { + if (max_term_byte_size <= 0) { + return absl_ports::InvalidArgumentError( + "max_term_byte_size must be greater than zero."); + } + + return std::make_unique<NoneNormalizer>(max_term_byte_size); +} + +} // namespace normalizer_factory + +} // namespace lib +} // namespace icing + +#endif // ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_FACTORY_H_ diff --git a/icing/transform/simple/none-normalizer.h b/icing/transform/simple/none-normalizer.h new file mode 100644 index 0000000..47085e1 --- /dev/null +++ b/icing/transform/simple/none-normalizer.h @@ -0,0 +1,51 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_H_ +#define ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_H_ + +#include <string> +#include <string_view> + +#include "icing/transform/normalizer.h" + +namespace icing { +namespace lib { + +// This normalizer is not meant for production use. Currently only used to get +// the Icing library to compile in Jetpack. +// +// No normalization is done, but the term is truncated if it exceeds +// max_term_byte_size. +class NoneNormalizer : public Normalizer { + public: + explicit NoneNormalizer(int max_term_byte_size) + : max_term_byte_size_(max_term_byte_size){}; + + std::string NormalizeTerm(std::string_view term) const override { + if (term.length() > max_term_byte_size_) { + return std::string(term.substr(0, max_term_byte_size_)); + } + return std::string(term); + } + + private: + // The maximum term length allowed after normalization. + int max_term_byte_size_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_H_ diff --git a/icing/transform/simple/none-normalizer_test.cc b/icing/transform/simple/none-normalizer_test.cc new file mode 100644 index 0000000..e074828 --- /dev/null +++ b/icing/transform/simple/none-normalizer_test.cc @@ -0,0 +1,74 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
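A side note on the NoneNormalizer added above, before its tests below: NormalizeTerm truncates on raw bytes via substr, so a cut can land inside a multi-byte UTF-8 sequence. A minimal stand-alone illustration of that edge case (the TruncateBytes helper is ours, mirroring the substr logic above; it is a sketch, not part of this change):

    #include <cassert>
    #include <string>
    #include <string_view>

    // Mirrors NoneNormalizer::NormalizeTerm's truncation (helper name is ours).
    std::string TruncateBytes(std::string_view term, size_t max_bytes) {
      if (term.length() > max_bytes) {
        return std::string(term.substr(0, max_bytes));
      }
      return std::string(term);
    }

    int main() {
      // "ü" encodes as the two bytes 0xC3 0xBC, so a 2-byte cap splits it.
      std::string out = TruncateBytes("Zürich", 2);
      assert(out.size() == 2);
      assert(static_cast<unsigned char>(out[1]) == 0xC3);  // dangling lead byte
      return 0;
    }

This is acceptable for the Jetpack compile-only use the class comment describes, but it is one reason the class is flagged as not meant for production.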
+
+#include <memory>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+using ::testing::Eq;
+
+TEST(NoneNormalizerTest, Creation) {
+  EXPECT_THAT(normalizer_factory::Create(
+                  /*max_term_byte_size=*/5),
+              IsOk());
+  EXPECT_THAT(normalizer_factory::Create(
+                  /*max_term_byte_size=*/0),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(normalizer_factory::Create(
+                  /*max_term_byte_size=*/-1),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(NoneNormalizerTest, NoNormalizationDone) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create(
+                                                  /*max_term_byte_size=*/1000));
+  EXPECT_THAT(normalizer->NormalizeTerm(""), Eq(""));
+  EXPECT_THAT(normalizer->NormalizeTerm("hello world"), Eq("hello world"));
+
+  // Capitalization
+  EXPECT_THAT(normalizer->NormalizeTerm("MDI"), Eq("MDI"));
+
+  // Accents
+  EXPECT_THAT(normalizer->NormalizeTerm("Zürich"), Eq("Zürich"));
+
+  // Full-width punctuation to ASCII punctuation
+  EXPECT_THAT(normalizer->NormalizeTerm("。,!?:”"), Eq("。,!?:”"));
+
+  // Half-width katakana
+  EXPECT_THAT(normalizer->NormalizeTerm("カ"), Eq("カ"));
+}
+
+TEST(NoneNormalizerTest, Truncate) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create(
+                                                  /*max_term_byte_size=*/5));
+
+  // Won't be truncated
+  EXPECT_THAT(normalizer->NormalizeTerm("hi"), Eq("hi"));
+  EXPECT_THAT(normalizer->NormalizeTerm("hello"), Eq("hello"));
+
+  // Truncated to length 5.
+  EXPECT_THAT(normalizer->NormalizeTerm("hello!"), Eq("hello"));
+}
+
+}  // namespace
+}  // namespace lib
+}  // namespace icing
diff --git a/icing/util/character-iterator.cc b/icing/util/character-iterator.cc
index 0ab1e50..6c5faef 100644
--- a/icing/util/character-iterator.cc
+++ b/icing/util/character-iterator.cc
@@ -14,8 +14,6 @@
 #include "icing/util/character-iterator.h"
 
-#include "icing/util/i18n-utils.h"
-
 namespace icing {
 namespace lib {
@@ -32,37 +30,22 @@ int GetUTF8StartPosition(std::string_view text, int current_byte_index) {
 }  // namespace
 
-UChar32 CharacterIterator::GetCurrentChar() {
-  if (cached_current_char_ == i18n_utils::kInvalidUChar32) {
-    // Our indices point to the right character, we just need to read that
-    // character. No need to worry about an error. If GetUChar32At fails, then
-    // current_char will be i18n_utils::kInvalidUChar32.
-    cached_current_char_ =
-        i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
-  }
-  return cached_current_char_;
-}
-
 bool CharacterIterator::MoveToUtf8(int desired_utf8_index) {
   return (desired_utf8_index > utf8_index_) ? AdvanceToUtf8(desired_utf8_index)
                                             : RewindToUtf8(desired_utf8_index);
 }
 
 bool CharacterIterator::AdvanceToUtf8(int desired_utf8_index) {
-  ResetToStartIfNecessary();
-
   if (desired_utf8_index > text_.length()) {
     // Enforce the requirement.
     return false;
   }
   // Need to work forwards.
-  UChar32 uchar32 = cached_current_char_;
   while (utf8_index_ < desired_utf8_index) {
-    uchar32 =
+    UChar32 uchar32 =
         i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
     if (uchar32 == i18n_utils::kInvalidUChar32) {
       // Unable to retrieve a valid UTF-32 character at the previous position.
- cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } int utf8_length = i18n_utils::GetUtf8Length(uchar32); @@ -74,8 +57,6 @@ bool CharacterIterator::AdvanceToUtf8(int desired_utf8_index) { utf16_index_ += i18n_utils::GetUtf16Length(uchar32); ++utf32_index_; } - cached_current_char_ = - i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); return true; } @@ -85,30 +66,21 @@ bool CharacterIterator::RewindToUtf8(int desired_utf8_index) { return false; } // Need to work backwards. - UChar32 uchar32 = cached_current_char_; while (utf8_index_ > desired_utf8_index) { - int utf8_index = utf8_index_ - 1; - utf8_index = GetUTF8StartPosition(text_, utf8_index); - if (utf8_index < 0) { + --utf8_index_; + utf8_index_ = GetUTF8StartPosition(text_, utf8_index_); + if (utf8_index_ < 0) { // Somehow, there wasn't a single UTF-8 lead byte at // requested_byte_index or an earlier byte. - cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } // We've found the start of a unicode char! - uchar32 = - i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index); - int expected_length = utf8_index_ - utf8_index; - if (uchar32 == i18n_utils::kInvalidUChar32 || - expected_length != i18n_utils::GetUtf8Length(uchar32)) { - // Either unable to retrieve a valid UTF-32 character at the previous - // position or we skipped past an invalid sequence while seeking the - // previous start position. - cached_current_char_ = i18n_utils::kInvalidUChar32; + UChar32 uchar32 = + i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); + if (uchar32 == i18n_utils::kInvalidUChar32) { + // Unable to retrieve a valid UTF-32 character at the previous position. return false; } - cached_current_char_ = uchar32; - utf8_index_ = utf8_index; utf16_index_ -= i18n_utils::GetUtf16Length(uchar32); --utf32_index_; } @@ -122,15 +94,11 @@ bool CharacterIterator::MoveToUtf16(int desired_utf16_index) { } bool CharacterIterator::AdvanceToUtf16(int desired_utf16_index) { - ResetToStartIfNecessary(); - - UChar32 uchar32 = cached_current_char_; while (utf16_index_ < desired_utf16_index) { - uchar32 = + UChar32 uchar32 = i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); if (uchar32 == i18n_utils::kInvalidUChar32) { // Unable to retrieve a valid UTF-32 character at the previous position. - cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } int utf16_length = i18n_utils::GetUtf16Length(uchar32); @@ -141,15 +109,12 @@ bool CharacterIterator::AdvanceToUtf16(int desired_utf16_index) { int utf8_length = i18n_utils::GetUtf8Length(uchar32); if (utf8_index_ + utf8_length > text_.length()) { // Enforce the requirement. - cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } utf8_index_ += utf8_length; utf16_index_ += utf16_length; ++utf32_index_; } - cached_current_char_ = - i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); return true; } @@ -157,30 +122,21 @@ bool CharacterIterator::RewindToUtf16(int desired_utf16_index) { if (desired_utf16_index < 0) { return false; } - UChar32 uchar32 = cached_current_char_; while (utf16_index_ > desired_utf16_index) { - int utf8_index = utf8_index_ - 1; - utf8_index = GetUTF8StartPosition(text_, utf8_index); - if (utf8_index < 0) { + --utf8_index_; + utf8_index_ = GetUTF8StartPosition(text_, utf8_index_); + if (utf8_index_ < 0) { // Somehow, there wasn't a single UTF-8 lead byte at // requested_byte_index or an earlier byte. 
- cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } // We've found the start of a unicode char! - uchar32 = - i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index); - int expected_length = utf8_index_ - utf8_index; - if (uchar32 == i18n_utils::kInvalidUChar32 || - expected_length != i18n_utils::GetUtf8Length(uchar32)) { - // Either unable to retrieve a valid UTF-32 character at the previous - // position or we skipped past an invalid sequence while seeking the - // previous start position. - cached_current_char_ = i18n_utils::kInvalidUChar32; + UChar32 uchar32 = + i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); + if (uchar32 == i18n_utils::kInvalidUChar32) { + // Unable to retrieve a valid UTF-32 character at the previous position. return false; } - cached_current_char_ = uchar32; - utf8_index_ = utf8_index; utf16_index_ -= i18n_utils::GetUtf16Length(uchar32); --utf32_index_; } @@ -194,30 +150,23 @@ bool CharacterIterator::MoveToUtf32(int desired_utf32_index) { } bool CharacterIterator::AdvanceToUtf32(int desired_utf32_index) { - ResetToStartIfNecessary(); - - UChar32 uchar32 = cached_current_char_; while (utf32_index_ < desired_utf32_index) { - uchar32 = + UChar32 uchar32 = i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); if (uchar32 == i18n_utils::kInvalidUChar32) { // Unable to retrieve a valid UTF-32 character at the previous position. - cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } int utf16_length = i18n_utils::GetUtf16Length(uchar32); int utf8_length = i18n_utils::GetUtf8Length(uchar32); if (utf8_index_ + utf8_length > text_.length()) { // Enforce the requirement. - cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } utf8_index_ += utf8_length; utf16_index_ += utf16_length; ++utf32_index_; } - cached_current_char_ = - i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); return true; } @@ -225,45 +174,26 @@ bool CharacterIterator::RewindToUtf32(int desired_utf32_index) { if (desired_utf32_index < 0) { return false; } - UChar32 uchar32 = cached_current_char_; while (utf32_index_ > desired_utf32_index) { - int utf8_index = utf8_index_ - 1; - utf8_index = GetUTF8StartPosition(text_, utf8_index); - if (utf8_index < 0) { + --utf8_index_; + utf8_index_ = GetUTF8StartPosition(text_, utf8_index_); + if (utf8_index_ < 0) { // Somehow, there wasn't a single UTF-8 lead byte at // requested_byte_index or an earlier byte. - cached_current_char_ = i18n_utils::kInvalidUChar32; return false; } // We've found the start of a unicode char! - uchar32 = - i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index); - int expected_length = utf8_index_ - utf8_index; - if (uchar32 == i18n_utils::kInvalidUChar32 || - expected_length != i18n_utils::GetUtf8Length(uchar32)) { - // Either unable to retrieve a valid UTF-32 character at the previous - // position or we skipped past an invalid sequence while seeking the - // previous start position. - cached_current_char_ = i18n_utils::kInvalidUChar32; + UChar32 uchar32 = + i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_); + if (uchar32 == i18n_utils::kInvalidUChar32) { + // Unable to retrieve a valid UTF-32 character at the previous position. 
return false; } - cached_current_char_ = uchar32; - utf8_index_ = utf8_index; utf16_index_ -= i18n_utils::GetUtf16Length(uchar32); --utf32_index_; } return true; } -void CharacterIterator::ResetToStartIfNecessary() { - if (utf8_index_ < 0 || utf16_index_ < 0 || utf32_index_ < 0) { - utf8_index_ = 0; - utf16_index_ = 0; - utf32_index_ = 0; - cached_current_char_ = - i18n_utils::GetUChar32At(text_.data(), text_.length(), 0); - } -} - } // namespace lib } // namespace icing diff --git a/icing/util/character-iterator.h b/icing/util/character-iterator.h index 893718a..9df7bee 100644 --- a/icing/util/character-iterator.h +++ b/icing/util/character-iterator.h @@ -29,15 +29,10 @@ class CharacterIterator { CharacterIterator(std::string_view text, int utf8_index, int utf16_index, int utf32_index) : text_(text), - cached_current_char_(i18n_utils::kInvalidUChar32), utf8_index_(utf8_index), utf16_index_(utf16_index), utf32_index_(utf32_index) {} - // Returns the character that the iterator currently points to. - // i18n_utils::kInvalidUChar32 if unable to read that character. - UChar32 GetCurrentChar(); - // Moves current position to desired_utf8_index. // REQUIRES: 0 <= desired_utf8_index <= text_.length() bool MoveToUtf8(int desired_utf8_index); @@ -87,8 +82,6 @@ class CharacterIterator { int utf32_index() const { return utf32_index_; } bool operator==(const CharacterIterator& rhs) const { - // cached_current_char_ is just that: a cached value. As such, it's not - // considered for equality. return text_ == rhs.text_ && utf8_index_ == rhs.utf8_index_ && utf16_index_ == rhs.utf16_index_ && utf32_index_ == rhs.utf32_index_; } @@ -99,12 +92,7 @@ class CharacterIterator { } private: - // Resets the character iterator to the start of the text if any of the - // indices are negative. - void ResetToStartIfNecessary(); - std::string_view text_; - UChar32 cached_current_char_; int utf8_index_; int utf16_index_; int utf32_index_; diff --git a/icing/util/character-iterator_test.cc b/icing/util/character-iterator_test.cc deleted file mode 100644 index 195a47b..0000000 --- a/icing/util/character-iterator_test.cc +++ /dev/null @@ -1,266 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
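A note on the rewind paths above: RewindToUtf8, RewindToUtf16, and RewindToUtf32 all lean on GetUTF8StartPosition, whose body is elided from the hunk at the top of this file. The idea is a backward scan past UTF-8 continuation bytes, which always carry the bit pattern 10xxxxxx. A minimal sketch under that assumption, with our own naming:

    #include <string_view>

    // Sketch of a lead-byte scan in the spirit of GetUTF8StartPosition (the
    // real body is not shown in this diff). UTF-8 continuation bytes are
    // 0b10xxxxxx, so mask with 0xC0 and compare against 0x80.
    int FindUtf8LeadByte(std::string_view text, int byte_index) {
      while (byte_index >= 0 &&
             (static_cast<unsigned char>(text[byte_index]) & 0xC0) == 0x80) {
        --byte_index;  // step back past continuation bytes
      }
      return byte_index;  // index of the lead byte, or -1 if none was found
    }

If no lead byte exists at or before the requested index, the rewind functions above treat that as corrupt input and bail out with false.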
- -#include "icing/util/character-iterator.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "icing/testing/icu-i18n-test-utils.h" - -namespace icing { -namespace lib { - -using ::testing::Eq; -using ::testing::IsFalse; -using ::testing::IsTrue; - -TEST(CharacterIteratorTest, BasicUtf8) { - constexpr std::string_view kText = "¿Dónde está la biblioteca?"; - CharacterIterator iterator(kText); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿")); - - EXPECT_THAT(iterator.AdvanceToUtf8(4), IsTrue()); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó")); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2, - /*utf32_index=*/2))); - - EXPECT_THAT(iterator.AdvanceToUtf8(18), IsTrue()); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b")); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15, - /*utf32_index=*/15))); - - EXPECT_THAT(iterator.AdvanceToUtf8(28), IsTrue()); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?")); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25, - /*utf32_index=*/25))); - - EXPECT_THAT(iterator.AdvanceToUtf8(29), IsTrue()); - EXPECT_THAT(iterator.GetCurrentChar(), Eq(0)); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/29, /*utf16_index=*/26, - /*utf32_index=*/26))); - - EXPECT_THAT(iterator.RewindToUtf8(28), IsTrue()); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?")); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25, - /*utf32_index=*/25))); - - EXPECT_THAT(iterator.RewindToUtf8(18), IsTrue()); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b")); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15, - /*utf32_index=*/15))); - - EXPECT_THAT(iterator.RewindToUtf8(4), IsTrue()); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó")); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2, - /*utf32_index=*/2))); - - EXPECT_THAT(iterator.RewindToUtf8(0), IsTrue()); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿")); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/0, /*utf16_index=*/0, - /*utf32_index=*/0))); -} - -TEST(CharacterIteratorTest, BasicUtf16) { - constexpr std::string_view kText = "¿Dónde está la biblioteca?"; - CharacterIterator iterator(kText); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿")); - - EXPECT_THAT(iterator.AdvanceToUtf16(2), IsTrue()); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó")); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2, - /*utf32_index=*/2))); - - EXPECT_THAT(iterator.AdvanceToUtf16(15), IsTrue()); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b")); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15, - /*utf32_index=*/15))); - - EXPECT_THAT(iterator.AdvanceToUtf16(25), IsTrue()); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?")); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25, - /*utf32_index=*/25))); - - EXPECT_THAT(iterator.AdvanceToUtf16(26), IsTrue()); - EXPECT_THAT(iterator.GetCurrentChar(), Eq(0)); - EXPECT_THAT(iterator, - Eq(CharacterIterator(kText, /*utf8_index=*/29, /*utf16_index=*/26, - /*utf32_index=*/26))); - - 
EXPECT_THAT(iterator.RewindToUtf16(25), IsTrue());
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
-                                   /*utf32_index=*/25)));
-
-  EXPECT_THAT(iterator.RewindToUtf16(15), IsTrue());
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
-                                   /*utf32_index=*/15)));
-
-  EXPECT_THAT(iterator.RewindToUtf16(2), IsTrue());
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
-                                   /*utf32_index=*/2)));
-
-  EXPECT_THAT(iterator.RewindToUtf8(0), IsTrue());
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/0, /*utf16_index=*/0,
-                                   /*utf32_index=*/0)));
-}
-
-TEST(CharacterIteratorTest, BasicUtf32) {
-  constexpr std::string_view kText = "¿Dónde está la biblioteca?";
-  CharacterIterator iterator(kText);
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
-
-  EXPECT_THAT(iterator.AdvanceToUtf32(2), IsTrue());
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
-                                   /*utf32_index=*/2)));
-
-  EXPECT_THAT(iterator.AdvanceToUtf32(15), IsTrue());
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
-                                   /*utf32_index=*/15)));
-
-  EXPECT_THAT(iterator.AdvanceToUtf32(25), IsTrue());
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
-                                   /*utf32_index=*/25)));
-
-  EXPECT_THAT(iterator.AdvanceToUtf32(26), IsTrue());
-  EXPECT_THAT(iterator.GetCurrentChar(), Eq(0));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/29, /*utf16_index=*/26,
-                                   /*utf32_index=*/26)));
-
-  EXPECT_THAT(iterator.RewindToUtf32(25), IsTrue());
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
-                                   /*utf32_index=*/25)));
-
-  EXPECT_THAT(iterator.RewindToUtf32(15), IsTrue());
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
-                                   /*utf32_index=*/15)));
-
-  EXPECT_THAT(iterator.RewindToUtf32(2), IsTrue());
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
-                                   /*utf32_index=*/2)));
-
-  EXPECT_THAT(iterator.RewindToUtf32(0), IsTrue());
-  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
-  EXPECT_THAT(iterator,
-              Eq(CharacterIterator(kText, /*utf8_index=*/0, /*utf16_index=*/0,
-                                   /*utf32_index=*/0)));
-}
-
-TEST(CharacterIteratorTest, InvalidUtf) {
-  // "\255" is an invalid sequence.
-  constexpr std::string_view kText = "foo \255 bar";
-  CharacterIterator iterator(kText);
-
-  // Try to advance to the 'b' in 'bar'. This will fail and leave us pointed at
-  // the invalid sequence '\255'. GetCurrentChar() should return an invalid
-  // character.
- EXPECT_THAT(iterator.AdvanceToUtf8(6), IsFalse()); - EXPECT_THAT(iterator.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32)); - CharacterIterator exp_iterator(kText, /*utf8_index=*/4, /*utf16_index=*/4, - /*utf32_index=*/4); - EXPECT_THAT(iterator, Eq(exp_iterator)); - - EXPECT_THAT(iterator.AdvanceToUtf16(6), IsFalse()); - EXPECT_THAT(iterator.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32)); - EXPECT_THAT(iterator, Eq(exp_iterator)); - - EXPECT_THAT(iterator.AdvanceToUtf32(6), IsFalse()); - EXPECT_THAT(iterator.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32)); - EXPECT_THAT(iterator, Eq(exp_iterator)); - - // Create the iterator with it pointing at the 'b' in 'bar'. - iterator = CharacterIterator(kText, /*utf8_index=*/6, /*utf16_index=*/6, - /*utf32_index=*/6); - EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b")); - - // Try to advance to the last 'o' in 'foo'. This will fail and leave us - // pointed at the ' ' before the invalid sequence '\255'. - exp_iterator = CharacterIterator(kText, /*utf8_index=*/5, /*utf16_index=*/5, - /*utf32_index=*/5); - EXPECT_THAT(iterator.RewindToUtf8(2), IsFalse()); - EXPECT_THAT(iterator.GetCurrentChar(), Eq(' ')); - EXPECT_THAT(iterator, Eq(exp_iterator)); - - EXPECT_THAT(iterator.RewindToUtf16(2), IsFalse()); - EXPECT_THAT(iterator.GetCurrentChar(), Eq(' ')); - EXPECT_THAT(iterator, Eq(exp_iterator)); - - EXPECT_THAT(iterator.RewindToUtf32(2), IsFalse()); - EXPECT_THAT(iterator.GetCurrentChar(), Eq(' ')); - EXPECT_THAT(iterator, Eq(exp_iterator)); -} - -TEST(CharacterIteratorTest, MoveToUtfNegativeIndex) { - constexpr std::string_view kText = "¿Dónde está la biblioteca?"; - - CharacterIterator iterator_utf8(kText, /*utf8_index=*/-1, /*utf16_index=*/0, - /*utf32_index=*/0); - // We should be able to successfully move when the index is negative. - EXPECT_THAT(iterator_utf8.MoveToUtf8(0), IsTrue()); - // The character cache should be reset and contain the first character when - // resetting to index 0. 
- EXPECT_THAT(UCharToString(iterator_utf8.GetCurrentChar()), Eq("¿")); - EXPECT_THAT(iterator_utf8.utf8_index(), Eq(0)); - EXPECT_THAT(iterator_utf8.utf16_index(), Eq(0)); - EXPECT_THAT(iterator_utf8.utf32_index(), Eq(0)); - - CharacterIterator iterator_utf16(kText, /*utf8_index=*/0, /*utf16_index=*/-1, - /*utf32_index=*/0); - EXPECT_THAT(iterator_utf16.MoveToUtf16(1), IsTrue()); - EXPECT_THAT(iterator_utf16.GetCurrentChar(), Eq('D')); - EXPECT_THAT(iterator_utf16.utf8_index(), Eq(2)); - EXPECT_THAT(iterator_utf16.utf16_index(), Eq(1)); - EXPECT_THAT(iterator_utf16.utf32_index(), Eq(1)); - - CharacterIterator iterator_utf32(kText, /*utf8_index=*/0, /*utf16_index=*/0, - /*utf32_index=*/-1); - EXPECT_THAT(iterator_utf32.MoveToUtf32(2), IsTrue()); - EXPECT_THAT(UCharToString(iterator_utf32.GetCurrentChar()), Eq("ó")); - EXPECT_THAT(iterator_utf32.utf8_index(), Eq(3)); - EXPECT_THAT(iterator_utf32.utf16_index(), Eq(2)); - EXPECT_THAT(iterator_utf32.utf32_index(), Eq(2)); -} - -} // namespace lib -} // namespace icing diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc index 45c23e0..cb013d7 100644 --- a/icing/util/document-validator_test.cc +++ b/icing/util/document-validator_test.cc @@ -46,15 +46,15 @@ constexpr char kPropertyEmails[] = "emails"; constexpr char kDefaultNamespace[] = "icing"; constexpr char kDefaultString[] = "This is a string."; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = - PropertyConfigProto::Cardinality::OPTIONAL; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = - PropertyConfigProto::Cardinality::REQUIRED; -constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = - PropertyConfigProto::Cardinality::REPEATED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = + PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = + PropertyConfigProto_Cardinality_Code_REQUIRED; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; -constexpr PropertyConfigProto::DataType::Code TYPE_STRING = - PropertyConfigProto::DataType::STRING; +constexpr PropertyConfigProto_DataType_Code TYPE_STRING = + PropertyConfigProto_DataType_Code_STRING; class DocumentValidatorTest : public ::testing::Test { protected: @@ -93,11 +93,9 @@ class DocumentValidatorTest : public ::testing::Test { .SetCardinality(CARDINALITY_REPEATED))) .Build(); - schema_dir_ = GetTestTempDir() + "/schema_store"; - ASSERT_TRUE(filesystem_.CreateDirectory(schema_dir_.c_str())); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, GetTestTempDir(), &fake_clock_)); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); document_validator_ = @@ -124,7 +122,6 @@ class DocumentValidatorTest : public ::testing::Test { SimpleEmailBuilder().Build()); } - std::string schema_dir_; std::unique_ptr<DocumentValidator> document_validator_; std::unique_ptr<SchemaStore> schema_store_; Filesystem filesystem_; diff --git a/icing/util/i18n-utils.cc b/icing/util/i18n-utils.cc index ec327ad..cd0a227 100644 --- a/icing/util/i18n-utils.cc +++ b/icing/util/i18n-utils.cc @@ -116,8 +116,6 @@ bool IsAscii(char c) { return U8_IS_SINGLE((uint8_t)c); } bool IsAscii(UChar32 c) { return U8_LENGTH(c) == 1; } -bool IsAlphaNumeric(UChar32 c) { return u_isalnum(c); } - int GetUtf8Length(UChar32 c) { return U8_LENGTH(c); } int 
GetUtf16Length(UChar32 c) { return U16_LENGTH(c); } diff --git a/icing/util/i18n-utils.h b/icing/util/i18n-utils.h index 491df6b..82ae828 100644 --- a/icing/util/i18n-utils.h +++ b/icing/util/i18n-utils.h @@ -67,9 +67,6 @@ bool IsAscii(char c); // Checks if the Unicode char is within ASCII range. bool IsAscii(UChar32 c); -// Checks if the Unicode char is alphanumeric. -bool IsAlphaNumeric(UChar32 c); - // Returns how many code units (char) are used for the UTF-8 encoding of this // Unicode character. Returns 0 if not valid. int GetUtf8Length(UChar32 c); diff --git a/java/Android.bp b/java/Android.bp index 6133230..ef417ba 100644 --- a/java/Android.bp +++ b/java/Android.bp @@ -32,6 +32,5 @@ java_library { "androidx.annotation_annotation", ], sdk_version: "current", - min_sdk_version: "Tiramisu", apex_available: ["com.android.appsearch"], } diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java index 95e0c84..1f5fb51 100644 --- a/java/src/com/google/android/icing/IcingSearchEngine.java +++ b/java/src/com/google/android/icing/IcingSearchEngine.java @@ -43,8 +43,6 @@ import com.google.android.icing.proto.SearchSpecProto; import com.google.android.icing.proto.SetSchemaResultProto; import com.google.android.icing.proto.StatusProto; import com.google.android.icing.proto.StorageInfoResultProto; -import com.google.android.icing.proto.SuggestionResponse; -import com.google.android.icing.proto.SuggestionSpecProto; import com.google.android.icing.proto.UsageReport; import com.google.protobuf.ExtensionRegistryLite; import com.google.protobuf.InvalidProtocolBufferException; @@ -372,26 +370,6 @@ public class IcingSearchEngine implements Closeable { } @NonNull - public SuggestionResponse searchSuggestions(@NonNull SuggestionSpecProto suggestionSpec) { - byte[] suggestionResponseBytes = nativeSearchSuggestions(this, suggestionSpec.toByteArray()); - if (suggestionResponseBytes == null) { - Log.e(TAG, "Received null suggestionResponseBytes from native."); - return SuggestionResponse.newBuilder() - .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL)) - .build(); - } - - try { - return SuggestionResponse.parseFrom(suggestionResponseBytes, EXTENSION_REGISTRY_LITE); - } catch (InvalidProtocolBufferException e) { - Log.e(TAG, "Error parsing suggestionResponseBytes.", e); - return SuggestionResponse.newBuilder() - .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL)) - .build(); - } - } - - @NonNull public DeleteByNamespaceResultProto deleteByNamespace(@NonNull String namespace) { throwIfClosed(); @@ -626,7 +604,4 @@ public class IcingSearchEngine implements Closeable { private static native byte[] nativeGetStorageInfo(IcingSearchEngine instance); private static native byte[] nativeReset(IcingSearchEngine instance); - - private static native byte[] nativeSearchSuggestions( - IcingSearchEngine instance, byte[] suggestionSpecBytes); } diff --git a/java/tests/instrumentation/src/androidx/appsearch/smoketest/AndroidXSmokeTest.java b/java/tests/instrumentation/src/androidx/appsearch/smoketest/AppSearchSmokeTest.java index 98b1b25..8fae104 100644 --- a/java/tests/instrumentation/src/androidx/appsearch/smoketest/AndroidXSmokeTest.java +++ b/java/tests/instrumentation/src/androidx/appsearch/smoketest/AppSearchSmokeTest.java @@ -24,7 +24,6 @@ import androidx.appsearch.app.AppSearchSchema; import androidx.appsearch.app.AppSearchSchema.PropertyConfig; import 
androidx.appsearch.app.AppSearchSchema.StringPropertyConfig; import androidx.appsearch.app.AppSearchSession; -import androidx.appsearch.app.GenericDocument; import androidx.appsearch.app.PutDocumentsRequest; import androidx.appsearch.app.SearchResult; import androidx.appsearch.app.SearchResults; @@ -33,16 +32,15 @@ import androidx.appsearch.app.SetSchemaRequest; import androidx.appsearch.localstorage.LocalStorage; import androidx.appsearch.localstorage.LocalStorage.SearchContext; import androidx.test.core.app.ApplicationProvider; -import androidx.test.ext.junit.runners.AndroidJUnit4; +import androidx.test.filters.SmallTest; import org.junit.Before; import org.junit.Test; -import org.junit.runner.RunWith; import java.util.List; -@RunWith(AndroidJUnit4.class) -public class AndroidXSmokeTest { +@SmallTest +public class AppSearchSmokeTest { private AppSearchSession appSearch; @Before @@ -50,8 +48,7 @@ public class AndroidXSmokeTest { appSearch = LocalStorage.createSearchSession( new SearchContext.Builder( - ApplicationProvider.getApplicationContext(), - "database") + ApplicationProvider.getApplicationContext()) .build()) .get(); // Remove all data before test @@ -82,7 +79,7 @@ public class AndroidXSmokeTest { .build()) .get(); - TestDocument input = new TestDocument("namespace", "id1", "avocado"); + TestDocument input = new TestDocument("uri1", "avocado"); appSearch .put(new PutDocumentsRequest.Builder().addDocuments(input).build()) .get() @@ -98,11 +95,10 @@ public class AndroidXSmokeTest { SearchResult result = page.get(0); assertThat(results.getNextPage().get()).isEmpty(); - GenericDocument genericOutput = result.getGenericDocument(); - assertEquals("id1", genericOutput.getId()); - assertEquals("avocado", genericOutput.getPropertyString("body")); - TestDocument output = genericOutput.toDocumentClass(TestDocument.class); - assertEquals("id1", output.getId()); + assertEquals("uri1", result.getDocument().getUri()); + assertEquals("avocado", result.getDocument().getPropertyString("body")); + TestDocument output = result.getDocument().toDocumentClass(TestDocument.class); + assertEquals("uri1", output.getUri()); assertEquals("avocado", output.getBody()); } } diff --git a/java/tests/instrumentation/src/androidx/appsearch/smoketest/TestDocument.java b/java/tests/instrumentation/src/androidx/appsearch/smoketest/TestDocument.java index ebf32e4..089ff55 100644 --- a/java/tests/instrumentation/src/androidx/appsearch/smoketest/TestDocument.java +++ b/java/tests/instrumentation/src/androidx/appsearch/smoketest/TestDocument.java @@ -21,28 +21,21 @@ import androidx.appsearch.app.AppSearchSchema.StringPropertyConfig; @Document public class TestDocument { - @Document.Namespace private final String mNamespace; + @Document.Uri private final String uri; - @Document.Id private final String mId; + @Document.Property(indexingType = StringPropertyConfig.INDEXING_TYPE_PREFIXES) + private final String body; - @Document.StringProperty(indexingType = StringPropertyConfig.INDEXING_TYPE_PREFIXES) - private final String mBody; - - TestDocument(String namespace, String id, String body) { - mNamespace = namespace; - mId = id; - mBody = body; - } - - public String getNamespace() { - return mNamespace; + TestDocument(String uri, String body) { + this.uri = uri; + this.body = body; } - public String getId() { - return mId; + public String getUri() { + return uri; } public String getBody() { - return mBody; + return body; } } diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java 
b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java index a46814c..64f98f6 100644 --- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java +++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java @@ -51,11 +51,7 @@ import com.google.android.icing.proto.StatusProto; import com.google.android.icing.proto.StorageInfoResultProto; import com.google.android.icing.proto.StringIndexingConfig; import com.google.android.icing.proto.StringIndexingConfig.TokenizerType; -import com.google.android.icing.proto.SuggestionResponse; -import com.google.android.icing.proto.SuggestionSpecProto; -import com.google.android.icing.proto.SuggestionSpecProto.SuggestionScoringSpecProto; import com.google.android.icing.proto.TermMatchType; -import com.google.android.icing.proto.TermMatchType.Code; import com.google.android.icing.proto.UsageReport; import com.google.android.icing.IcingSearchEngine; import java.io.File; @@ -63,6 +59,7 @@ import java.util.HashMap; import java.util.Map; import org.junit.After; import org.junit.Before; +import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -493,6 +490,7 @@ public final class IcingSearchEngineTest { } @Test + @Ignore("b/190845688") public void testCJKTSnippets() throws Exception { assertStatusOk(icingSearchEngine.initialize().getStatus()); @@ -500,13 +498,12 @@ public final class IcingSearchEngineTest { assertStatusOk( icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false).getStatus()); - // String: "天是蓝的" - // ^ ^^ ^ - // UTF16 idx: 0 1 2 3 - // Breaks into segments: "天", "是", "蓝", "的" - // "The sky is blue" - String chinese = "天是蓝的"; - assertThat(chinese.length()).isEqualTo(4); + // String: "我每天走路去上班。" + // ^ ^ ^ ^^ + // UTF16 idx: 0 1 3 5 6 + // Breaks into segments: "我", "每天", "走路", "去", "上班" + String chinese = "我每天走路去上班。"; + assertThat(chinese.length()).isEqualTo(9); DocumentProto emailDocument1 = createEmailDocument("namespace", "uri1").toBuilder() .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues(chinese)) @@ -516,7 +513,7 @@ public final class IcingSearchEngineTest { // Search and request snippet matching but no windowing. 
SearchSpecProto searchSpec = SearchSpecProto.newBuilder() - .setQuery("是") + .setQuery("每") .setTermMatchType(TermMatchType.Code.PREFIX) .build(); ResultSpecProto resultSpecProto = @@ -555,9 +552,9 @@ public final class IcingSearchEngineTest { int matchStart = matchProto.getExactMatchUtf16Position(); int matchEnd = matchStart + matchProto.getExactMatchUtf16Length(); assertThat(matchStart).isEqualTo(1); - assertThat(matchEnd).isEqualTo(2); + assertThat(matchEnd).isEqualTo(3); String match = content.substring(matchStart, matchEnd); - assertThat(match).isEqualTo("是"); + assertThat(match).isEqualTo("每天"); } @Test @@ -627,47 +624,6 @@ public final class IcingSearchEngineTest { assertThat(match).isEqualTo("𐀂𐀃"); } - @Test - public void testSearchSuggestions() { - assertStatusOk(icingSearchEngine.initialize().getStatus()); - - SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig(); - SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build(); - assertThat( - icingSearchEngine - .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false) - .getStatus() - .getCode()) - .isEqualTo(StatusProto.Code.OK); - - DocumentProto emailDocument1 = - createEmailDocument("namespace", "uri1").toBuilder() - .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("fo")) - .build(); - DocumentProto emailDocument2 = - createEmailDocument("namespace", "uri2").toBuilder() - .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo")) - .build(); - assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus()); - assertStatusOk(icingSearchEngine.put(emailDocument2).getStatus()); - - SuggestionSpecProto suggestionSpec = - SuggestionSpecProto.newBuilder() - .setPrefix("f") - .setNumToReturn(10) - .setScoringSpec( - SuggestionScoringSpecProto.newBuilder() - .setScoringMatchType(Code.EXACT_ONLY) - .build()) - .build(); - - SuggestionResponse response = icingSearchEngine.searchSuggestions(suggestionSpec); - assertStatusOk(response.getStatus()); - assertThat(response.getSuggestionsList()).hasSize(2); - assertThat(response.getSuggestions(0).getQuery()).isEqualTo("foo"); - assertThat(response.getSuggestions(1).getQuery()).isEqualTo("fo"); - } - private static void assertStatusOk(StatusProto status) { assertWithMessage(status.getMessage()).that(status.getCode()).isEqualTo(StatusProto.Code.OK); } diff --git a/proto/Android.bp b/proto/Android.bp index cda0ec2..4fb0c18 100644 --- a/proto/Android.bp +++ b/proto/Android.bp @@ -43,5 +43,4 @@ cc_library_static { export_proto_headers: true, }, srcs: ["icing/**/*.proto"], - min_sdk_version: "Tiramisu", } diff --git a/proto/icing/proto/debug.proto b/proto/icing/proto/debug.proto deleted file mode 100644 index 504ae43..0000000 --- a/proto/icing/proto/debug.proto +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2022 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
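The deleted file below defines the debug-info messages Icing returns to clients. For orientation, here is a hypothetical consumer of the DebugInfoResultProto defined at the end of the file; accessor names follow standard protobuf C++ codegen (the namespace field surfaces as namespace_() because namespace is a C++ keyword), and the generated header path is an assumption:

    #include <cstdio>

    #include "icing/proto/debug.pb.h"  // assumed generated header path

    void LogCorpusInfo(const icing::lib::DebugInfoResultProto& result) {
      if (result.status().code() != icing::lib::StatusProto::OK) {
        return;
      }
      // One CorpusInfo entry per (namespace, schema type) pair; deleted and
      // expired documents are skipped, per the comments in the file below.
      for (const auto& corpus :
           result.debug_info().document_info().corpus_info()) {
        std::printf("%s/%s: %u documents, %u tokens\n",
                    corpus.namespace_().c_str(), corpus.schema().c_str(),
                    corpus.total_documents(), corpus.total_token());
      }
    }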
- -syntax = "proto2"; - -package icing.lib; - -import "icing/proto/schema.proto"; -import "icing/proto/status.proto"; -import "icing/proto/storage.proto"; - -option java_package = "com.google.android.icing.proto"; -option java_multiple_files = true; -option objc_class_prefix = "ICNG"; - -// Next tag: 4 -message IndexDebugInfoProto { - // Storage information of the index. - optional IndexStorageInfoProto index_storage_info = 1; - - message MainIndexDebugInfoProto { - // Information about the main lexicon. - // TODO(b/222349894) Convert the string output to a protocol buffer instead. - optional string lexicon_info = 1; - - // Last added document id. - optional uint32 last_added_document_id = 2; - - // If verbosity > 0, return information about the posting list storage. - // TODO(b/222349894) Convert the string output to a protocol buffer instead. - optional string flash_index_storage_info = 3; - } - optional MainIndexDebugInfoProto main_index_info = 2; - - message LiteIndexDebugInfoProto { - // Current number of hits. - optional uint32 curr_size = 1; - - // The maximum possible number of hits. - optional uint32 hit_buffer_size = 2; - - // Last added document id. - optional uint32 last_added_document_id = 3; - - // The first position in the hit buffer that is not sorted yet, - // or curr_size if all hits are sorted. - optional uint32 searchable_end = 4; - - // The most recent checksum of the lite index, by calling - // LiteIndex::ComputeChecksum(). - optional uint32 index_crc = 5; - - // Information about the lite lexicon. - // TODO(b/222349894) Convert the string output to a protocol buffer instead. - optional string lexicon_info = 6; - } - optional LiteIndexDebugInfoProto lite_index_info = 3; -} - -// Next tag: 4 -message DocumentDebugInfoProto { - // Storage information of the document store. - optional DocumentStorageInfoProto document_storage_info = 1; - - // The most recent checksum of the document store, by calling - // DocumentStore::ComputeChecksum(). - optional uint32 crc = 2; - - message CorpusInfo { - optional string namespace = 1; - optional string schema = 2; - optional uint32 total_documents = 3; - optional uint32 total_token = 4; - } - - // If verbosity > 0, return the total number of documents and tokens in each - // (namespace, schema type) pair. - // Note that deleted and expired documents are skipped in the output. - repeated CorpusInfo corpus_info = 3; -} - -// Next tag: 3 -message SchemaDebugInfoProto { - // Copy of the SchemaProto if it has been set in the schema store. - // Modifying this does not affect the Schema that IcingSearchEngine holds. - optional SchemaProto schema = 1; - - // The most recent checksum of the schema store, by calling - // SchemaStore::ComputeChecksum(). - optional uint32 crc = 2; -} - -// Next tag: 4 -message DebugInfoProto { - // Debug information of the index. - optional IndexDebugInfoProto index_info = 1; - - // Debug information of the document store. - optional DocumentDebugInfoProto document_info = 2; - - // Debug information of the schema store. - optional SchemaDebugInfoProto schema_info = 3; -} - -// Next tag: 3 -message DebugInfoResultProto { - // Status code can be one of: - // OK - // FAILED_PRECONDITION - // - // See status.proto for more details. - optional StatusProto status = 1; - - // Debug information for Icing. 
- optional DebugInfoProto debug_info = 2; -} diff --git a/proto/icing/proto/document.proto b/proto/icing/proto/document.proto index 1a501e7..9a4e5b9 100644 --- a/proto/icing/proto/document.proto +++ b/proto/icing/proto/document.proto @@ -209,7 +209,7 @@ message DeleteBySchemaTypeResultProto { } // Result of a call to IcingSearchEngine.DeleteByQuery -// Next tag: 5 +// Next tag: 3 message DeleteByQueryResultProto { // Status code can be one of: // OK @@ -224,20 +224,5 @@ message DeleteByQueryResultProto { optional StatusProto status = 1; // Stats for delete execution performance. - optional DeleteByQueryStatsProto delete_by_query_stats = 3; - - // Used by DeleteByQueryResultProto to return information about deleted - // documents. - message DocumentGroupInfo { - optional string namespace = 1; - optional string schema = 2; - repeated string uris = 3; - } - - // Additional return message that shows the uris of the deleted documents, if - // users set return_deleted_document_info to true. - // The result is grouped by the corresponding namespace and type. - repeated DocumentGroupInfo deleted_documents = 4; - - reserved 2; + optional DeleteStatsProto delete_stats = 2; } diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto index 7fe1e6f..ab2556d 100644 --- a/proto/icing/proto/initialize.proto +++ b/proto/icing/proto/initialize.proto @@ -30,6 +30,19 @@ message IcingSearchEngineOptions { // the index saved by the last instance. optional string base_dir = 1; + // The maximum number of tokens to be allowed per document. If a document + // exceeds this number of tokens, then only the first max_tokens_per_doc + // will be indexed. + // + // Clients may use this value to prevent the possibility of a select few + // documents from exhausting limits in the index that are shared between all + // documents (ie max allowed index size). + // + // Valid values: [1, INT_MAX], Current default is 1/5 of the default of + // max_document_size. + // Optional. + optional int32 max_tokens_per_doc = 2 [default = 13107]; + // The maximum allowable token length. All tokens in excess of this size // will be truncated to max_token_length before being indexed. // @@ -57,8 +70,6 @@ message IcingSearchEngineOptions { // Valid values: [1, INT_MAX] // Optional. optional int32 index_merge_size = 4 [default = 1048576]; // 1 MiB - - reserved 2; } // Result of a call to IcingSearchEngine.Initialize diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto index 0a7c4a6..29f7f80 100644 --- a/proto/icing/proto/logging.proto +++ b/proto/icing/proto/logging.proto @@ -23,7 +23,7 @@ option java_multiple_files = true; option objc_class_prefix = "ICNG"; // Stats of the top-level function IcingSearchEngine::Initialize(). -// Next tag: 12 +// Next tag: 11 message InitializeStatsProto { // Overall time used for the function call. optional int32 latency_ms = 1; @@ -46,9 +46,6 @@ message InitializeStatsProto { // Random I/O errors. IO_ERROR = 4; - - // The document log is using legacy format. - LEGACY_DOCUMENT_LOG_FORMAT = 5; } // Possible recovery causes for document store: @@ -95,10 +92,6 @@ message InitializeStatsProto { // Number of schema types currently in schema store. optional int32 num_schema_types = 10; - - // Number of consecutive initialization failures that immediately preceded - // this initialization. - optional int32 num_previous_init_failures = 11; } // Stats of the top-level function IcingSearchEngine::Put(). 
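Tying the initialize.proto hunk above to the tokenization stats in the next hunk: max_tokens_per_doc caps how much of a single document gets indexed, and exceeded_max_token_num (below) reports when that cap was hit during a Put. An illustrative client configuration using only the fields shown above (standard protobuf C++ setters; the directory is a placeholder):

    #include "icing/proto/initialize.pb.h"

    icing::lib::IcingSearchEngineOptions MakeOptions() {
      icing::lib::IcingSearchEngineOptions options;
      options.set_base_dir("/placeholder/icing/dir");  // placeholder path
      options.set_max_tokens_per_doc(13107);  // the documented default
      options.set_index_merge_size(1048576);  // 1 MiB, per the field's default
      return options;
    }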
@@ -121,10 +114,12 @@ message PutDocumentStatsProto { optional int32 document_size = 5; message TokenizationStats { + // Whether the number of tokens to be indexed exceeded the max number of + // tokens per document. + optional bool exceeded_max_token_num = 2; + // Number of tokens added to the index. optional int32 num_tokens_indexed = 1; - - reserved 2; } optional TokenizationStats tokenization_stats = 6; } @@ -186,7 +181,8 @@ message QueryStatsProto { } // Stats of the top-level functions IcingSearchEngine::Delete, -// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType. +// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType, +// IcingSearchEngine::DeleteByQuery. // Next tag: 4 message DeleteStatsProto { // Overall time used for the function call. @@ -200,10 +196,8 @@ message DeleteStatsProto { // Delete one document. SINGLE = 1; - // Delete by query. This value is deprecated. - // IcingSearchEngine::DeleteByQuery will return a DeleteByQueryStatsProto - // rather than a DeleteStatsProto. - DEPRECATED_QUERY = 2 [deprecated = true]; + // Delete by query. + QUERY = 2; // Delete by namespace. NAMESPACE = 3; @@ -217,32 +211,3 @@ message DeleteStatsProto { // Number of documents deleted by this call. optional int32 num_documents_deleted = 3; } - -// Stats of the top-level functions IcingSearchEngine::DeleteByQuery. -// Next tag: 9 -message DeleteByQueryStatsProto { - // Overall time used for the function call. - optional int32 latency_ms = 1; - - // Number of documents deleted by this call. - optional int32 num_documents_deleted = 2; - - // The UTF-8 length of the query string - optional int32 query_length = 3; - - // Number of terms in the query string. - optional int32 num_terms = 4; - - // Number of namespaces filtered. - optional int32 num_namespaces_filtered = 5; - - // Number of schema types filtered. - optional int32 num_schema_types_filtered = 6; - - // Time used to parse the query, including 2 parts: tokenizing and - // transforming tokens into an iterator tree. - optional int32 parse_query_latency_ms = 7; - - // Time used to delete each document. - optional int32 document_removal_latency_ms = 8; -} diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto index ffb6f2c..4188a8c 100644 --- a/proto/icing/proto/schema.proto +++ b/proto/icing/proto/schema.proto @@ -91,14 +91,6 @@ message StringIndexingConfig { // Tokenization for plain text. PLAIN = 1; - - // Tokenizes text in verbatim. This means no normalization or segmentation - // is applied to string values that are tokenized using this type. - // Therefore, the output token is equivalent to the raw string text. For - // example, "Hello, world!" would be tokenized as "Hello, world!" - // preserving punctuation and capitalization, and not creating separate - // tokens between the space. - VERBATIM = 2; } } optional TokenizerType.Code tokenizer_type = 2; @@ -205,7 +197,7 @@ message SchemaProto { } // Result of a call to IcingSearchEngine.SetSchema -// Next tag: 8 +// Next tag: 4 message SetSchemaResultProto { // Status code can be one of: // OK @@ -229,21 +221,6 @@ message SetSchemaResultProto { // documents that fail validation against the new schema types would also be // deleted. repeated string incompatible_schema_types = 3; - - // Schema types that did not exist in the previous schema and were added with - // the new schema type. 
diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto
index ffb6f2c..4188a8c 100644
--- a/proto/icing/proto/schema.proto
+++ b/proto/icing/proto/schema.proto
@@ -91,14 +91,6 @@ message StringIndexingConfig {
 
       // Tokenization for plain text.
       PLAIN = 1;
-
-      // Tokenizes text in verbatim. This means no normalization or segmentation
-      // is applied to string values that are tokenized using this type.
-      // Therefore, the output token is equivalent to the raw string text. For
-      // example, "Hello, world!" would be tokenized as "Hello, world!"
-      // preserving punctuation and capitalization, and not creating separate
-      // tokens between the space.
-      VERBATIM = 2;
     }
   }
   optional TokenizerType.Code tokenizer_type = 2;
@@ -205,7 +197,7 @@ message SchemaProto {
 }
 
 // Result of a call to IcingSearchEngine.SetSchema
-// Next tag: 8
+// Next tag: 4
 message SetSchemaResultProto {
   // Status code can be one of:
   //   OK
@@ -229,21 +221,6 @@ message SetSchemaResultProto {
   // documents that fail validation against the new schema types would also be
   // deleted.
   repeated string incompatible_schema_types = 3;
-
-  // Schema types that did not exist in the previous schema and were added with
-  // the new schema type.
-  repeated string new_schema_types = 4;
-
-  // Schema types that were changed in a way that was backwards compatible and
-  // didn't invalidate the index.
-  repeated string fully_compatible_changed_schema_types = 5;
-
-  // Schema types that were changed in a way that was backwards compatible, but
-  // invalidated the index.
-  repeated string index_incompatible_changed_schema_types = 6;
-
-  // Overall time used for the function call.
-  optional int32 latency_ms = 7;
 }
 
 // Result of a call to IcingSearchEngine.GetSchema
diff --git a/proto/icing/proto/scoring.proto b/proto/icing/proto/scoring.proto
index 71c943e..6186fde 100644
--- a/proto/icing/proto/scoring.proto
+++ b/proto/icing/proto/scoring.proto
@@ -23,7 +23,7 @@ option objc_class_prefix = "ICNG";
 // Encapsulates the configurations on how Icing should score and rank the search
 // results.
 // TODO(b/170347684): Change all timestamps to seconds.
-// Next tag: 4
+// Next tag: 3
 message ScoringSpecProto {
   // OPTIONAL: Indicates how the search results will be ranked.
   message RankingStrategy {
@@ -83,42 +83,4 @@ message ScoringSpecProto {
     }
   }
   optional Order.Code order_by = 2;
-
-  // OPTIONAL: Specifies property weights for RELEVANCE_SCORE scoring strategy.
-  // Property weights are used for promoting or demoting query term matches in a
-  // document property. When property weights are provided, the term frequency
-  // is multiplied by the normalized property weight when computing the
-  // normalized term frequency component of BM25F. To prefer query term matches
-  // in the "subject" property over the "body" property of "Email" documents,
-  // set a higher property weight value for "subject" than "body". By default,
-  // all properties that are not specified are given a raw, pre-normalized
-  // weight of 1.0 when scoring.
-  repeated TypePropertyWeights type_property_weights = 3;
-}
-
-// Next tag: 3
-message TypePropertyWeights {
-  // Schema type to apply property weights to.
-  optional string schema_type = 1;
-
-  // Property weights to apply to the schema type.
-  repeated PropertyWeight property_weights = 2;
-}
-
-// Next tag: 3
-message PropertyWeight {
-  // Property path to assign property weight to. Property paths must be composed
-  // only of property names and property separators (the '.' character).
-  // For example, if an "Email" schema type has string property "subject" and
-  // document property "sender", which has string property "name", the property
-  // path for the email's subject would just be "subject" and the property path
-  // for the sender's name would be "sender.name". If an invalid path is
-  // specified, the property weight is discarded.
-  optional string path = 1;
-
-  // Property weight, valid values are positive and zero. Setting a zero
-  // property weight will remove scoring contribution for a query term match in
-  // the property. Negative weights are invalid and will result in an error.
-  // By default, a property is given a raw, pre-normalized weight of 1.0.
-  optional double weight = 2;
 }
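For readers tracking what the scoring.proto removal gives up: TypePropertyWeights let clients skew the BM25F term-frequency component per property, as the deleted comments describe. The toy model below illustrates that scaling; normalizing by the maximum weight in the schema type is an assumption made here for illustration, not a claim about Icing's internal normalization:

    #include <algorithm>
    #include <vector>

    // One query-term match set for a single document: the term's frequency in
    // each property it appears in, plus the client-supplied raw weight of that
    // property (PropertyWeight.weight; unspecified properties default to 1.0).
    struct PropertyHit {
      double term_frequency;
      double raw_weight;
    };

    // Weighted tf feeding BM25F: each property's tf is scaled by its
    // normalized weight (assumed here to be raw_weight / max raw_weight).
    double WeightedTermFrequency(const std::vector<PropertyHit>& hits) {
      double max_weight = 0.0;
      for (const PropertyHit& hit : hits) {
        max_weight = std::max(max_weight, hit.raw_weight);
      }
      if (max_weight <= 0.0) return 0.0;

      double weighted_tf = 0.0;
      for (const PropertyHit& hit : hits) {
        weighted_tf += hit.term_frequency * (hit.raw_weight / max_weight);
      }
      return weighted_tf;
    }

With "subject" weighted 2.0 and "body" weighted 1.0, one subject match contributes as much to the tf sum as two body matches, which is exactly the promote/demote effect the removed comments describe.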
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index f005c76..66fdbe6 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -85,16 +85,16 @@ message ResultSpecProto {
     // have snippet information provided. If set to 0, snippeting is disabled.
     optional int32 num_matches_per_property = 2;
 
-    // How large of a window to provide. Windows start at
-    // max_window_utf32_length / 2 bytes before the middle of the matching token
-    // and end at max_window_utf32_length / 2 bytes after the middle of the
-    // matching token. Windowing respects token boundaries. Therefore, the
-    // returned window may be smaller than requested. Setting
-    // max_window_utf32_length to 0 will disable windowing information. If
-    // matches enabled is also set to false, then snippeting is disabled. Ex.
-    // max_window_utf32_length = 16. "foo bar baz bat rat" with a query of "baz"
+    // How large of a window to provide. Windows start at max_window_bytes / 2
+    // bytes before the middle of the matching token and end at max_window_bytes
+    // / 2 bytes after the middle of the matching token. Windowing respects
+    // token boundaries.
+    // Therefore, the returned window may be smaller than requested. Setting
+    // max_window_bytes to 0 will disable windowing information. If matches
+    // enabled is also set to false, then snippeting is disabled.
+    // Ex. max_window_bytes = 16. "foo bar baz bat rat" with a query of "baz"
     // will return a window of "bar baz bat" which is only 11 bytes long.
-    optional int32 max_window_utf32_length = 3;
+    optional int32 max_window_bytes = 3;
   }
 
   optional SnippetSpecProto snippet_spec = 3;
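The windowing rule in the comment above, worked through: "baz" occupies bytes [8, 10] of "foo bar baz bat rat", so its middle is byte 9; with max_window_bytes = 16 the ideal window is bytes [1, 17], which then snaps inward to token boundaries, yielding "bar baz bat" (bytes [4, 14], 11 bytes). A toy sketch of that arithmetic follows; splitting on spaces is a deliberate simplification of Icing's language-aware segmenter:

    #include <algorithm>
    #include <iostream>
    #include <string>

    // Center a window of at most max_window_bytes on the middle of the match,
    // then shrink it so no token is cut in half. Tokens here are just
    // space-separated words.
    std::string SnippetWindow(const std::string& text, int match_pos,
                              int match_len, int max_window_bytes) {
      const int size = static_cast<int>(text.size());
      const int middle = match_pos + match_len / 2;
      int begin = std::max(0, middle - max_window_bytes / 2);
      int end = std::min(size, middle + max_window_bytes / 2);
      // If begin falls inside a token, advance to the next token start.
      while (begin > 0 && begin < size && text[begin - 1] != ' ') ++begin;
      // If end falls inside a token, retreat to the previous token end.
      while (end > begin && end < size && text[end] != ' ') --end;
      return text.substr(begin, end - begin);
    }

    int main() {
      // Prints "bar baz bat": the requested 16-byte window shrinks to 11
      // bytes because it may not cut "foo" or "rat" in half.
      std::cout << SnippetWindow("foo bar baz bat rat", 8, 3, 16) << "\n";
    }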
@@ -136,57 +136,27 @@
 }
 
 // The representation of a single match within a DocumentProto property.
-//
-// Example : A document whose content is "Necesito comprar comida mañana." and a
-// query for "mana" with window=15
-// Next tag: 12
+// Next tag: 10
 message SnippetMatchProto {
   // The index of the byte in the string at which the match begins and the
   // length in bytes of the match.
-  //
-  // For the example above, the values of these fields would be
-  // exact_match_byte_position=24, exact_match_byte_length=7 "mañana"
   optional int32 exact_match_byte_position = 2;
   optional int32 exact_match_byte_length = 3;
 
-  // The length in bytes of the subterm that matches the query. The beginning of
-  // the submatch is the same as exact_match_byte_position.
-  //
-  // For the example above, the value of this field would be 5. With
-  // exact_match_byte_position=24 above, it would produce the substring "maña"
-  optional int32 submatch_byte_length = 10;
-
   // The index of the UTF-16 code unit in the string at which the match begins
   // and the length in UTF-16 code units of the match. This is for use with
   // UTF-16 encoded strings like Java.lang.String.
-  //
-  // For the example above, the values of these fields would be
-  // exact_match_utf16_position=24, exact_match_utf16_length=6 "mañana"
   optional int32 exact_match_utf16_position = 6;
   optional int32 exact_match_utf16_length = 7;
 
-  // The length in UTF-16 code units of the subterm that matches the query. The
-  // beginning of the submatch is the same as exact_match_utf16_position. This
-  // is for use with UTF-16 encoded strings like Java.lang.String.
-  //
-  // For the example above, the value of this field would be 4. With
-  // exact_match_utf16_position=24 above, it would produce the substring "maña"
-  optional int32 submatch_utf16_length = 11;
-
   // The index of the byte in the string at which the suggested snippet window
   // begins and the length in bytes of the window.
-  //
-  // For the example above, the values of these fields would be
-  // window_byte_position=17, window_byte_length=15 "comida mañana."
   optional int32 window_byte_position = 4;
   optional int32 window_byte_length = 5;
 
   // The index of the UTF-16 code unit in the string at which the suggested
   // snippet window begins and the length in UTF-16 code units of the window.
   // This is for use with UTF-16 encoded strings like Java.lang.String.
-  //
-  // For the example above, the values of these fields would be
-  // window_utf16_position=17, window_utf16_length=14 "comida mañana."
   optional int32 window_utf16_position = 8;
   optional int32 window_utf16_length = 9;
 
@@ -308,54 +278,3 @@
   // type will be retrieved.
   repeated TypePropertyMask type_property_masks = 1;
 }
-
-// Next tag: 5
-message SuggestionSpecProto {
-  // REQUIRED: The "raw" prefix string that users may type. For example, "f"
-  // will search for suggested query that start with "f" like "foo", "fool".
-  optional string prefix = 1;
-
-  // OPTIONAL: Only search for suggestions that under the specified namespaces.
-  // If unset, the suggestion will search over all namespaces. Note that this
-  // applies to the entire 'prefix'. To issue different suggestions for
-  // different namespaces, separate RunSuggestion()'s will need to be made.
-  repeated string namespace_filters = 2;
-
-  // REQUIRED: The number of suggestions to be returned.
-  optional int32 num_to_return = 3;
-
-  // Indicates how the suggestion terms should be scored and ranked.
-  message SuggestionScoringSpecProto {
-    // TermMatchType.Code=UNKNOWN
-    // Should never purposely be set and may lead to undefined behavior. This is
-    // used for backwards compatibility reasons.
-    //
-    // TermMatchType.Code=EXACT_ONLY
-    // Only exact hits will be counted to score a suggestion term.
-    //
-    // TermMatchType.Code=PREFIX
-    // Both exact hits and prefix hits will be counted to score a suggestion
-    // term.
-    optional TermMatchType.Code scoring_match_type = 1;
-  }
-
-  optional SuggestionScoringSpecProto scoring_spec = 4;
-}
-
-// Next tag: 3
-message SuggestionResponse {
-  message Suggestion {
-    // The suggested query string for client to search for.
-    optional string query = 1;
-  }
-
-  // Status code can be one of:
-  //   OK
-  //   FAILED_PRECONDITION
-  //   INTERNAL
-  //
-  // See status.proto for more details.
-  optional StatusProto status = 1;
-
-  repeated Suggestion suggestions = 2;
-}
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index 73d349b..35ad6d9 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=436284873)
+set(synced_AOSP_CL_number=378695940)
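On the byte/UTF-16 duplication that survives in SnippetMatchProto above: Icing stores text as UTF-8, while java.lang.String indexes UTF-16 code units, so the same match generally has different extents in the two encodings. In the removed example, "mañana" is 7 UTF-8 bytes but 6 UTF-16 code units, hence exact_match_byte_length=7 alongside exact_match_utf16_length=6. A sketch of the length conversion, assuming well-formed UTF-8 input:

    #include <string>

    // Count the UTF-16 code units needed for a UTF-8 string. The lead byte
    // determines each sequence's length; code points outside the BMP (4-byte
    // UTF-8) need a surrogate pair, i.e. 2 UTF-16 units.
    int Utf16Length(const std::string& utf8) {
      int units = 0;
      for (size_t i = 0; i < utf8.size();) {
        const unsigned char c = utf8[i];
        if (c < 0x80)      { i += 1; units += 1; }  // ASCII
        else if (c < 0xE0) { i += 2; units += 1; }  // 2-byte sequence, e.g. "ñ"
        else if (c < 0xF0) { i += 3; units += 1; }  // 3-byte sequence
        else               { i += 4; units += 2; }  // 4-byte: surrogate pair
      }
      return units;
    }

    // Utf16Length("ma\xc3\xb1ana") == 6, while the UTF-8 byte length is 7.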