diff options
author | Shahms King <shahms@google.com> | 2022-07-20 11:27:14 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-20 11:27:14 -0700 |
commit | 541aa64bf3e5831c165a14b2da77e73b629c9f97 (patch) | |
tree | 431b12da5208b78ec224ff5da1d854c41d686e8f | |
parent | b527138606ea28760d9f0cc5ab9e0296441a82d0 (diff) | |
download | kythe-541aa64bf3e5831c165a14b2da77e73b629c9f97.tar.gz |
feat(cxx_indexer): implement directory traversal in indexer VFS (#5325)
* feat(cxx_indexer): implement directory traversal in indexer VFS
* chore: report file not found from dir_begin
-rw-r--r-- | kythe/cxx/indexer/cxx/BUILD | 16 | ||||
-rw-r--r-- | kythe/cxx/indexer/cxx/KytheVFS.cc | 80 | ||||
-rw-r--r-- | kythe/cxx/indexer/cxx/KytheVFS.h | 52 | ||||
-rw-r--r-- | kythe/cxx/indexer/cxx/KytheVFS_test.cc | 147 |
4 files changed, 257 insertions, 38 deletions
diff --git a/kythe/cxx/indexer/cxx/BUILD b/kythe/cxx/indexer/cxx/BUILD index 87dfd83f6..ce8ccfe31 100644 --- a/kythe/cxx/indexer/cxx/BUILD +++ b/kythe/cxx/indexer/cxx/BUILD @@ -412,6 +412,9 @@ cc_library( "//kythe/proto:analysis_cc_proto", "//kythe/proto:common_cc_proto", "//kythe/proto:storage_cc_proto", + "@com_github_google_glog//:glog", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -422,6 +425,19 @@ cc_library( ], ) +cc_test( + name = "vfs_test", + srcs = ["KytheVFS_test.cc"], + deps = [ + ":vfs", + "//kythe/proto:analysis_cc_proto", + "//third_party:gtest", + "//third_party:gtest_main", + "@com_google_absl//absl/strings", + "@llvm-project//llvm:Support", + ], +) + cc_library( name = "lib", srcs = [ diff --git a/kythe/cxx/indexer/cxx/KytheVFS.cc b/kythe/cxx/indexer/cxx/KytheVFS.cc index 64533a4e7..c3fa10965 100644 --- a/kythe/cxx/indexer/cxx/KytheVFS.cc +++ b/kythe/cxx/indexer/cxx/KytheVFS.cc @@ -16,9 +16,12 @@ #include "KytheVFS.h" +#include <system_error> + #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" +#include "glog/logging.h" #include "kythe/cxx/indexer/cxx/proto_conversions.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" @@ -54,18 +57,19 @@ std::string FixupPath(llvm::StringRef path, llvm::sys::path::Style style) { } } // anonymous namespace -IndexVFS::IndexVFS(const std::string& working_directory, +IndexVFS::IndexVFS(absl::string_view working_directory, const std::vector<proto::FileData>& virtual_files, const std::vector<llvm::StringRef>& virtual_dirs, llvm::sys::path::Style style) - : virtual_files_(virtual_files), - working_directory_(FixupPath(working_directory, style)) { + : working_directory_(FixupPath( + llvm::StringRef(working_directory.data(), working_directory.size()), + style)) { if (!llvm::sys::path::is_absolute(working_directory_, llvm::sys::path::Style::posix)) { absl::FPrintF(stderr, "warning: working directory %s is not absolute\n", working_directory_); } - for (const auto& data : virtual_files_) { + for (const auto& data : virtual_files) { std::string path = FixupPath(ToStringRef(data.info().path()), style); if (auto* record = FileRecordForPath(path, BehaviorOnMissing::kCreateFile, data.content().size())) { @@ -81,12 +85,6 @@ IndexVFS::IndexVFS(const std::string& working_directory, FileRecordForPath(".", BehaviorOnMissing::kCreateDirectory, 0); } -IndexVFS::~IndexVFS() { - for (auto& entry : uid_to_record_map_) { - delete entry.second; - } -} - llvm::ErrorOr<llvm::vfs::Status> IndexVFS::status(const llvm::Twine& path) { if (const auto* record = FileRecordForPath(path.str(), BehaviorOnMissing::kReturnError, 0)) { @@ -121,6 +119,13 @@ llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> IndexVFS::openFileForRead( llvm::vfs::directory_iterator IndexVFS::dir_begin(const llvm::Twine& dir, std::error_code& error_code) { + std::string path = dir.str(); + if (auto* record = + FileRecordForPath(path, BehaviorOnMissing::kReturnError, 0)) { + return llvm::vfs::directory_iterator( + std::make_shared<DirectoryIteratorImpl>(std::move(path), record)); + } + error_code = std::make_error_code(std::errc::no_such_file_or_directory); return llvm::vfs::directory_iterator(); } @@ -193,15 +198,21 @@ IndexVFS::FileRecord* IndexVFS::FileRecordForPathRoot(const llvm::Twine& path, } else if (!create_if_missing) { return nullptr; } else { - name_record = new FileRecord( - {llvm::vfs::Status(root_name, llvm::vfs::getNextVirtualUniqueID(), - llvm::sys::TimePoint<>(), 0, 0, 0, - llvm::sys::fs::file_type::directory_file, - llvm::sys::fs::all_read), - false, std::string(root_name)}); - root_name_to_root_map_[std::string(root_name)] = name_record; - uid_to_record_map_[PairFromUid(name_record->status.getUniqueID())] = - name_record; + FileRecord record = { + .status = llvm::vfs::Status( + root_name, llvm::vfs::getNextVirtualUniqueID(), + llvm::sys::TimePoint<>(), 0, 0, 0, + llvm::sys::fs::file_type::directory_file, llvm::sys::fs::all_read), + .has_vname = false, + .label = std::string(root_name), + }; + auto [iter, inserted] = uid_to_record_map_.insert_or_assign( + PairFromUid(record.status.getUniqueID()), + std::make_unique<FileRecord>(record)); + CHECK(inserted) << "Duplicated entries detected!"; + root_name_to_root_map_.insert_or_assign(std::string(root_name), + iter->second.get()); + name_record = iter->second.get(); } return AllocOrReturnFileRecord(name_record, create_if_missing, root_dir, llvm::sys::fs::file_type::directory_file, 0); @@ -306,15 +317,28 @@ IndexVFS::FileRecord* IndexVFS::AllocOrReturnFileRecord( } llvm::SmallString<1024> out_path(llvm::StringRef(parent->status.getName())); llvm::sys::path::append(out_path, llvm::sys::path::Style::posix, label); - FileRecord* new_record = new FileRecord{ - llvm::vfs::Status(out_path, llvm::vfs::getNextVirtualUniqueID(), - llvm::sys::TimePoint<>(), 0, 0, size, type, - llvm::sys::fs::all_read), - false, std::string(label)}; - parent->children.push_back(new_record); - uid_to_record_map_[PairFromUid(new_record->status.getUniqueID())] = - new_record; - return new_record; + FileRecord record = { + .status = llvm::vfs::Status(out_path, llvm::vfs::getNextVirtualUniqueID(), + llvm::sys::TimePoint<>(), 0, 0, size, type, + llvm::sys::fs::all_read), + .has_vname = false, + .label = std::string(label), + }; + auto [iter, inserted] = uid_to_record_map_.insert_or_assign( + PairFromUid(record.status.getUniqueID()), + std::make_unique<FileRecord>(record)); + CHECK(inserted) << "Duplicate entries detected!"; + parent->children.push_back(iter->second.get()); + return iter->second.get(); +} + +llvm::vfs::directory_entry IndexVFS::DirectoryIteratorImpl::GetEntry() const { + if (curr_ == end_) { + return {}; + } + llvm::SmallString<1024> path = llvm::StringRef(path_); + llvm::sys::path::append(path, llvm::sys::path::Style::posix, (*curr_)->label); + return {std::string(path), (*curr_)->status.getType()}; } } // namespace kythe diff --git a/kythe/cxx/indexer/cxx/KytheVFS.h b/kythe/cxx/indexer/cxx/KytheVFS.h index e0b05892c..85337439b 100644 --- a/kythe/cxx/indexer/cxx/KytheVFS.h +++ b/kythe/cxx/indexer/cxx/KytheVFS.h @@ -17,6 +17,11 @@ #ifndef KYTHE_CXX_COMMON_INDEXING_KYTHE_VFS_H_ #define KYTHE_CXX_COMMON_INDEXING_KYTHE_VFS_H_ +#include <memory> + +#include "absl/base/attributes.h" +#include "absl/container/flat_hash_map.h" +#include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "clang/Basic/FileManager.h" #include "kythe/proto/analysis.pb.h" @@ -33,19 +38,24 @@ namespace kythe { class IndexVFS : public llvm::vfs::FileSystem { public: /// \param working_directory The absolute path to the working directory. - /// \param virtual_files Files to map. + /// \param virtual_files Files to map. File content must outlive IndexVFS. /// \param virtual_dirs Directories to map. /// \param style Style used to parse incoming paths. Paths are normalized /// to POSIX-style. - IndexVFS(const std::string& working_directory, - const std::vector<proto::FileData>& virtual_files, - const std::vector<llvm::StringRef>& virtual_dirs, - llvm::sys::path::Style style); + explicit IndexVFS(absl::string_view working_directory, + const std::vector<proto::FileData>& virtual_files + ABSL_ATTRIBUTE_LIFETIME_BOUND, + const std::vector<llvm::StringRef>& virtual_dirs, + llvm::sys::path::Style style); /// \return nullopt if `awd` is not absolute or its style could not be /// detected; otherwise, the style of `awd`. static absl::optional<llvm::sys::path::Style> DetectStyleFromAbsoluteWorkingDirectory(const std::string& awd); - ~IndexVFS(); + + // IndexVFS is neither copyable nor movable. + IndexVFS(const IndexVFS&) = delete; + IndexVFS& operator=(const IndexVFS&) = delete; + /// \brief Implements llvm::vfs::FileSystem::status. llvm::ErrorOr<llvm::vfs::Status> status(const llvm::Twine& path) override; /// \brief Implements llvm::vfs::FileSystem::openFileForRead. @@ -116,6 +126,28 @@ class IndexVFS : public llvm::vfs::FileSystem { std::string name_; }; + class DirectoryIteratorImpl : public ::llvm::vfs::detail::DirIterImpl { + public: + explicit DirectoryIteratorImpl(std::string path, const FileRecord* record) + : path_(std::move(path)), record_(record) { + CurrentEntry = GetEntry(); + } + + std::error_code increment() override { + ++curr_; + CurrentEntry = GetEntry(); + return {}; + } + + private: + ::llvm::vfs::directory_entry GetEntry() const; + + std::string path_; + const FileRecord* record_; + std::vector<FileRecord*>::const_iterator curr_ = record_->children.begin(), + end_ = record_->children.end(); + }; + /// \brief Controls what happens when a missing path node is encountered. enum class BehaviorOnMissing { kCreateFile, ///< Create intermediate directories and a final file. @@ -149,15 +181,15 @@ class IndexVFS : public llvm::vfs::FileSystem { llvm::sys::fs::file_type type, size_t size); - /// The virtual files that were included in the index. - const std::vector<proto::FileData>& virtual_files_; /// The working directory. Must be absolute. std::string working_directory_; /// Maps root names to root nodes. For indexes captured from Unix /// environments, there will be only one root name (the empty string). - std::map<std::string, FileRecord*> root_name_to_root_map_; + absl::flat_hash_map<std::string, FileRecord*> root_name_to_root_map_; /// Maps unique IDs to file records. - std::map<std::pair<uint64_t, uint64_t>, FileRecord*> uid_to_record_map_; + absl::flat_hash_map<std::pair<uint64_t, uint64_t>, + std::unique_ptr<FileRecord>> + uid_to_record_map_; }; } // namespace kythe diff --git a/kythe/cxx/indexer/cxx/KytheVFS_test.cc b/kythe/cxx/indexer/cxx/KytheVFS_test.cc new file mode 100644 index 000000000..a9fd27f69 --- /dev/null +++ b/kythe/cxx/indexer/cxx/KytheVFS_test.cc @@ -0,0 +1,147 @@ +/* + * Copyright 2022 The Kythe Authors. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kythe/cxx/indexer/cxx/KytheVFS.h" + +#include <initializer_list> +#include <string> +#include <system_error> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "kythe/proto/analysis.pb.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/VirtualFileSystem.h" + +namespace kythe { +namespace { +using ::testing::Optional; +using ::testing::UnorderedElementsAre; + +template <typename Container = std::initializer_list<std::string>> +std::vector<proto::FileData> FileDataFromPaths(Container&& paths) { + std::vector<proto::FileData> result; + for (const auto& path : paths) { + result.emplace_back().mutable_info()->set_path(path); + } + return result; +} + +TEST(KytheVFSTest, AddingFilesCreatesTraversableTree) { + using ::llvm::vfs::recursive_directory_iterator; + + auto files = FileDataFromPaths({ + "path/to/a/file.txt", + "path/to/another/file.txt", + }); + IndexVFS vfs("/working_directory", files, {}, llvm::sys::path::Style::posix); + + std::vector<std::string> results; + std::error_code err; + for (recursive_directory_iterator iter(vfs, "/", err), end; + iter != end && !err; iter.increment(err)) { + results.push_back(iter->path().str()); + } + ASSERT_FALSE(err) << err; + EXPECT_THAT(results, + UnorderedElementsAre( + "/working_directory", "/working_directory/path", + "/working_directory/path/to", "/working_directory/path/to/a", + "/working_directory/path/to/a/file.txt", + "/working_directory/path/to/another", + "/working_directory/path/to/another/file.txt")); +} + +TEST(KytheVFSTest, AddingFilesCreatesParentDirectories) { + using ::llvm::sys::fs::file_type; + + auto files = FileDataFromPaths({ + "path/to/a/file.txt", + "path/to/another/file.txt", + }); + IndexVFS vfs("/working_directory", files, {}, llvm::sys::path::Style::posix); + struct Entry { + std::string path; + file_type type = file_type::directory_file; + }; + std::vector<Entry> expected = { + {"/"}, + {"/working_directory"}, + {"/working_directory/path"}, + {"/working_directory/path/to"}, + {"/working_directory/path/to/a"}, + {"/working_directory/path/to/a/file.txt", file_type::regular_file}, + {"/working_directory/path/to/another"}, + {"/working_directory/path/to/another/file.txt", file_type::regular_file}, + }; + + for (const auto& entry : expected) { + EXPECT_THAT(vfs.status(entry.path).get().getType(), entry.type) + << "Missing expected path: " << entry.path; + } +} + +TEST(KytheVFSTest, RelativeFilesCreatesParentDirectories) { + using ::llvm::sys::fs::file_type; + + auto files = FileDataFromPaths({ + "../path/to/a/file.txt", + "../path/to/another/file.txt", + }); + IndexVFS vfs("/working_directory", files, {}, llvm::sys::path::Style::posix); + struct Entry { + std::string path; + file_type type = file_type::directory_file; + }; + std::vector<Entry> expected = { + {"/"}, + {"/working_directory"}, + {"/path"}, + {"/path"}, + {"/path/to"}, + {"/path/to/a"}, + {"/path/to/a/file.txt", file_type::regular_file}, + {"/path/to/another"}, + {"/path/to/another/file.txt", file_type::regular_file}, + }; + + for (const auto& entry : expected) { + EXPECT_THAT(vfs.status(entry.path).get().getType(), entry.type) + << "Missing expected path: " << entry.path; + } +} + +TEST(KytheVFSTest, DetectStyleFromAbsoluteWorkingDirectoryDoes) { + EXPECT_THAT( + IndexVFS::DetectStyleFromAbsoluteWorkingDirectory("/posix/style/path"), + Optional(llvm::sys::path::Style::posix)); + EXPECT_THAT(IndexVFS::DetectStyleFromAbsoluteWorkingDirectory( + "C:/funky/windows/path"), + Optional(llvm::sys::path::Style::windows)); +} + +TEST(KytheVFSTest, DirBeginReportsErrorForMissingEntry) { + auto files = FileDataFromPaths({}); + IndexVFS vfs("/working_directory", files, {}, llvm::sys::path::Style::posix); + std::error_code err; + EXPECT_THAT(vfs.dir_begin("/no/such/file", err), + llvm::vfs::directory_iterator()); + EXPECT_THAT(err, std::make_error_code(std::errc::no_such_file_or_directory)); +} + +} // namespace +} // namespace kythe |