summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShahms King <shahms@google.com>2022-07-20 11:27:14 -0700
committerGitHub <noreply@github.com>2022-07-20 11:27:14 -0700
commit541aa64bf3e5831c165a14b2da77e73b629c9f97 (patch)
tree431b12da5208b78ec224ff5da1d854c41d686e8f
parentb527138606ea28760d9f0cc5ab9e0296441a82d0 (diff)
downloadkythe-541aa64bf3e5831c165a14b2da77e73b629c9f97.tar.gz
feat(cxx_indexer): implement directory traversal in indexer VFS (#5325)
* feat(cxx_indexer): implement directory traversal in indexer VFS * chore: report file not found from dir_begin
-rw-r--r--kythe/cxx/indexer/cxx/BUILD16
-rw-r--r--kythe/cxx/indexer/cxx/KytheVFS.cc80
-rw-r--r--kythe/cxx/indexer/cxx/KytheVFS.h52
-rw-r--r--kythe/cxx/indexer/cxx/KytheVFS_test.cc147
4 files changed, 257 insertions, 38 deletions
diff --git a/kythe/cxx/indexer/cxx/BUILD b/kythe/cxx/indexer/cxx/BUILD
index 87dfd83f6..ce8ccfe31 100644
--- a/kythe/cxx/indexer/cxx/BUILD
+++ b/kythe/cxx/indexer/cxx/BUILD
@@ -412,6 +412,9 @@ cc_library(
"//kythe/proto:analysis_cc_proto",
"//kythe/proto:common_cc_proto",
"//kythe/proto:storage_cc_proto",
+ "@com_github_google_glog//:glog",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
@@ -422,6 +425,19 @@ cc_library(
],
)
+cc_test(
+ name = "vfs_test",
+ srcs = ["KytheVFS_test.cc"],
+ deps = [
+ ":vfs",
+ "//kythe/proto:analysis_cc_proto",
+ "//third_party:gtest",
+ "//third_party:gtest_main",
+ "@com_google_absl//absl/strings",
+ "@llvm-project//llvm:Support",
+ ],
+)
+
cc_library(
name = "lib",
srcs = [
diff --git a/kythe/cxx/indexer/cxx/KytheVFS.cc b/kythe/cxx/indexer/cxx/KytheVFS.cc
index 64533a4e7..c3fa10965 100644
--- a/kythe/cxx/indexer/cxx/KytheVFS.cc
+++ b/kythe/cxx/indexer/cxx/KytheVFS.cc
@@ -16,9 +16,12 @@
#include "KytheVFS.h"
+#include <system_error>
+
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
+#include "glog/logging.h"
#include "kythe/cxx/indexer/cxx/proto_conversions.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
@@ -54,18 +57,19 @@ std::string FixupPath(llvm::StringRef path, llvm::sys::path::Style style) {
}
} // anonymous namespace
-IndexVFS::IndexVFS(const std::string& working_directory,
+IndexVFS::IndexVFS(absl::string_view working_directory,
const std::vector<proto::FileData>& virtual_files,
const std::vector<llvm::StringRef>& virtual_dirs,
llvm::sys::path::Style style)
- : virtual_files_(virtual_files),
- working_directory_(FixupPath(working_directory, style)) {
+ : working_directory_(FixupPath(
+ llvm::StringRef(working_directory.data(), working_directory.size()),
+ style)) {
if (!llvm::sys::path::is_absolute(working_directory_,
llvm::sys::path::Style::posix)) {
absl::FPrintF(stderr, "warning: working directory %s is not absolute\n",
working_directory_);
}
- for (const auto& data : virtual_files_) {
+ for (const auto& data : virtual_files) {
std::string path = FixupPath(ToStringRef(data.info().path()), style);
if (auto* record = FileRecordForPath(path, BehaviorOnMissing::kCreateFile,
data.content().size())) {
@@ -81,12 +85,6 @@ IndexVFS::IndexVFS(const std::string& working_directory,
FileRecordForPath(".", BehaviorOnMissing::kCreateDirectory, 0);
}
-IndexVFS::~IndexVFS() {
- for (auto& entry : uid_to_record_map_) {
- delete entry.second;
- }
-}
-
llvm::ErrorOr<llvm::vfs::Status> IndexVFS::status(const llvm::Twine& path) {
if (const auto* record =
FileRecordForPath(path.str(), BehaviorOnMissing::kReturnError, 0)) {
@@ -121,6 +119,13 @@ llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> IndexVFS::openFileForRead(
llvm::vfs::directory_iterator IndexVFS::dir_begin(const llvm::Twine& dir,
std::error_code& error_code) {
+ std::string path = dir.str();
+ if (auto* record =
+ FileRecordForPath(path, BehaviorOnMissing::kReturnError, 0)) {
+ return llvm::vfs::directory_iterator(
+ std::make_shared<DirectoryIteratorImpl>(std::move(path), record));
+ }
+ error_code = std::make_error_code(std::errc::no_such_file_or_directory);
return llvm::vfs::directory_iterator();
}
@@ -193,15 +198,21 @@ IndexVFS::FileRecord* IndexVFS::FileRecordForPathRoot(const llvm::Twine& path,
} else if (!create_if_missing) {
return nullptr;
} else {
- name_record = new FileRecord(
- {llvm::vfs::Status(root_name, llvm::vfs::getNextVirtualUniqueID(),
- llvm::sys::TimePoint<>(), 0, 0, 0,
- llvm::sys::fs::file_type::directory_file,
- llvm::sys::fs::all_read),
- false, std::string(root_name)});
- root_name_to_root_map_[std::string(root_name)] = name_record;
- uid_to_record_map_[PairFromUid(name_record->status.getUniqueID())] =
- name_record;
+ FileRecord record = {
+ .status = llvm::vfs::Status(
+ root_name, llvm::vfs::getNextVirtualUniqueID(),
+ llvm::sys::TimePoint<>(), 0, 0, 0,
+ llvm::sys::fs::file_type::directory_file, llvm::sys::fs::all_read),
+ .has_vname = false,
+ .label = std::string(root_name),
+ };
+ auto [iter, inserted] = uid_to_record_map_.insert_or_assign(
+ PairFromUid(record.status.getUniqueID()),
+ std::make_unique<FileRecord>(record));
+ CHECK(inserted) << "Duplicated entries detected!";
+ root_name_to_root_map_.insert_or_assign(std::string(root_name),
+ iter->second.get());
+ name_record = iter->second.get();
}
return AllocOrReturnFileRecord(name_record, create_if_missing, root_dir,
llvm::sys::fs::file_type::directory_file, 0);
@@ -306,15 +317,28 @@ IndexVFS::FileRecord* IndexVFS::AllocOrReturnFileRecord(
}
llvm::SmallString<1024> out_path(llvm::StringRef(parent->status.getName()));
llvm::sys::path::append(out_path, llvm::sys::path::Style::posix, label);
- FileRecord* new_record = new FileRecord{
- llvm::vfs::Status(out_path, llvm::vfs::getNextVirtualUniqueID(),
- llvm::sys::TimePoint<>(), 0, 0, size, type,
- llvm::sys::fs::all_read),
- false, std::string(label)};
- parent->children.push_back(new_record);
- uid_to_record_map_[PairFromUid(new_record->status.getUniqueID())] =
- new_record;
- return new_record;
+ FileRecord record = {
+ .status = llvm::vfs::Status(out_path, llvm::vfs::getNextVirtualUniqueID(),
+ llvm::sys::TimePoint<>(), 0, 0, size, type,
+ llvm::sys::fs::all_read),
+ .has_vname = false,
+ .label = std::string(label),
+ };
+ auto [iter, inserted] = uid_to_record_map_.insert_or_assign(
+ PairFromUid(record.status.getUniqueID()),
+ std::make_unique<FileRecord>(record));
+ CHECK(inserted) << "Duplicate entries detected!";
+ parent->children.push_back(iter->second.get());
+ return iter->second.get();
+}
+
+llvm::vfs::directory_entry IndexVFS::DirectoryIteratorImpl::GetEntry() const {
+ if (curr_ == end_) {
+ return {};
+ }
+ llvm::SmallString<1024> path = llvm::StringRef(path_);
+ llvm::sys::path::append(path, llvm::sys::path::Style::posix, (*curr_)->label);
+ return {std::string(path), (*curr_)->status.getType()};
}
} // namespace kythe
diff --git a/kythe/cxx/indexer/cxx/KytheVFS.h b/kythe/cxx/indexer/cxx/KytheVFS.h
index e0b05892c..85337439b 100644
--- a/kythe/cxx/indexer/cxx/KytheVFS.h
+++ b/kythe/cxx/indexer/cxx/KytheVFS.h
@@ -17,6 +17,11 @@
#ifndef KYTHE_CXX_COMMON_INDEXING_KYTHE_VFS_H_
#define KYTHE_CXX_COMMON_INDEXING_KYTHE_VFS_H_
+#include <memory>
+
+#include "absl/base/attributes.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "clang/Basic/FileManager.h"
#include "kythe/proto/analysis.pb.h"
@@ -33,19 +38,24 @@ namespace kythe {
class IndexVFS : public llvm::vfs::FileSystem {
public:
/// \param working_directory The absolute path to the working directory.
- /// \param virtual_files Files to map.
+ /// \param virtual_files Files to map. File content must outlive IndexVFS.
/// \param virtual_dirs Directories to map.
/// \param style Style used to parse incoming paths. Paths are normalized
/// to POSIX-style.
- IndexVFS(const std::string& working_directory,
- const std::vector<proto::FileData>& virtual_files,
- const std::vector<llvm::StringRef>& virtual_dirs,
- llvm::sys::path::Style style);
+ explicit IndexVFS(absl::string_view working_directory,
+ const std::vector<proto::FileData>& virtual_files
+ ABSL_ATTRIBUTE_LIFETIME_BOUND,
+ const std::vector<llvm::StringRef>& virtual_dirs,
+ llvm::sys::path::Style style);
/// \return nullopt if `awd` is not absolute or its style could not be
/// detected; otherwise, the style of `awd`.
static absl::optional<llvm::sys::path::Style>
DetectStyleFromAbsoluteWorkingDirectory(const std::string& awd);
- ~IndexVFS();
+
+ // IndexVFS is neither copyable nor movable.
+ IndexVFS(const IndexVFS&) = delete;
+ IndexVFS& operator=(const IndexVFS&) = delete;
+
/// \brief Implements llvm::vfs::FileSystem::status.
llvm::ErrorOr<llvm::vfs::Status> status(const llvm::Twine& path) override;
/// \brief Implements llvm::vfs::FileSystem::openFileForRead.
@@ -116,6 +126,28 @@ class IndexVFS : public llvm::vfs::FileSystem {
std::string name_;
};
+ class DirectoryIteratorImpl : public ::llvm::vfs::detail::DirIterImpl {
+ public:
+ explicit DirectoryIteratorImpl(std::string path, const FileRecord* record)
+ : path_(std::move(path)), record_(record) {
+ CurrentEntry = GetEntry();
+ }
+
+ std::error_code increment() override {
+ ++curr_;
+ CurrentEntry = GetEntry();
+ return {};
+ }
+
+ private:
+ ::llvm::vfs::directory_entry GetEntry() const;
+
+ std::string path_;
+ const FileRecord* record_;
+ std::vector<FileRecord*>::const_iterator curr_ = record_->children.begin(),
+ end_ = record_->children.end();
+ };
+
/// \brief Controls what happens when a missing path node is encountered.
enum class BehaviorOnMissing {
kCreateFile, ///< Create intermediate directories and a final file.
@@ -149,15 +181,15 @@ class IndexVFS : public llvm::vfs::FileSystem {
llvm::sys::fs::file_type type,
size_t size);
- /// The virtual files that were included in the index.
- const std::vector<proto::FileData>& virtual_files_;
/// The working directory. Must be absolute.
std::string working_directory_;
/// Maps root names to root nodes. For indexes captured from Unix
/// environments, there will be only one root name (the empty string).
- std::map<std::string, FileRecord*> root_name_to_root_map_;
+ absl::flat_hash_map<std::string, FileRecord*> root_name_to_root_map_;
/// Maps unique IDs to file records.
- std::map<std::pair<uint64_t, uint64_t>, FileRecord*> uid_to_record_map_;
+ absl::flat_hash_map<std::pair<uint64_t, uint64_t>,
+ std::unique_ptr<FileRecord>>
+ uid_to_record_map_;
};
} // namespace kythe
diff --git a/kythe/cxx/indexer/cxx/KytheVFS_test.cc b/kythe/cxx/indexer/cxx/KytheVFS_test.cc
new file mode 100644
index 000000000..a9fd27f69
--- /dev/null
+++ b/kythe/cxx/indexer/cxx/KytheVFS_test.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2022 The Kythe Authors. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kythe/cxx/indexer/cxx/KytheVFS.h"
+
+#include <initializer_list>
+#include <string>
+#include <system_error>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "kythe/proto/analysis.pb.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
+
+namespace kythe {
+namespace {
+using ::testing::Optional;
+using ::testing::UnorderedElementsAre;
+
+template <typename Container = std::initializer_list<std::string>>
+std::vector<proto::FileData> FileDataFromPaths(Container&& paths) {
+ std::vector<proto::FileData> result;
+ for (const auto& path : paths) {
+ result.emplace_back().mutable_info()->set_path(path);
+ }
+ return result;
+}
+
+TEST(KytheVFSTest, AddingFilesCreatesTraversableTree) {
+ using ::llvm::vfs::recursive_directory_iterator;
+
+ auto files = FileDataFromPaths({
+ "path/to/a/file.txt",
+ "path/to/another/file.txt",
+ });
+ IndexVFS vfs("/working_directory", files, {}, llvm::sys::path::Style::posix);
+
+ std::vector<std::string> results;
+ std::error_code err;
+ for (recursive_directory_iterator iter(vfs, "/", err), end;
+ iter != end && !err; iter.increment(err)) {
+ results.push_back(iter->path().str());
+ }
+ ASSERT_FALSE(err) << err;
+ EXPECT_THAT(results,
+ UnorderedElementsAre(
+ "/working_directory", "/working_directory/path",
+ "/working_directory/path/to", "/working_directory/path/to/a",
+ "/working_directory/path/to/a/file.txt",
+ "/working_directory/path/to/another",
+ "/working_directory/path/to/another/file.txt"));
+}
+
+TEST(KytheVFSTest, AddingFilesCreatesParentDirectories) {
+ using ::llvm::sys::fs::file_type;
+
+ auto files = FileDataFromPaths({
+ "path/to/a/file.txt",
+ "path/to/another/file.txt",
+ });
+ IndexVFS vfs("/working_directory", files, {}, llvm::sys::path::Style::posix);
+ struct Entry {
+ std::string path;
+ file_type type = file_type::directory_file;
+ };
+ std::vector<Entry> expected = {
+ {"/"},
+ {"/working_directory"},
+ {"/working_directory/path"},
+ {"/working_directory/path/to"},
+ {"/working_directory/path/to/a"},
+ {"/working_directory/path/to/a/file.txt", file_type::regular_file},
+ {"/working_directory/path/to/another"},
+ {"/working_directory/path/to/another/file.txt", file_type::regular_file},
+ };
+
+ for (const auto& entry : expected) {
+ EXPECT_THAT(vfs.status(entry.path).get().getType(), entry.type)
+ << "Missing expected path: " << entry.path;
+ }
+}
+
+TEST(KytheVFSTest, RelativeFilesCreatesParentDirectories) {
+ using ::llvm::sys::fs::file_type;
+
+ auto files = FileDataFromPaths({
+ "../path/to/a/file.txt",
+ "../path/to/another/file.txt",
+ });
+ IndexVFS vfs("/working_directory", files, {}, llvm::sys::path::Style::posix);
+ struct Entry {
+ std::string path;
+ file_type type = file_type::directory_file;
+ };
+ std::vector<Entry> expected = {
+ {"/"},
+ {"/working_directory"},
+ {"/path"},
+ {"/path"},
+ {"/path/to"},
+ {"/path/to/a"},
+ {"/path/to/a/file.txt", file_type::regular_file},
+ {"/path/to/another"},
+ {"/path/to/another/file.txt", file_type::regular_file},
+ };
+
+ for (const auto& entry : expected) {
+ EXPECT_THAT(vfs.status(entry.path).get().getType(), entry.type)
+ << "Missing expected path: " << entry.path;
+ }
+}
+
+TEST(KytheVFSTest, DetectStyleFromAbsoluteWorkingDirectoryDoes) {
+ EXPECT_THAT(
+ IndexVFS::DetectStyleFromAbsoluteWorkingDirectory("/posix/style/path"),
+ Optional(llvm::sys::path::Style::posix));
+ EXPECT_THAT(IndexVFS::DetectStyleFromAbsoluteWorkingDirectory(
+ "C:/funky/windows/path"),
+ Optional(llvm::sys::path::Style::windows));
+}
+
+TEST(KytheVFSTest, DirBeginReportsErrorForMissingEntry) {
+ auto files = FileDataFromPaths({});
+ IndexVFS vfs("/working_directory", files, {}, llvm::sys::path::Style::posix);
+ std::error_code err;
+ EXPECT_THAT(vfs.dir_begin("/no/such/file", err),
+ llvm::vfs::directory_iterator());
+ EXPECT_THAT(err, std::make_error_code(std::errc::no_such_file_or_directory));
+}
+
+} // namespace
+} // namespace kythe