diff options
author | Matthias Maennich <maennich@google.com> | 2021-11-09 16:26:54 +0000 |
---|---|---|
committer | Matthias Maennich <maennich@google.com> | 2021-11-09 23:05:46 +0000 |
commit | 1fc9bcaac9e4215a30d1e28adb0309ca421ea07f (patch) | |
tree | fb746044000ba57b75bebda05356cf5e0f281e52 | |
parent | abd3c5f79420405903661c8c86656aca5b6a2b96 (diff) | |
download | build-tools-1fc9bcaac9e4215a30d1e28adb0309ca421ea07f.tar.gz |
Interceptor: add command analysis
Add the first version of the command analysis utilities. We match the
executed program against a set of (command line) analyzers and delegate to
the matching analyzer the detection of any inputs/outputs that can be
derived from the command line.
For the moment, this only implements analyzers for clang/gcc
compilers/linkers as well as archivers (ar, llvm-ar).
Bug: 205577427
Signed-off-by: Matthias Maennich <maennich@google.com>
Change-Id: I72791d05a946a4210be6322358003a7250912b80
-rw-r--r-- | interceptor/interceptor.cc | 117 | ||||
-rw-r--r-- | interceptor/interceptor.h | 19 |
2 files changed, 136 insertions, 0 deletions
diff --git a/interceptor/interceptor.cc b/interceptor/interceptor.cc
index 98b8e5e..dcf7aca 100644
--- a/interceptor/interceptor.cc
+++ b/interceptor/interceptor.cc
@@ -20,13 +20,18 @@
 #include <unistd.h>
 
 #include <algorithm>
+#include <array>
 #include <filesystem>
 #include <fstream>
+#include <initializer_list>
 #include <iomanip>
+#include <iostream>
 #include <iterator>
+#include <regex>
 #include <sstream>
 #include <string>
 #include <string_view>
+#include <type_traits>
 #include <utility>
 
 #include <android-base/strings.h>
@@ -161,6 +166,116 @@ void Command::make_relative() {
   std::for_each(args_->begin(), args_->end(), replace_all);
 }
 
+static AnalysisResult analyze_command(const interceptor::Command& command);
+
+void Command::analyze() {
+  auto [inputs, outputs] = analyze_command(*this);
+
+  // TODO: this sanitizing should be done during make_relative
+  for (auto& input : inputs) {
+    if (input.rfind("./", 0) == 0) {
+      input = input.substr(2);
+    }
+  }
+  for (auto& output : outputs) {
+    if (output.rfind("./", 0) == 0) {
+      output = output.substr(2);
+    }
+  }
+  for (const auto& input : inputs) {
+    if (!fs::is_regular_file(input)) {
+      std::cerr << "missing input: " << input << "\n";
+      std::cerr << Command::repr() << "\n";
+      exit(1);
+    }
+  }
+
+  inputs_ = std::move(inputs);
+  outputs_ = std::move(outputs);
+}
+
+/// COMMAND ANALYSIS
+
+using Analyzer = std::function<AnalysisResult(const std::string&, const ArgVec&, const EnvMap&)>;
+
+static AnalysisResult analyze_compiler_linker(const std::string&, const ArgVec& args,
+                                              const EnvMap&) {
+  static constexpr std::array kSkipNextArgs{
+      "-isystem", "-I", "-L", "-m", "-soname", "-z",
+  };
+  static constexpr std::string_view kOutputOption = "-Wp,-MMD,";
+
+  AnalysisResult result;
+  bool next_is_out = false;
+  bool skip_next = false;
+  // skip args[0] as this is the program itself
+  for (auto it = args.cbegin() + 1; it != args.cend(); ++it) {
+    const auto& arg = *it;
+    if (arg == "-o") {
+      next_is_out = true;
+      continue;
+    }
+    if (next_is_out) {
+      result.outputs.push_back(arg);
+      next_is_out = false;
+      continue;
+    }
+    if (arg.rfind(kOutputOption, 0) == 0) {
+      result.outputs.push_back(arg.substr(kOutputOption.size()));
+    }
+    if (skip_next) {
+      skip_next = false;
+      continue;
+    }
+    if (std::find(kSkipNextArgs.cbegin(), kSkipNextArgs.cend(), arg) != kSkipNextArgs.cend()) {
+      skip_next = true;
+    }
+    // ignore test compilations
+    if (arg == "/dev/null" || arg == "-") {
+      return {};
+    }
+    if (arg[0] == '-') {  // ignore flags
+      continue;
+    }
+    result.inputs.push_back(arg);
+  }
+
+  return result;
+}
+
+static AnalysisResult analyze_archiver(const std::string&, const ArgVec& args, const EnvMap&) {
+  AnalysisResult result;
+
+  if (args.size() < 3) return result;
+  // skip args[0] as this is the program itself
+  // skip args[1] are the archiver flags
+  // args[2] is the output
+  result.outputs.push_back(args[2]);
+  // args[3:] are the inputs
+  result.inputs.insert(result.inputs.cend(), args.cbegin() + 3, args.cend());
+  return result;
+}
+
+static const std::initializer_list<std::pair<std::regex, Analyzer>> analyzers{
+    {
+        std::regex("^(.*/)?(clang|clang\\+\\+|gcc|g\\+\\+|ld(\\.lld)?|llvm-strip)$"),
+        analyze_compiler_linker,
+    },
+    {
+        std::regex("^(.*/)?(llvm-)?ar$"),
+        analyze_archiver,
+    },
+};
+
+static AnalysisResult analyze_command(const Command& command) {
+  for (const auto& [regex, analyzer] : analyzers) {
+    if (std::regex_match(command.args()[0], regex)) {
+      return analyzer(command.program(), command.args(), command.env());
+    }
+  }
+  return {};
+}
+
 }  // namespace interceptor
 
 /// UTILITY FUNCTIONS
@@ -182,6 +297,8 @@ static void process_command(const char* filename, char* const argv[], char* cons
   // furthermore necessary to produce cache hits in RBE.
   command.make_relative();
 
+  command.analyze();
+
   log(command, "");
 
   // pass down the transformed command to execve
diff --git a/interceptor/interceptor.h b/interceptor/interceptor.h
index 77b137a..3f738b2 100644
--- a/interceptor/interceptor.h
+++ b/interceptor/interceptor.h
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <functional>
 #include <optional>
 #include <string>
 #include <unordered_map>
@@ -25,6 +26,9 @@ namespace interceptor {
 using ArgVec = std::vector<std::string>;
 using EnvMap = std::unordered_map<std::string, std::string>;
 
+using Inputs = std::vector<std::string>;
+using Outputs = Inputs;
+
 // Command abstraction
 //
 // This is a utility container to keep program, args and env in an accessible
@@ -39,11 +43,17 @@ class Command {
 
   char* const* envp() const { return envp_; };
 
+  const Inputs& inputs() const { return inputs_; }
+  const Outputs& outputs() const { return outputs_; }
+
   std::string repr() const;
 
   // make command line calls relative to ROOT_DIR
   void make_relative();
 
+  // determine inputs/outputs
+  void analyze();
+
  private:
   std::string program_;
   std::string cwd_;
@@ -53,6 +63,15 @@ class Command {
 
   mutable std::optional<ArgVec> args_;
   mutable std::optional<EnvMap> env_;
+
+  Inputs inputs_;
+  Outputs outputs_;
 };
 
+// Command analysis
+
+struct AnalysisResult {
+  Inputs inputs;
+  Outputs outputs;
+};
 }  // namespace interceptor