summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Maennich <maennich@google.com>2021-11-09 16:26:54 +0000
committerMatthias Maennich <maennich@google.com>2021-11-09 23:05:46 +0000
commit1fc9bcaac9e4215a30d1e28adb0309ca421ea07f (patch)
treefb746044000ba57b75bebda05356cf5e0f281e52
parentabd3c5f79420405903661c8c86656aca5b6a2b96 (diff)
downloadbuild-tools-1fc9bcaac9e4215a30d1e28adb0309ca421ea07f.tar.gz
Interceptor: add command analysis
Add first version of the command analysis utilities. We match the program executed against a set of (command line) analyzers and delegate the detection of any inputs/outputs that can be derived from the command line. For the moment, this only implements analyzers for clang/gcc compilers/linkers as well as archivers (ar, llvm-ar). Bug: 205577427 Signed-off-by: Matthias Maennich <maennich@google.com> Change-Id: I72791d05a946a4210be6322358003a7250912b80
-rw-r--r--interceptor/interceptor.cc117
-rw-r--r--interceptor/interceptor.h19
2 files changed, 136 insertions, 0 deletions
diff --git a/interceptor/interceptor.cc b/interceptor/interceptor.cc
index 98b8e5e..dcf7aca 100644
--- a/interceptor/interceptor.cc
+++ b/interceptor/interceptor.cc
@@ -20,13 +20,18 @@
#include <unistd.h>
#include <algorithm>
+#include <array>
#include <filesystem>
#include <fstream>
+#include <initializer_list>
#include <iomanip>
+#include <iostream>
#include <iterator>
+#include <regex>
#include <sstream>
#include <string>
#include <string_view>
+#include <type_traits>
#include <utility>
#include <android-base/strings.h>
@@ -161,6 +166,116 @@ void Command::make_relative() {
std::for_each(args_->begin(), args_->end(), replace_all);
}
+// Forward declaration: dispatches a command to the first matching analyzer.
+static AnalysisResult analyze_command(const interceptor::Command& command);
+
+// Determine the inputs and outputs of this command and store them in
+// inputs_ / outputs_.
+//
+// Every detected path has a leading "./" removed. If any detected input is
+// not a regular file, the offending path and the command's repr() are written
+// to stderr and the process exits with status 1.
+void Command::analyze() {
+  AnalysisResult analysis = analyze_command(*this);
+
+  // TODO: this sanitizing should be done during make_relative
+  const auto strip_dot_slash = [](std::string& path) {
+    if (path.compare(0, 2, "./") == 0) {
+      path.erase(0, 2);
+    }
+  };
+  std::for_each(analysis.inputs.begin(), analysis.inputs.end(), strip_dot_slash);
+  std::for_each(analysis.outputs.begin(), analysis.outputs.end(), strip_dot_slash);
+
+  // Only the first missing input is reported before bailing out.
+  const auto first_missing =
+      std::find_if(analysis.inputs.cbegin(), analysis.inputs.cend(),
+                   [](const std::string& path) { return !fs::is_regular_file(path); });
+  if (first_missing != analysis.inputs.cend()) {
+    std::cerr << "missing input: " << *first_missing << "\n";
+    std::cerr << repr() << "\n";
+    exit(1);
+  }
+
+  inputs_ = std::move(analysis.inputs);
+  outputs_ = std::move(analysis.outputs);
+}
+
+/// COMMAND ANALYSIS
+
+// Signature shared by all command line analyzers. analyze_command() invokes an
+// analyzer with (program, args, env) of the command being inspected and
+// expects the detected inputs/outputs back.
+using Analyzer = std::function<AnalysisResult(const std::string&, const ArgVec&, const EnvMap&)>;
+
+/// Derive inputs and outputs from a compiler/linker style command line.
+///
+/// The program name and the environment are not consulted; only `args` is
+/// inspected, starting after args[0]. Arguments are classified as follows:
+///   - the argument following "-o" is recorded as an output;
+///   - an argument starting with "-Wp,-MMD," has the remainder recorded as an
+///     output;
+///   - the argument following any option in kSkipNextArgs is skipped;
+///   - "/dev/null" or "-" marks a test compilation: an empty result is returned
+///     (an argument directly following "-o" is recorded as output before this
+///     check is reached);
+///   - any other argument starting with '-' is a flag and is ignored;
+///   - everything else is recorded as an input.
+///
+/// @param args full argument vector, args[0] being the program itself
+/// @return the detected inputs and outputs; empty if `args` is empty or the
+///         command looks like a test compilation
+static AnalysisResult analyze_compiler_linker(const std::string&, const ArgVec& args,
+                                              const EnvMap&) {
+  static constexpr std::array kSkipNextArgs{
+      "-isystem", "-I", "-L", "-m", "-soname", "-z",
+  };
+  static constexpr std::string_view kOutputOption = "-Wp,-MMD,";
+
+  AnalysisResult result;
+  // With an empty argument vector there is no program name to skip, and
+  // `cbegin() + 1` below would step past the end (undefined behaviour).
+  if (args.empty()) {
+    return result;
+  }
+
+  bool next_is_out = false;
+  bool skip_next = false;
+  // skip args[0] as this is the program itself
+  for (auto it = args.cbegin() + 1; it != args.cend(); ++it) {
+    const auto& arg = *it;
+    if (arg == "-o") {
+      next_is_out = true;
+      continue;
+    }
+    if (next_is_out) {
+      result.outputs.push_back(arg);
+      next_is_out = false;
+      continue;
+    }
+    // No `continue` here: the option starts with '-' and is dropped by the
+    // flag check further down.
+    if (arg.rfind(kOutputOption, 0) == 0) {
+      result.outputs.push_back(arg.substr(kOutputOption.size()));
+    }
+    if (skip_next) {
+      skip_next = false;
+      continue;
+    }
+    if (std::find(kSkipNextArgs.cbegin(), kSkipNextArgs.cend(), arg) != kSkipNextArgs.cend()) {
+      skip_next = true;
+    }
+    // ignore test compilations
+    if (arg == "/dev/null" || arg == "-") {
+      return {};
+    }
+    if (arg[0] == '-') {  // ignore flags
+      continue;
+    }
+    result.inputs.push_back(arg);
+  }
+
+  return result;
+}
+
+/// Derive inputs and outputs from an archiver command line.
+///
+/// Assumed layout: args[0] is the program, args[1] holds the archiver flags,
+/// args[2] is the archive being written and all remaining arguments are the
+/// files going into it. Shorter command lines yield an empty result.
+static AnalysisResult analyze_archiver(const std::string&, const ArgVec& args, const EnvMap&) {
+  constexpr ArgVec::size_type kArchiveIndex = 2;
+
+  AnalysisResult analysis;
+  if (args.size() <= kArchiveIndex) {
+    return analysis;
+  }
+
+  analysis.outputs.push_back(args[kArchiveIndex]);
+  // Everything after the archive name is a member file, i.e. an input.
+  std::copy(args.cbegin() + kArchiveIndex + 1, args.cend(), std::back_inserter(analysis.inputs));
+  return analysis;
+}
+
+// Table of known command analyzers: each entry pairs a pattern with the
+// analyzer to run for commands matching it. Both patterns allow an optional
+// directory prefix ("(.*/)?") and must match the whole string, so suffixed
+// tool names (e.g. "clang-14") are not covered.
+static const std::initializer_list<std::pair<std::regex, Analyzer>> analyzers{
+ {
+ // clang, clang++, gcc, g++, ld, ld.lld and llvm-strip
+ std::regex("^(.*/)?(clang|clang\\+\\+|gcc|g\\+\\+|ld(\\.lld)?|llvm-strip)$"),
+ analyze_compiler_linker,
+ },
+ {
+ // ar and llvm-ar
+ std::regex("^(.*/)?(llvm-)?ar$"),
+ analyze_archiver,
+ },
+};
+
+/// Analyze a command with the first analyzer whose pattern matches it.
+///
+/// The patterns in `analyzers` are tried in declaration order against args[0]
+/// of the command; the first full match decides which analyzer runs.
+///
+/// @param command the command to inspect
+/// @return the analyzer's result, or an empty result if the command has no
+///         arguments or no analyzer matches
+static AnalysisResult analyze_command(const Command& command) {
+  const auto& args = command.args();
+  // Without args[0] there is nothing to match against, and indexing an empty
+  // vector would be out of bounds.
+  if (args.empty()) {
+    return {};
+  }
+  for (const auto& [regex, analyzer] : analyzers) {
+    if (std::regex_match(args[0], regex)) {
+      return analyzer(command.program(), args, command.env());
+    }
+  }
+  return {};
+}
+
} // namespace interceptor
/// UTILITY FUNCTIONS
@@ -182,6 +297,8 @@ static void process_command(const char* filename, char* const argv[], char* cons
// furthermore necessary to produce cache hits in RBE.
command.make_relative();
+ command.analyze();
+
log(command, "");
// pass down the transformed command to execve
diff --git a/interceptor/interceptor.h b/interceptor/interceptor.h
index 77b137a..3f738b2 100644
--- a/interceptor/interceptor.h
+++ b/interceptor/interceptor.h
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include <functional>
#include <optional>
#include <string>
#include <unordered_map>
@@ -25,6 +26,9 @@ namespace interceptor {
using ArgVec = std::vector<std::string>;
using EnvMap = std::unordered_map<std::string, std::string>;
+// Lists of strings naming the inputs / outputs detected for a command.
+using Inputs = std::vector<std::string>;
+using Outputs = Inputs;
+
// Command abstraction
//
// This is a utility container to keep program, args and env in an accessible
@@ -39,11 +43,17 @@ class Command {
char* const* envp() const { return envp_; };
+ const Inputs& inputs() const { return inputs_; }
+ const Outputs& outputs() const { return outputs_; }
+
std::string repr() const;
// make command line calls relative to ROOT_DIR
void make_relative();
+ // determine inputs/outputs
+ void analyze();
+
private:
std::string program_;
std::string cwd_;
@@ -53,6 +63,15 @@ class Command {
mutable std::optional<ArgVec> args_;
mutable std::optional<EnvMap> env_;
+
+ Inputs inputs_;
+ Outputs outputs_;
};
+// Command analysis
+
+// Result of analyzing a command line: the inputs and outputs that could be
+// derived from it. Both lists are empty when nothing was detected.
+struct AnalysisResult {
+ Inputs inputs;
+ Outputs outputs;
+};
} // namespace interceptor