aboutsummaryrefslogtreecommitdiff
path: root/instrumentation/afl-gcc-cmptrs-pass.so.cc
diff options
context:
space:
mode:
Diffstat (limited to 'instrumentation/afl-gcc-cmptrs-pass.so.cc')
-rw-r--r--instrumentation/afl-gcc-cmptrs-pass.so.cc369
1 files changed, 369 insertions, 0 deletions
diff --git a/instrumentation/afl-gcc-cmptrs-pass.so.cc b/instrumentation/afl-gcc-cmptrs-pass.so.cc
new file mode 100644
index 00000000..929a9d7a
--- /dev/null
+++ b/instrumentation/afl-gcc-cmptrs-pass.so.cc
@@ -0,0 +1,369 @@
+/* GCC plugin for cmplog routines instrumentation of code for AFL++.
+
+ Copyright 2014-2019 Free Software Foundation, Inc
+ Copyright 2015, 2016 Google Inc. All rights reserved.
+ Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+ Copyright 2019-2024 AdaCore
+
+ Written by Alexandre Oliva <oliva@adacore.com>, based on the AFL++
+ LLVM CmpLog Routines pass by Andrea Fioraldi
+ <andreafioraldi@gmail.com>, and on the AFL GCC CmpLog pass.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ */
+
+#include "afl-gcc-common.h"
+
+/* This plugin, being under the same license as GCC, satisfies the
+ "GPL-compatible Software" definition in the GCC RUNTIME LIBRARY
+ EXCEPTION, so it can be part of an "Eligible" "Compilation
+ Process".
+
+ The symbol is defined at global scope, outside the anonymous
+ namespace below, so it keeps external linkage.
+ NOTE(review): GCC's plugin loader is expected to look this symbol up
+ by name; confirm against the GCC plugin documentation before
+ renaming or moving it. */
+int plugin_is_GPL_compatible = 1;
+
+namespace {
+
+/* Static description of the pass; the afl_cmptrs_pass constructor
+ hands it to afl_base_pass. It declares a GIMPLE pass named
+ "aflcmptrs" that requires, provides and destroys no properties and
+ has no start-of-pass todo flags. */
+static const struct pass_data afl_cmptrs_pass_data = {
+
+ .type = GIMPLE_PASS,
+ .name = "aflcmptrs",
+ .optinfo_flags = OPTGROUP_NONE,
+ .tv_id = TV_NONE,
+ .properties_required = 0,
+ .properties_provided = 0,
+ .properties_destroyed = 0,
+ .todo_flags_start = 0,
+ /* Todo flags requested once the pass has run.
+ NOTE(review): presumably needed because execute() inserts new
+ statements and calls into the function body -- confirm. */
+ .todo_flags_finish = (TODO_update_ssa | TODO_cleanup_cfg | TODO_verify_il |
+ TODO_rebuild_cgraph_edges),
+
+};
+
+/* GIMPLE pass that inserts a call to one of the __cmplog_rtn_* hooks
+   in front of every call whose first two parameters look like a pair
+   of comparable pointers or strings. */
+struct afl_cmptrs_pass : afl_base_pass {
+
+  /* Build the pass.  QUIET is forwarded to afl_base_pass; debug output
+     is always off.  The cached pointer type and hook declarations
+     start out null and are created on demand by cmptrs_hook(). */
+  afl_cmptrs_pass(bool quiet)
+      : afl_base_pass(quiet, /*debug=*/false, afl_cmptrs_pass_data),
+        tp8u(),
+        cmptrs_hooks() {
+
+  }
+
+  /* A pointer type to an unsigned 8-bit integral type. */
+  tree tp8u;
+
+  /* Declarations for the various cmptrs hook functions, allocated on
+     demand.  [0] is for compares between any pointers, [1] is for
+     compares between G++ std::string, [2] is for compares between G++
+     std::string and GCC C strings, [3] and [4] are analogous to [1]
+     and [2] but for LLVM C++ strings. */
+  tree cmptrs_hooks[5];
+
+  /* Return the declaration of cmptrs hook number I, creating and
+     caching it on first use.  Every hook is declared as
+     void (tp8u, tp8u).  I must index cmptrs_hooks; any other value
+     ends in gcc_unreachable(). */
+  tree cmptrs_hook(unsigned i) {
+
+    if (!tp8u) {
+
+      tree t8u;
+      if (BITS_PER_UNIT == 8)
+        t8u = unsigned_char_type_node;
+      else
+        t8u = build_nonstandard_integer_type(8, 1);
+      tp8u = build_pointer_type(t8u);
+
+    }
+
+    /* Strictly less-than: cmptrs_hooks[ARRAY_SIZE(cmptrs_hooks)] is
+       one past the end of the cache and must never be read. */
+    if (i < ARRAY_SIZE(cmptrs_hooks) && cmptrs_hooks[i])
+      return cmptrs_hooks[i];
+
+    const char *n = NULL;
+
+    switch (i) {
+
+      case 0:
+        n = "__cmplog_rtn_hook";
+        break;
+
+      case 1:
+        n = "__cmplog_rtn_gcc_stdstring_stdstring";
+        break;
+
+      case 2:
+        n = "__cmplog_rtn_gcc_stdstring_cstring";
+        break;
+
+      case 3:
+        n = "__cmplog_rtn_llvm_stdstring_stdstring";
+        break;
+
+      case 4:
+        n = "__cmplog_rtn_llvm_stdstring_cstring";
+        break;
+
+      default:
+        gcc_unreachable();
+
+    }
+
+    tree fnt = build_function_type_list(void_type_node, tp8u, tp8u, NULL_TREE);
+    tree t = cmptrs_hooks[i] = build_fn_decl(n, fnt);
+
+    /* Mark the newly-created decl as non-throwing, so that we can
+       insert the call within basic blocks. */
+    TREE_NOTHROW(t) = 1;
+
+    return t;
+
+  }
+
+  /* Return true if T is the char* type. */
+  bool is_c_string(tree t) {
+
+    return (POINTER_TYPE_P(t) &&
+            TYPE_MAIN_VARIANT(TREE_TYPE(t)) == char_type_node);
+
+  }
+
+  /* Return true if ID is a non-null identifier spelled exactly NAME.
+     Unnamed entities (anonymous namespaces, unnamed fields, ...) have
+     no name node, so the null test must precede
+     IDENTIFIER_POINTER(). */
+  static bool id_is(tree id, const char *name) {
+
+    return id && strcmp(IDENTIFIER_POINTER(id), name) == 0;
+
+  }
+
+  /* Return the first FIELD_DECL found on the chain starting at D, or
+     NULL_TREE if the chain holds none. */
+  static tree first_field_decl(tree d) {
+
+    for (; d; d = DECL_CHAIN(d))
+      if (TREE_CODE(d) == FIELD_DECL) break;
+    return d;
+
+  }
+
+  /* Return true if T is an indirect std::string type.  The LLVM pass
+     tests portions of the mangled name of the callee.  We could do
+     that in GCC too, but computing the mangled name may cause
+     template instantiations and get symbols defined that could
+     otherwise be considered unused.  We check for compatible layout,
+     and class, namespace, and field names.  These have been unchanged
+     since at least GCC 7, probably longer, up to GCC 11.  Odds are
+     that, if it were to change in significant ways, mangling would
+     also change to flag the incompatibility, and we'd have to use a
+     different hook anyway. */
+  bool is_gxx_std_string(tree t) {
+
+    /* We need a pointer or reference type. */
+    if (!POINTER_TYPE_P(t)) return false;
+
+    /* Get to the pointed-to type. */
+    t = TREE_TYPE(t);
+    if (!t) return false;
+
+    /* Select the main variant, so that we can compare types with
+       pointers. */
+    t = TYPE_MAIN_VARIANT(t);
+
+    /* We expect it to be a record type. */
+    if (TREE_CODE(t) != RECORD_TYPE) return false;
+
+    /* The type has an identifier, and the template is named
+       basic_string. */
+    if (!id_is(TYPE_IDENTIFIER(t), "basic_string")) return false;
+
+    /* TYPE_NAME may be a bare identifier instead of a declaration; in
+       that case there is no DECL_CONTEXT to walk. */
+    if (!DECL_P(TYPE_NAME(t))) return false;
+
+    /* It's declared in an internal namespace named __cxx11. */
+    tree ctx = DECL_CONTEXT(TYPE_NAME(t));
+    if (!ctx || TREE_CODE(ctx) != NAMESPACE_DECL ||
+        !id_is(DECL_NAME(ctx), "__cxx11"))
+      return false;
+
+    /* The __cxx11 namespace is a member of namespace std. */
+    ctx = DECL_CONTEXT(ctx);
+    if (!ctx || TREE_CODE(ctx) != NAMESPACE_DECL ||
+        !id_is(DECL_NAME(ctx), "std"))
+      return false;
+
+    /* And the std namespace is in the global namespace. */
+    ctx = DECL_CONTEXT(ctx);
+    if (ctx && TREE_CODE(ctx) != TRANSLATION_UNIT_DECL) return false;
+
+    /* Check that the first nonstatic data member of the record type
+       is named _M_dataplus and sits at bit offset zero. */
+    tree f1 = first_field_decl(TYPE_FIELDS(t));
+    if (!f1 || !integer_zerop(DECL_FIELD_BIT_OFFSET(f1)) ||
+        !id_is(DECL_NAME(f1), "_M_dataplus"))
+      return false;
+
+    /* Check that the second nonstatic data member of the record type
+       is named _M_string_length.  No need to check this field's
+       offset. */
+    tree f2 = first_field_decl(DECL_CHAIN(f1));
+    if (!f2 || !id_is(DECL_NAME(f2), "_M_string_length")) return false;
+
+    /* The type of the second data member is size_t. */
+    if (!TREE_TYPE(f2) || TYPE_MAIN_VARIANT(TREE_TYPE(f2)) != size_type_node)
+      return false;
+
+    /* Now go back to the first data member.  Its type should be a
+       record type named _Alloc_hider.  The identifier being tested
+       is that of the member's type, not of the enclosing string
+       type. */
+    tree hider = TREE_TYPE(f1);
+    if (!hider || TREE_CODE(hider) != RECORD_TYPE ||
+        !id_is(TYPE_IDENTIFIER(hider), "_Alloc_hider"))
+      return false;
+
+    /* And its first data member is named _M_p. */
+    tree mp = first_field_decl(TYPE_FIELDS(hider));
+    if (!mp || !integer_zerop(DECL_FIELD_BIT_OFFSET(mp)) ||
+        !id_is(DECL_NAME(mp), "_M_p"))
+      return false;
+
+    /* For the basic_string<char> type we're interested in, the type
+       of the data member is the C string type. */
+    if (!is_c_string(TREE_TYPE(mp))) return false;
+
+    /* This might not be the real thing, but the bits that matter for
+       the hook are there. */
+
+    return true;
+
+  }
+
+  /* ??? This is not implemented.  What would the point be of
+     recognizing LLVM's string type in GCC?  Always returns false, so
+     hooks [3] and [4] are never selected by execute(). */
+  bool is_llvm_std_string(tree t) {
+
+    return false;
+
+  }
+
+  /* Instrument FN.  For every GIMPLE_CALL with at least two arguments
+     whose callee type declares at least two parameters, pick a hook
+     from the first two parameter types and insert a call to it, fed
+     with the first two actual arguments converted to tp8u, right
+     before the original call.  Functions rejected by
+     isInInstrumentList() are left untouched.  Always returns 0. */
+  virtual unsigned int execute(function *fn) {
+
+    if (!isInInstrumentList(fn)) return 0;
+
+    basic_block bb;
+    FOR_EACH_BB_FN(bb, fn) {
+
+      for (gimple_stmt_iterator gsi = gsi_after_labels(bb); !gsi_end_p(gsi);
+           gsi_next(&gsi)) {
+
+        gimple stmt = gsi_stmt(gsi);
+
+        /* We're only interested in GIMPLE_CALLs. */
+        if (gimple_code(stmt) != GIMPLE_CALL) continue;
+
+        if (gimple_call_num_args(stmt) < 2) continue;
+
+        gcall *c = as_a<gcall *>(stmt);
+
+        tree callee_type = gimple_call_fntype(c);
+
+        if (!callee_type || !TYPE_ARG_TYPES(callee_type) ||
+            !TREE_CHAIN(TYPE_ARG_TYPES(callee_type)))
+          continue;
+
+        tree arg_type[2] = {
+
+            TYPE_MAIN_VARIANT(TREE_VALUE(TYPE_ARG_TYPES(callee_type))),
+            TYPE_MAIN_VARIANT(
+                TREE_VALUE(TREE_CHAIN(TYPE_ARG_TYPES(callee_type))))};
+
+        /* Both are needed by several branches below; compute them
+           once. */
+        const bool same_type = arg_type[0] == arg_type[1];
+        const bool gxx_first = is_gxx_std_string(arg_type[0]);
+
+        tree hook = NULL;
+        /* Callee arglist starts with two GCC std::string arguments. */
+        if (same_type && gxx_first)
+          hook = cmptrs_hook(1);
+        /* Callee arglist starts with GCC std::string and C string. */
+        else if (gxx_first && is_c_string(arg_type[1]))
+          hook = cmptrs_hook(2);
+        /* Callee arglist starts with two LLVM std::string arguments. */
+        else if (same_type && is_llvm_std_string(arg_type[0]))
+          hook = cmptrs_hook(3);
+        /* Callee arglist starts with LLVM std::string and C string. */
+        else if (is_llvm_std_string(arg_type[0]) && is_c_string(arg_type[1]))
+          hook = cmptrs_hook(4);
+        /* Callee arglist starts with two pointers to the same type,
+           and callee returns a value. */
+        else if (same_type && POINTER_TYPE_P(arg_type[0]) &&
+                 (TYPE_MAIN_VARIANT(gimple_call_return_type(c)) !=
+                  void_type_node))
+          hook = cmptrs_hook(0);
+        else
+          continue;
+
+        tree arg[2] = {gimple_call_arg(c, 0), gimple_call_arg(c, 1)};
+
+        for (unsigned i = 0; i < ARRAY_SIZE(arg); i++) {
+
+          /* Convert the actual argument to the hook's parameter type.
+             If the result is not a valid GIMPLE operand, compute it
+             into a fresh SSA name ahead of the call. */
+          tree conv = fold_convert_loc(UNKNOWN_LOCATION, tp8u, arg[i]);
+          if (!is_gimple_val(conv)) {
+
+            tree s = make_ssa_name(tp8u);
+            gimple g = gimple_build_assign(s, conv);
+            conv = s;
+            gsi_insert_before(&gsi, g, GSI_SAME_STMT);
+
+          }
+
+          arg[i] = conv;
+
+        }
+
+        /* GSI_SAME_STMT keeps the iterator on the original call, so the
+           loop resumes after it and never revisits the inserted
+           statements. */
+        gimple call = gimple_build_call(hook, 2, arg[0], arg[1]);
+        gsi_insert_before(&gsi, call, GSI_SAME_STMT);
+
+      }
+
+    }
+
+    return 0;
+
+  }
+
+};
+
+/* Version and help text for this plugin; plugin_init() registers it
+ with the PLUGIN_INFO callback. The help string is wrapped in G_(). */
+static struct plugin_info afl_cmptrs_plugin = {
+
+ .version = "20220420",
+ .help = G_("AFL gcc cmptrs plugin\n\
+\n\
+Set AFL_QUIET in the environment to silence it.\n\
+"),
+
+};
+
+} // namespace
+
+/* Entry point GCC invokes when it loads the plugin.  Checks the
+   compiler version, optionally prints a banner, and registers both the
+   plugin info and the cmptrs pass.  Returns 0. */
+int plugin_init(struct plugin_name_args *info,
+                struct plugin_gcc_version *version) {
+
+  /* Report a version mismatch between the running GCC and the GCC the
+     plugin was built against through FATAL(). */
+  if (!plugin_default_version_check(version, &gcc_version))
+    FATAL(G_("GCC and plugin have incompatible versions, expected GCC %s, "
+             "is %s"),
+          gcc_version.basever, version->basever);
+
+  /* Show a banner only when file descriptor 2 is a terminal and
+     AFL_QUIET is unset; in every other case the pass runs quietly. */
+  const bool show_banner = isatty(2) && !getenv("AFL_QUIET");
+  if (show_banner)
+    SAYF(cCYA "afl-gcc-cmptrs-pass " cBRI VERSION cRST
+              " by <oliva@adacore.com>\n");
+
+  const char *plugin_name = info->base_name;
+  register_callback(plugin_name, PLUGIN_INFO, NULL, &afl_cmptrs_plugin);
+
+  /* Schedule the pass right after the first instance of the "ssa"
+     pass. */
+  struct register_pass_info pass_info = {
+
+      .pass = new afl_cmptrs_pass(!show_banner),
+      .reference_pass_name = "ssa",
+      .ref_pass_instance_number = 1,
+      .pos_op = PASS_POS_INSERT_AFTER,
+
+  };
+
+  register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info);
+
+  return 0;
+
+}
+