#!/usr/bin/env sh
#
# afl-cmin: corpus minimization front end.
#
# This file is a thin POSIX sh wrapper around an embedded awk program.
# The wrapper:
#   - refuses to run when `uname -s` reports Darwin,
#   - exports AFL_QUIET=1 and ASAN_OPTIONS=detect_leaks=0,
#   - prepends the directory of this script to PATH,
#   - hands all command-line arguments to the awk program below, which is
#     read from the quoted heredoc (so the shell expands nothing inside it).

SYS=$(uname -s)
test "$SYS" = "Darwin" && {
  echo Error: afl-cmin does not work on Apple currently. please use afl-cmin.bash instead.
  exit 1
}
export AFL_QUIET=1
export ASAN_OPTIONS=detect_leaks=0
THISPATH=$(dirname "${0}")
export PATH="${THISPATH}:$PATH"
# ${@+"$@"} expands to nothing (instead of one empty word) when no
# arguments were given.
awk -f - -- ${@+"$@"} <<'EOF'
#!/usr/bin/awk -f
# awk script to minimize a test corpus of input files
#
# based on afl-cmin bash script written by Michal Zalewski
# rewritten by Heiko Eißfeldt (hexcoder-)
# tested with:
#   gnu awk (x86 Linux)
#   bsd awk (x86 *BSD)
#   mawk (arm32 raspbian)
#
# uses getopt.awk package from Arnold Robbins
#
# external tools used by this script:
# test
# grep
# rm
# mkdir
# ln
# cp
# pwd
# type
# cd
# find
# stat
# sort
# cut
# and afl-showmap from this project :-)

# getopt.awk --- Do C library getopt(3) function in awk

# External variables:
#    Optind -- index in ARGV of first nonoption argument
#    Optarg -- string value of argument to current option
#    Opterr -- if nonzero, print our own diagnostic
#    Optopt -- current option letter

# Returns:
#    -1     at end of options
#    "?"    for unrecognized option
#    <c>    a character representing the current option

# Private Data:
#    _opti  -- index in multiflag option, e.g., -abc

# Parameters:
#    argc     -- passed by the caller but not read by this function
#    argv     -- argument array to scan (indexed via the global Optind)
#    options  -- option letters; a letter followed by ":" takes an argument
#    thisopt, i -- local scratch variables (not supplied by callers)
function getopt(argc, argv, options,    thisopt, i)
{
    if (length(options) == 0)    # no options given
        return -1

    if (argv[Optind] == "--") {  # all done
        Optind++
        _opti = 0
        return -1
    } else if (argv[Optind] !~ /^-[^:\t ]/) {
        # current word is not an option: stop scanning
        _opti = 0
        return -1
    }
    if (_opti == 0)
        _opti = 2                # first option letter follows the "-"
    thisopt = substr(argv[Optind], _opti, 1)
    Optopt = thisopt
    i = index(options, thisopt)
    if (i == 0) {
        if (Opterr)
            printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
        # step past the unknown letter (or to the next word)
        if (_opti >= length(argv[Optind])) {
            Optind++
            _opti = 0
        } else
            _opti++
        return "?"
    }
    if (substr(options, i + 1, 1) == ":") {
        # get option argument: either the rest of this word or the next word
        if (length(substr(argv[Optind], _opti + 1)) > 0)
            Optarg = substr(argv[Optind], _opti + 1)
        else
            Optarg = argv[++Optind]
        _opti = 0
    } else
        Optarg = ""
    if (_opti == 0 || _opti >= length(argv[Optind])) {
        Optind++
        _opti = 0
    } else
        _opti++
    return thisopt
}

# Print the help text to stdout and terminate with exit status 1.
# Every string below is joined into ONE print statement, so each line
# except the last must end in a backslash continuation.
function usage() {
   print \
"afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \
"\n" \
"Required parameters:\n" \
" -i dir - input directory with starting corpus\n" \
" -o dir - output directory for minimized files\n" \
"\n" \
"Execution control settings:\n" \
" -T tasks - how many parallel tasks to run (default: 1, all=nproc)\n" \
" -f file - location read by the fuzzed program (stdin)\n" \
" -m megs - memory limit for child process ("mem_limit" MB)\n" \
" -t msec - run time limit for child process (default: 5000)\n" \
" -O - use binary-only instrumentation (FRIDA mode)\n" \
" -Q - use binary-only instrumentation (QEMU mode)\n" \
" -U - use unicorn-based instrumentation (unicorn mode)\n" \
" -X - use Nyx mode\n" \
"\n" \
"Minimization settings:\n" \
" -A - allow crashes and timeouts (not recommended)\n" \
" -C - keep crashing inputs, reject everything else\n" \
" -e - solve for edge coverage only, ignore hit counts\n" \
"\n" \
"For additional tips, please consult README.md\n" \
"\n" \
"Environment variables used:\n" \
"AFL_CRASH_EXITCODE: optional child exit code to be interpreted as crash\n" \
"AFL_FORKSRV_INIT_TMOUT: time the fuzzer waits for the forkserver to come up\n" \
"AFL_KEEP_TRACES: leave the temporary <out_dir>/.traces directory\n" \
"AFL_KILL_SIGNAL: Signal delivered to child processes on timeout (default: SIGKILL)\n" \
"AFL_FORK_SERVER_KILL_SIGNAL: Signal delivered to fork server processes on\n" \
" termination (default: SIGTERM). If this is not set and AFL_KILL_SIGNAL is\n" \
" set, this will be set to the same value as AFL_KILL_SIGNAL.\n" \
"AFL_NO_FORKSRV: run target via execve instead of using the forkserver\n" \
"AFL_CMIN_ALLOW_ANY: write tuples for crashing inputs also\n" \
"AFL_PATH: path for the afl-showmap binary if not found anywhere in PATH\n" \
"AFL_PRINT_FILENAMES: If set, the filename currently processed will be " \
"printed to stdout\n" \
"AFL_SKIP_BIN_CHECK: skip afl instrumentation checks for target binary\n" \
"AFL_CUSTOM_MUTATOR_LIBRARY: custom mutator library (post_process and send)\n" \
"AFL_PYTHON_MODULE: custom mutator library (post_process and send)\n"
   exit 1
}

# Return 1 when `test -f PATH -a -x PATH` succeeds for binarypath, else 0.
# NOTE: binarypath is interpolated unquoted into the shell command.
function exists_and_is_executable(binarypath) {
  return 0 == system("test -f "binarypath" -a -x "binarypath)
}

# Whole program: parse options, validate the environment, run afl-showmap
# over the corpus, then copy a covering subset of inputs to the output dir.
BEGIN {
  # redirected == 1 when stdout is not a terminal; progress output then
  # uses full lines instead of "\r" overwrites.
  if (0 != system( "test -t 1")) {
    redirected = 1
  } else {
    redirected = 0
  }

  print "corpus minimization tool for AFL++ (awk version)\n"

  # defaults
  extra_par = ""
  AFL_CMIN_CRASHES_ONLY = ""
  AFL_CMIN_ALLOW_ANY = ""

  # process options
  Opterr = 1    # default is to diagnose
  Optind = 1    # skip ARGV[0]
  while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eACOQUXYT:?")) != -1) {
    if (_go_c == "i") {
      if (!Optarg) usage()
      if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      in_dir = Optarg
      continue
    } else
    if (_go_c == "T") {
      if (!Optarg) usage()
      if (threads) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      threads = Optarg
      continue
    } else
    if (_go_c == "o") {
      if (!Optarg) usage()
      if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      out_dir = Optarg
      continue
    } else
    if (_go_c == "f") {
      if (!Optarg) usage()
      if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      stdin_file = Optarg
      continue
    } else
    if (_go_c == "m") {
      if (!Optarg) usage()
      if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      mem_limit = Optarg
      mem_limit_given = 1
      continue
    } else
    if (_go_c == "t") {
      if (!Optarg) usage()
      if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      timeout = Optarg
      continue
    } else
    if (_go_c == "C") {
      # passed to afl-showmap as an environment assignment prefix
      AFL_CMIN_CRASHES_ONLY = "AFL_CMIN_CRASHES_ONLY=1 "
      continue
    } else
    if (_go_c == "A") {
      # passed to afl-showmap as an environment assignment prefix
      AFL_CMIN_ALLOW_ANY = "AFL_CMIN_ALLOW_ANY=1 "
      continue
    } else
    if (_go_c == "e") {
      extra_par = extra_par " -e"
      continue
    } else
    if (_go_c == "O") {
      if (frida_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -O"
      frida_mode = 1
      continue
    } else
    if (_go_c == "Q") {
      if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -Q"
      qemu_mode = 1
      continue
    } else
    if (_go_c == "U") {
      if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -U"
      unicorn_mode = 1
      continue
    } else
    if (_go_c == "X" || _go_c == "Y") {
      # -Y is accepted as an alias; both forward -X to afl-showmap
      if (nyx_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -X"
      nyx_mode = 1
      continue
    } else
    if (_go_c == "?") {
      exit 1
    } else
      usage()   # includes -h
  } # while options

  if (!mem_limit) mem_limit = "none"
  if (!timeout) timeout = "5000"

  # get program args
  # prog_args[0] is the target binary; the remaining words are collected,
  # each wrapped in single quotes, into prog_args_string.
  i = 0
  prog_args_string = ""
  for (; Optind < ARGC; Optind++) {
    prog_args[i++] = ARGV[Optind]
    if (i > 1)
      prog_args_string = prog_args_string" '"ARGV[Optind]"'"
  }

  # sanity checks
  if (!prog_args[0] || !in_dir || !out_dir) usage()

  target_bin = prog_args[0]

  # Do a sanity check to discourage the use of /tmp, since we can't really
  # handle this safely from an awk script.

  if (!ENVIRON["AFL_ALLOW_TMP"]) {
    dirlist[0] = in_dir
    dirlist[1] = target_bin
    dirlist[2] = out_dir
    dirlist[3] = stdin_file
    "pwd" | getline dirlist[4] # current directory
    for (dirind in dirlist) {
      dir = dirlist[dirind]

      if (dir ~ /^(\/var)?\/tmp/) {
        print "[-] Warning: do not use this script in /tmp or /var/tmp for security reasons." > "/dev/stderr"
      }
    }
    delete dirlist
  }

  if (threads && stdin_file) {
    print "[-] Error: -T and -f cannot be used together." > "/dev/stderr"
    exit 1
  }

  if (!threads && !stdin_file && !nyx_mode) {
    print "[*] Are you aware of the '-T all' parallelize option that improves the speed for large/slow corpuses?"
  }

  # If @@ is specified, but there's no -f, let's come up with a temporary input
  # file name.

  trace_dir = out_dir "/.traces"

  if (!stdin_file) {
    found_atat = 0
    for (prog_args_ind in prog_args) {
      if (match(prog_args[prog_args_ind], "@@") != 0) {
        found_atat = 1
        break
      }
    }
    if (found_atat) {
      stdin_file = trace_dir "/.cur_input"
    }
  }

  # Check for obvious errors.

  if (mem_limit && mem_limit != "none" && mem_limit < 5) {
    print "[-] Error: dangerously low memory limit." > "/dev/stderr"
    exit 1
  }

  if (timeout && timeout != "none" && timeout < 10) {
    print "[-] Error: dangerously low timeout." > "/dev/stderr"
    exit 1
  }

  # If the target is not an executable file as given, try to resolve it
  # through the shell's `command -v` lookup.
  if (!nyx_mode && target_bin && !exists_and_is_executable(target_bin)) {

    cmd = "command -v "target_bin" 2>/dev/null"
    cmd | getline tnew
    close(cmd)
    if (!tnew || !exists_and_is_executable(tnew)) {
      print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr"
      exit 1
    }
    target_bin = tnew
  }

  # If the binary contains the string AFL_DUMP_MAP_SIZE, run it with
  # AFL_DUMP_MAP_SIZE=1 and use the first output line as the map size.
  if (0 == system ( "grep -aq AFL_DUMP_MAP_SIZE " target_bin )) {
    print "[!] Trying to obtain the map size of the target ..."
    get_map_size = "AFL_DUMP_MAP_SIZE=1 " target_bin
    get_map_size | getline mapsize
    close(get_map_size)
    if (mapsize && mapsize > 65535 && mapsize < 100000000) {
      AFL_MAP_SIZE = "AFL_MAP_SIZE="mapsize" "
      print "[+] Setting "AFL_MAP_SIZE
    }
  }

  if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !frida_mode && !unicorn_mode && !nyx_mode) {
    if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) {
      print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr"
      exit 1
    }
  }

  if (0 != system( "test -d "in_dir )) {
    print "[-] Error: directory '"in_dir"' not found." > "/dev/stderr"
    exit 1
  }

  #if (0 == system( "test -d "in_dir"/default" )) {
  #  in_dir = in_dir "/default"
  #}
  #
  #if (0 == system( "test -d "in_dir"/queue" )) {
  #  in_dir = in_dir "/queue"
  #}

  # Remove leftovers of an earlier run, then refuse a non-empty output dir.
  system("rm -rf "trace_dir" 2>/dev/null");
  system("rm "out_dir"/id[:_]* 2>/dev/null")

  cmd = "ls "out_dir"/* 2>/dev/null | wc -l"
  cmd | getline noofentries
  close(cmd)
  if (0 == system( "test -d "out_dir" -a "noofentries" -gt 0" )) {
    print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr"
    exit 1
  }

  if (threads) {
    cmd = "nproc"
    cmd | getline nproc
    close(cmd)
    if (threads == "all") {
      threads = nproc
    } else {
      # note: the condition requires threads > 1, so "-T 1" is rejected
      if (!(threads > 1 && threads <= nproc)) {
        print "[-] Error: -T option must be between 1 and "nproc" or \"all\"." > "/dev/stderr"
        exit 1
      }
    }
  }

  # Check for the more efficient way to copy files...
  if (0 != system("mkdir -p -m 0700 "trace_dir)) {
    print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr"
    exit 1
  }

  if (stdin_file) {
    # truncate input file
    printf "" > stdin_file
    close(stdin_file)
  }

  # First we look in PATH
  if (0 == system("command -v afl-showmap >/dev/null 2>&1")) {
    cmd = "command -v afl-showmap 2>/dev/null"
    cmd | getline showmap
    close(cmd)
  } else {
    # then we look in the current directory
    if (0 == system("test -x ./afl-showmap")) {
      showmap = "./afl-showmap"
    } else {
      if (ENVIRON["AFL_PATH"]) {
        showmap = ENVIRON["AFL_PATH"] "/afl-showmap"
      }
    }
  }

  if (!showmap || 0 != system("test -x "showmap )) {
    print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr"
    exit 1
  }

  # get list of input filenames sorted by size
  i = 0
  # yuck, gnu stat is option incompatible to bsd stat
  # we use a heuristic to differentiate between
  # GNU stat and other stats
  cmd = "stat --version 2>/dev/null"
  cmd | getline statversion
  close(cmd)
  if (statversion ~ /GNU coreutils/ || statversion ~ /BusyBox/) {
    stat_format = "-c '%s %n'" # GNU
  } else {
    stat_format = "-f '%z %N'" # *BSD, MacOS
  }
  # Lines are "<size> <path>", sorted by size ascending; lines starting
  # with "0" are dropped by the final grep.
  cmdline = "(cd "in_dir" && find . \\( ! -name \".*\" -a -type d \\) -o -type f -exec stat "stat_format" \\{\\} + | sort -k1n -k2r) | grep -Ev '^0'"
  #cmdline = "ls "in_dir" | (cd "in_dir" && xargs stat "stat_format" 2>/dev/null) | sort -k1n -k2r"
  #cmdline = "(cd "in_dir" && stat "stat_format" *) | sort -k1n -k2r"
  #cmdline = "(cd "in_dir" && ls | xargs stat "stat_format" ) | sort -k1n -k2r"
  while (cmdline | getline) {
    # strip the size column and a leading "./" -> path relative to in_dir
    sub(/^[0-9]+ (\.\/)?/,"",$0)
    infilesSmallToBigFull[i] = $0
    # strip any directory part -> base name (used for trace/output files)
    sub(/.*\//, "", $0)
    infilesSmallToBig[i] = $0
    infilesSmallToBigMap[infilesSmallToBig[i]] = infilesSmallToBigFull[i]
    infilesSmallToBigFullMap[infilesSmallToBigFull[i]] = infilesSmallToBig[i]
    i++
  }
  close(cmdline)
  in_count = i

  first_file = infilesSmallToBigFull[0]

  #if (0 == system("test -d ""\""in_dir"/"first_file"\"")) {
  #  print "[-] Error: The input directory is empty or contains subdirectories - please fix." > "/dev/stderr"
  #  exit 1
  #}

  # Probe whether hard links from in_dir into the trace directory work;
  # use `ln` for the final copy step if so, `cp` otherwise.
  system(">\""in_dir"/.afl-cmin.test\"")
  if (0 == system("ln \""in_dir"/.afl-cmin.test\" "trace_dir"/.link_test")) {
    cp_tool = "ln"
  } else {
    cp_tool = "cp"
  }
  system("rm -f \""in_dir"/.afl-cmin.test\"")

  if (!ENVIRON["AFL_SKIP_BIN_CHECK"]) {
    # Make sure that we can actually get anything out of afl-showmap before we
    # waste too much time.

    print "[*] Testing the target binary..."

    if (!stdin_file) {
      system(AFL_MAP_SIZE "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
    } else {
      system("cp \""in_dir"/"first_file"\" "stdin_file)
      system(AFL_MAP_SIZE "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -H \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
    }

    # count the lines afl-showmap wrote for the test input
    first_count = 0

    runtest = trace_dir"/.run_test"
    while ((getline < runtest) > 0) {
      ++first_count
    }

    close(runtest)

    if (first_count) {
      print "[+] OK, "first_count" tuples recorded."
    } else {
      print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr"
      if (!ENVIRON["AFL_KEEP_TRACES"]) {
        system("rm -rf "trace_dir" 2>/dev/null")
      }
      exit 1
    }
  }

  if (in_count < threads) {
    threads = in_count
    print "[!] WARNING: less inputs than threads, reducing threads to "threads" and likely the overhead of threading makes things slower..."
  }

  # Let's roll!

  #############################
  # STEP 1: Collecting traces #
  #############################

  # For parallel runs, split the input list into one file list per task:
  # tasks 1..threads-1 get inputsperfile entries each, the last task gets
  # whatever remains.
  if (threads) {

    inputsperfile = int(in_count / threads)
    if (in_count % threads) {
      inputsperfile++;
    }

    cnt = 0;
    tmpfile=out_dir "/.filelist"
    for (instance = 1; instance < threads; instance++) {
      for (i = 0; i < inputsperfile; i++) {
        print in_dir"/"infilesSmallToBigFull[cnt] >> (tmpfile"."instance)
        cnt++
      }
    }
    for (; cnt < in_count; cnt++) {
      print in_dir"/"infilesSmallToBigFull[cnt] >> (tmpfile"."threads)
    }

  }

  print "[*] Obtaining traces for "in_count" input files in '"in_dir"'."

  cur = 0;

  if (threads > 1) {

    print "[*] Creating " threads " parallel tasks with about " inputsperfile " items each."
    # Each task runs in the background and creates "<tmpfile>.<i>.done"
    # once its afl-showmap invocation has returned.
    for (i = 1; i <= threads; i++) {

      if (!stdin_file) {
#        print " { "AFL_MAP_SIZE AFL_CMIN_ALLOW_ANY AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -I \""tmpfile"."i"\" -- \""target_bin"\" "prog_args_string"; > "tmpfile"."i".done ; } &"
        retval = system(" { "AFL_MAP_SIZE AFL_CMIN_ALLOW_ANY AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -I \""tmpfile"."i"\" -- \""target_bin"\" "prog_args_string"; > "tmpfile"."i".done ; } &")
      } else {
        stdin_file=tmpfile"."i".stdin"
#        print " { "AFL_MAP_SIZE AFL_CMIN_ALLOW_ANY AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -I \""tmpfile"."i"\" -H \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null; > "tmpfile"."i".done ; } &"
        retval = system(" { "AFL_MAP_SIZE AFL_CMIN_ALLOW_ANY AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -I \""tmpfile"."i"\" -H \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null; > "tmpfile"."i".done ; } &")
      }
    }
    print "[*] Waiting for parallel tasks to complete ..."
    # wait for all processes to finish
    # (polls for the ".done" marker files until all of them exist)
    ok=0
    while (ok < threads) {
      ok=0
      for (i = 1; i <= threads; i++) {
        if (system("test -f "tmpfile"."i".done") == 0) {
          ok++
        }
      }
    }
    print "[*] Done!"
    system("rm -f "tmpfile"*")
  } else {
    if (!stdin_file) {
      print " Processing "in_count" files (forkserver mode)..."
#      print AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string
      retval = system(AFL_MAP_SIZE AFL_CMIN_ALLOW_ANY AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string)
    } else {
      print " Processing "in_count" files (forkserver mode)..."
#      print AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -H \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null"
      retval = system(AFL_MAP_SIZE AFL_CMIN_ALLOW_ANY AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -H \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
    }

    # A non-zero afl-showmap status aborts the run unless -C or -A was given.
    if (retval && (!AFL_CMIN_CRASHES_ONLY && !AFL_CMIN_ALLOW_ANY)) {
      print "[!] Exit code "retval" != 0 received from afl-showmap (this means a crashing or timeout input is likely present), terminating..."

      if (!ENVIRON["AFL_KEEP_TRACES"]) {
        system("rm -rf "trace_dir" 2>/dev/null")
        system("rmdir "out_dir)
      }
      exit retval
    }

  }

  #######################################################
  # STEP 2: register smallest input file for each tuple #
  # STEP 3: copy that file (at most once)               #
  #######################################################

  print "[*] Processing traces for input files in '"in_dir"'."

  cur = 0
  out_count = 0
  tuple_count = 0

  # from rare to frequent new tuples
  # get the best (smallest) file for it
  # and copy it
  while (cur < in_count) {
    fn = infilesSmallToBig[cur]
    ++cur
    if (redirected == 0) { printf "\r Processing file "cur"/"in_count }
    else { print " Processing file "cur"/"in_count }
    # create path for the trace file from afl-showmap
    tracefile_path = trace_dir"/"fn
    # ensure the file size is not zero
    cmd = "du -b "tracefile_path
    cmd | getline output
    close(cmd)
    split(output, result, "\t")
    if (result[1] == 0) {
      print "[!] WARNING: file "fn" is crashing the target, ignoring..."
    }
    # gather all keys, and count them
    # (inputs are visited smallest first, so the first file seen for a
    # key is recorded as its best file)
    while ((getline line < tracefile_path) > 0) {
        key = line
        if (!(key in key_count)) {
          ++tuple_count
        }
        ++key_count[key]
        if (! (key in best_file)) {
            # this is the best file for this key
            best_file[key] = fn
#printf "BEST_FILE[%d]=\"%s\"\n",key,fn | "sort -t'[' -k2 > "trace_dir"/.candidate_script"
        }
#printf "%d %s\n",key,fn > trace_dir"/.candidate_list"
    }
    close(tracefile_path)
  }
  print ""

  # sort keys
  sortedKeys = trace_dir"/.all_uniq"
  sortKeysCmd = "sort -k1n > "sortedKeys
  for (key in key_count) {
     printf "%7d %s\n",key_count[key],key | sortKeysCmd
  }
  close(sortKeysCmd)

  # iterate over keys from rare to frequent and
  # copy best file
  while ((getline < sortedKeys) > 0) {

    # split
    nrFields = split($0, field, / +/)
#print nrFields" Felder: '"field[0]"', '"field[1]"', '"field[2]"', '"field[3]"'"
    key = field[nrFields]

    ++tcnt;
    if (redirected == 0) { printf "\r Processing tuple "tcnt"/"tuple_count" with count "key_count[key]"..." }
    else { print " Processing tuple "tcnt"/"tuple_count" with count "key_count[key]"..." }

    # skip keys already covered by a file selected earlier
    if (key in keyAlreadyKnown) {
      continue
    }

    fn = best_file[key]
    # gather all tuples from the best file for this key
    tracedfn = trace_dir"/"fn
    while ((getline < tracedfn) > 0) {
      keyAlreadyKnown[$0] = ""
    }
    close(tracedfn)

    # copy file unless already done
    if (! (fn in file_already_copied)) {
      realfile = infilesSmallToBigMap[fn]
      system(cp_tool" \""in_dir"/"realfile"\" \""out_dir"/"fn"\"")
      file_already_copied[fn] = ""
      ++out_count
#printf "tuple nr %d (%d cnt=%d) -> %s\n",tcnt,key,key_count[key],fn > trace_dir"/.log"
    }
  }
  close(sortedKeys)
  print ""
  print "[+] Found "tuple_count" unique tuples across "in_count" files."

  if (out_count == 1) {
    print "[!] WARNING: All test cases had the same traces, check syntax!"
  }
  print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'."

  if (!ENVIRON["AFL_KEEP_TRACES"]) {
    system("rm -rf "trace_dir" 2>/dev/null")
  }

  exit 0
}
EOF