From 230a480b2b1bae274f2c46294a8347df664a0016 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Mon, 27 Nov 2023 06:54:35 +0200 Subject: Fix exit status of system on MacOS. --- bugs-fixed/REGRESS | 6 +++++- bugs-fixed/system-status.ok2 | 3 +++ run.c | 11 ++++++----- 3 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 bugs-fixed/system-status.ok2 diff --git a/bugs-fixed/REGRESS b/bugs-fixed/REGRESS index 98d578a..acdbeeb 100755 --- a/bugs-fixed/REGRESS +++ b/bugs-fixed/REGRESS @@ -11,6 +11,7 @@ do echo === $i OUT=${i%.awk}.OUT OK=${i%.awk}.ok + OK2=${i%.awk}.ok2 IN=${i%.awk}.in input= if [ -f $IN ] @@ -20,9 +21,12 @@ do ../a.out -f $i $input > $OUT 2>&1 if cmp -s $OK $OUT + then + rm -f $OUT + elif [ -f $OK2 ] && cmp -s $OK2 $OUT then rm -f $OUT else - echo ++++ $i failed! + echo '++++ $i failed!' fi done diff --git a/bugs-fixed/system-status.ok2 b/bugs-fixed/system-status.ok2 new file mode 100644 index 0000000..f1f631e --- /dev/null +++ b/bugs-fixed/system-status.ok2 @@ -0,0 +1,3 @@ +normal status 42 +death by signal status 257 +death by signal with core dump status 262 diff --git a/run.c b/run.c index 7462c38..8a8ca68 100644 --- a/run.c +++ b/run.c @@ -2065,6 +2065,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis Node *nextarg; FILE *fp; int status = 0; + int estatus = 0; t = ptoi(a[0]); x = execute(a[1]); @@ -2108,19 +2109,19 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis case FSYSTEM: fflush(stdout); /* in case something is buffered already */ status = system(getsval(x)); - u = status; if (status != -1) { if (WIFEXITED(status)) { - u = WEXITSTATUS(status); + estatus = WEXITSTATUS(status); } else if (WIFSIGNALED(status)) { - u = WTERMSIG(status) + 256; + estatus = WTERMSIG(status) + 256; #ifdef WCOREDUMP if (WCOREDUMP(status)) - u += 256; + estatus += 256; #endif } else /* something else?!? */ - u = 0; + estatus = 0; } + u = estatus; break; case FRAND: /* random() returns numbers in [0..2^31-1] -- cgit v1.2.3 From 0096fa60008346f24de36064930c911ced766961 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Mon, 27 Nov 2023 18:21:03 +0200 Subject: Fix after review comment. --- run.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/run.c b/run.c index 8a8ca68..02b64b3 100644 --- a/run.c +++ b/run.c @@ -2108,7 +2108,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis break; case FSYSTEM: fflush(stdout); /* in case something is buffered already */ - status = system(getsval(x)); + estatus = status = system(getsval(x)); if (status != -1) { if (WIFEXITED(status)) { estatus = WEXITSTATUS(status); @@ -2121,6 +2121,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis } else /* something else?!? */ estatus = 0; } + /* else estatus was set to -1 */ u = estatus; break; case FRAND: -- cgit v1.2.3 From a9642137999b3cefaca03d53e6f42b0a458fa13b Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Mon, 27 Nov 2023 23:13:53 -0500 Subject: Wilbert van der Poel's fixes to inconsistent handling of -F and --csv. --- lib.c | 2 +- main.c | 4 ++++ testdir/T.csv | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lib.c b/lib.c index b5b83f8..7fb9ac3 100644 --- a/lib.c +++ b/lib.c @@ -399,7 +399,7 @@ void fldbld(void) /* create fields from current record */ i = 0; /* number of fields accumulated here */ if (inputFS == NULL) /* make sure we have a copy of FS */ savefs(); - if (strlen(inputFS) > 1) { /* it's a regular expression */ + if (!CSV && strlen(inputFS) > 1) { /* it's a regular expression */ i = refldbld(r, inputFS); } else if (!CSV && (sep = *inputFS) == ' ') { /* default whitespace */ for (i = 0; ; ) { diff --git a/main.c b/main.c index c478e32..a5d4f1c 100644 --- a/main.c +++ b/main.c @@ -157,6 +157,8 @@ int main(int argc, char *argv[]) } if (strcmp(argv[1], "--csv") == 0) { /* turn on csv input processing */ CSV = true; + if (fs) + WARNING("danger: don't set FS when --csv is in effect"); argc--; argv++; continue; @@ -178,6 +180,8 @@ int main(int argc, char *argv[]) break; case 'F': /* set field separator */ fs = setfs(getarg(&argc, &argv, "no field separator")); + if (CSV) + WARNING("danger: don't set FS when --csv is in effect"); break; case 'v': /* -v a=1 to be done NOW. one -v for each */ vn = getarg(&argc, &argv, "no variable name"); diff --git a/testdir/T.csv b/testdir/T.csv index 79c1510..40ef0c6 100755 --- a/testdir/T.csv +++ b/testdir/T.csv @@ -17,8 +17,8 @@ $1 ~ /try/ { # new test sub(/try /, "") prog = $0 printf("%3d %s\n", nt, prog) - prog = sprintf("%s -F\"\\t\" '"'"'%s'"'"'", awk, prog) - # print "prog is", prog + prog = sprintf("%s '"'"'%s'"'"'", awk, prog) + print "prog is", prog nt2 = 0 while (getline > 0) { if (NF == 0) # blank line terminates a sequence -- cgit v1.2.3 From 5b55b5f48289e2e3613647d6de42e7b74284c391 Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Mon, 27 Nov 2023 23:26:49 -0500 Subject: updated with the latest fixes --- FIXES | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/FIXES b/FIXES index 52f49e3..d77bec2 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,12 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the second edition of the AWK book was published in September 2023. +Nov 27, 2023: + Fix exit status of system on MacOS. update to REGRESS. + Thanks to Arnold Robbins. + Fix inconsistent handling of -F and --csv, and loss of csv + mode when FS is set. Thanks to Wilbert van der Poel. + Nov 24, 2023: Fix issue #199: gototab improvements to dynamically resize the table, qsort and bsearch to improve the lookup speed as the -- cgit v1.2.3 From dbf7cbed8645e65df5bba0c83b566e2bd96e63b6 Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Mon, 27 Nov 2023 23:27:44 -0500 Subject: adjust version date: 20231127 --- main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.c b/main.c index a5d4f1c..8c8fb40 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20231124"; +const char *version = "version 20231127"; #define DEBUG #include -- cgit v1.2.3 From 8424e93ad3e63cdfda1ae34e984691c3fe879175 Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Mon, 27 Nov 2023 23:31:28 -0500 Subject: comment out print stmt for prog.s --- testdir/T.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testdir/T.csv b/testdir/T.csv index 40ef0c6..e0f3d70 100755 --- a/testdir/T.csv +++ b/testdir/T.csv @@ -18,7 +18,7 @@ $1 ~ /try/ { # new test prog = $0 printf("%3d %s\n", nt, prog) prog = sprintf("%s '"'"'%s'"'"'", awk, prog) - print "prog is", prog + # print "prog is", prog nt2 = 0 while (getline > 0) { if (NF == 0) # blank line terminates a sequence -- cgit v1.2.3 From fcc0e7b0f8eb8bdaf909b03f5b4d0f0833cf4292 Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Thu, 28 Dec 2023 14:45:44 -0500 Subject: updated. --- FIXES | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/FIXES b/FIXES index d77bec2..21efda5 100644 --- a/FIXES +++ b/FIXES @@ -25,11 +25,18 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the second edition of the AWK book was published in September 2023. +Dec 24, 2023: + matchop dereference after free problem fix when the first + argument is a function call. thanks to Oguz Ismail Uysal. + Fix inconsistent handling of --csv and FS set in the + command line. Thanks to Wilbert van der Poel. + casting changes to int for is* functions. + Nov 27, 2023: Fix exit status of system on MacOS. update to REGRESS. Thanks to Arnold Robbins. Fix inconsistent handling of -F and --csv, and loss of csv - mode when FS is set. Thanks to Wilbert van der Poel. + mode when FS is set. Nov 24, 2023: Fix issue #199: gototab improvements to dynamically resize the -- cgit v1.2.3 From b7461b63f231d2d45a5309e6ee207276e7c44a98 Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Thu, 28 Dec 2023 14:48:42 -0500 Subject: cast to int for isspace, isalnum et al. --- b.c | 4 ++-- lib.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/b.c b/b.c index 881c052..db96d5f 100644 --- a/b.c +++ b/b.c @@ -346,7 +346,7 @@ int hexstr(const uschar **pp, int max) /* find and eval hex string at pp, return int i; for (i = 0, p = *pp; i < max && isxdigit(*p); i++, p++) { - if (isdigit(*p)) + if (isdigit((int) *p)) n = 16 * n + *p - '0'; else if (*p >= 'a' && *p <= 'f') n = 16 * n + *p - 'a' + 10; @@ -1389,7 +1389,7 @@ rescan: } break; case '{': - if (isdigit(*(prestr))) { + if (isdigit((int) *(prestr))) { num = 0; /* Process as a repetition */ n = -1; m = -1; commafound = false; diff --git a/lib.c b/lib.c index 7fb9ac3..0dac1f9 100644 --- a/lib.c +++ b/lib.c @@ -845,10 +845,10 @@ int isclvar(const char *s) /* is s of form var=something ? */ { const char *os = s; - if (!isalpha((uschar) *s) && *s != '_') + if (!isalpha((int) *s) && *s != '_') return 0; for ( ; *s; s++) - if (!(isalnum((uschar) *s) || *s == '_')) + if (!(isalnum((int) *s) || *s == '_')) break; return *s == '=' && s > os; } @@ -883,7 +883,7 @@ bool is_valid_number(const char *s, bool trailing_stuff_ok, if (no_trailing) *no_trailing = false; - while (isspace(*s)) + while (isspace((int) *s)) s++; /* no hex floating point, sorry */ @@ -895,7 +895,7 @@ bool is_valid_number(const char *s, bool trailing_stuff_ok, is_nan = (strncasecmp(s+1, "nan", 3) == 0); is_inf = (strncasecmp(s+1, "inf", 3) == 0); if ((is_nan || is_inf) - && (isspace(s[4]) || s[4] == '\0')) + && (isspace((int) s[4]) || s[4] == '\0')) goto convert; else if (! isdigit(s[1]) && s[1] != '.') return false; @@ -918,7 +918,7 @@ convert: /* * check for trailing stuff */ - while (isspace(*ep)) + while (isspace((int) *ep)) ep++; if (no_trailing != NULL) -- cgit v1.2.3 From 08fa2b8c53138ee1162a2b9be78e192adcce26ec Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Thu, 28 Dec 2023 14:49:20 -0500 Subject: adjust version date: 20231228 Fix inconsistent handling of --csv and FS set via -v. --- main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.c b/main.c index 8c8fb40..58f1541 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20231127"; +const char *version = "version 20231228"; #define DEBUG #include @@ -157,8 +157,6 @@ int main(int argc, char *argv[]) } if (strcmp(argv[1], "--csv") == 0) { /* turn on csv input processing */ CSV = true; - if (fs) - WARNING("danger: don't set FS when --csv is in effect"); argc--; argv++; continue; @@ -180,8 +178,6 @@ int main(int argc, char *argv[]) break; case 'F': /* set field separator */ fs = setfs(getarg(&argc, &argv, "no field separator")); - if (CSV) - WARNING("danger: don't set FS when --csv is in effect"); break; case 'v': /* -v a=1 to be done NOW. one -v for each */ vn = getarg(&argc, &argv, "no variable name"); @@ -203,6 +199,10 @@ int main(int argc, char *argv[]) argc--; argv++; } + + if (CSV && (fs != NULL || lookup("FS", symtab) != NULL)) + WARNING("danger: don't set FS when --csv is in effect"); + /* argv[1] is now the first argument */ if (npfile == 0) { /* no -f; first argument is program */ if (argc <= 1) { -- cgit v1.2.3 From 04f69eaf0b99d6c0ca65115973830218ba8a2b72 Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Thu, 28 Dec 2023 14:51:49 -0500 Subject: test for matchop dereference bug --- bugs-fixed/matchop-deref.awk | 11 +++++++++++ bugs-fixed/matchop-deref.bad | 2 ++ bugs-fixed/matchop-deref.in | 1 + bugs-fixed/matchop-deref.ok | 2 ++ 4 files changed, 16 insertions(+) create mode 100644 bugs-fixed/matchop-deref.awk create mode 100644 bugs-fixed/matchop-deref.bad create mode 100644 bugs-fixed/matchop-deref.in create mode 100644 bugs-fixed/matchop-deref.ok diff --git a/bugs-fixed/matchop-deref.awk b/bugs-fixed/matchop-deref.awk new file mode 100644 index 0000000..6c066aa --- /dev/null +++ b/bugs-fixed/matchop-deref.awk @@ -0,0 +1,11 @@ +function foo() { + return "aaaaaab" +} + +BEGIN { + print match(foo(), "b") +} + +{ + print match(substr($0, 1), "b") +} diff --git a/bugs-fixed/matchop-deref.bad b/bugs-fixed/matchop-deref.bad new file mode 100644 index 0000000..343ee5c --- /dev/null +++ b/bugs-fixed/matchop-deref.bad @@ -0,0 +1,2 @@ +-1 +-1 diff --git a/bugs-fixed/matchop-deref.in b/bugs-fixed/matchop-deref.in new file mode 100644 index 0000000..0d197e1 --- /dev/null +++ b/bugs-fixed/matchop-deref.in @@ -0,0 +1 @@ +aaaaaab diff --git a/bugs-fixed/matchop-deref.ok b/bugs-fixed/matchop-deref.ok new file mode 100644 index 0000000..49019db --- /dev/null +++ b/bugs-fixed/matchop-deref.ok @@ -0,0 +1,2 @@ +7 +7 -- cgit v1.2.3 From 908be9c222c0b7da4bcc3b8724ebb11996993a69 Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Thu, 28 Dec 2023 14:53:25 -0500 Subject: fix for matchop dereferencing a pointer x->sval after freeing x. this bug was introduced with UTF-8 support changes. example: $ echo aaaaaab | ./a.out '{print match(substr($0, 1), "b")}' -1 --- run.c | 12 +++++++----- testdir/T.overflow | 2 ++ testdir/T.split | 1 + 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/run.c b/run.c index 02b64b3..098afbb 100644 --- a/run.c +++ b/run.c @@ -795,7 +795,7 @@ int runetochar(char *str, int c) Cell *matchop(Node **a, int n) /* ~ and match() */ { - Cell *x, *y; + Cell *x, *y, *z; char *s, *t; int i; int cstart, cpatlen, len; @@ -817,7 +817,7 @@ Cell *matchop(Node **a, int n) /* ~ and match() */ i = (*mf)(pfa, s); tempfree(y); } - tempfree(x); + z = x; if (n == MATCHFCN) { int start = patbeg - s + 1; /* origin 1 */ if (patlen < 0) { @@ -839,11 +839,13 @@ Cell *matchop(Node **a, int n) /* ~ and match() */ x = gettemp(); x->tval = NUM; x->fval = start; - return x; } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) - return(True); + x = True; else - return(False); + x = False; + + tempfree(z); + return x; } diff --git a/testdir/T.overflow b/testdir/T.overflow index d3d97d4..ac9c0bd 100755 --- a/testdir/T.overflow +++ b/testdir/T.overflow @@ -84,3 +84,5 @@ grep "out of range field" foo >/dev/null || echo 1>&2 "BAD: T.overflow \$400000" rm -rf /tmp/awktestfoo* $awk 'BEGIN { for (i=1; i <= 1000; i++) print i >("/tmp/awktestfoo" i) }' ls /tmp/awktestfoo* | grep '1000' >/dev/null || echo 1>&2 "BAD: T.overflow openfiles" +rm -rf /tmp/awktestfoo* +exit 0 diff --git a/testdir/T.split b/testdir/T.split index f7b24ba..d938404 100755 --- a/testdir/T.split +++ b/testdir/T.split @@ -220,5 +220,6 @@ $awk 'BEGIN { echo 'cat dog' > $TEMP2 diff $TEMP1 $TEMP2 || fail 'BAD: T.split(a, b, "[\r\n]+")' +rm -rf $WORKDIR exit $RESULT -- cgit v1.2.3 From ee8484a4a2cb6ac3b263f4da0fe83cabf8683ddb Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Mon, 22 Jan 2024 07:45:31 +0200 Subject: Restore ability to compile with g++. --- FIXES | 4 ++++ b.c | 28 +++++++++++++--------------- main.c | 2 +- run.c | 5 +++-- 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/FIXES b/FIXES index 21efda5..3b05925 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,10 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the second edition of the AWK book was published in September 2023. +Jan 22, 2024: + Restore the ability to compile with g++. Thanks to + Arnold Robbins. + Dec 24, 2023: matchop dereference after free problem fix when the first argument is a function call. thanks to Oguz Ismail Uysal. diff --git a/b.c b/b.c index db96d5f..0ac7b9f 100644 --- a/b.c +++ b/b.c @@ -116,7 +116,7 @@ static int entry_cmp(const void *l, const void *r); static int get_gototab(fa*, int, int); static int set_gototab(fa*, int, int, int); static void clear_gototab(fa*, int); -extern int u8_rune(int *, const uschar *); +extern int u8_rune(int *, const char *); static int * intalloc(size_t n, const char *f) @@ -416,7 +416,7 @@ int *cclenter(const char *argp) /* add a character class */ FATAL("out of space for character class [%.10s...] 1", p); bp = buf; for (i = 0; *p != 0; ) { - n = u8_rune(&c, p); + n = u8_rune(&c, (const char *) p); p += n; if (c == '\\') { c = quoted(&p); @@ -424,7 +424,7 @@ int *cclenter(const char *argp) /* add a character class */ if (*p != 0) { c = bp[-1]; /* c2 = *p++; */ - n = u8_rune(&c2, p); + n = u8_rune(&c2, (const char *) p); p += n; if (c2 == '\\') c2 = quoted(&p); /* BUG: sets p, has to be u8 size */ @@ -618,7 +618,7 @@ static int get_gototab(fa *f, int state, int ch) /* hide gototab inplementation key.ch = ch; key.state = 0; /* irrelevant */ - item = bsearch(& key, f->gototab[state].entries, + item = (gtte *) bsearch(& key, f->gototab[state].entries, f->gototab[state].inuse, sizeof(gtte), entry_cmp); @@ -662,7 +662,7 @@ static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab inplem key.ch = ch; key.state = 0; /* irrelevant */ - item = bsearch(& key, f->gototab[state].entries, + item = (gtte *) bsearch(& key, f->gototab[state].entries, f->gototab[state].inuse, sizeof(gtte), entry_cmp); @@ -710,7 +710,7 @@ int match(fa *f, const char *p0) /* shortest match ? */ return(1); do { /* assert(*p < NCHARS); */ - n = u8_rune(&rune, p); + n = u8_rune(&rune, (const char *) p); if ((ns = get_gototab(f, s, rune)) != 0) s = ns; else @@ -743,7 +743,7 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */ if (f->out[s]) /* final state */ patlen = q-p; /* assert(*q < NCHARS); */ - n = u8_rune(&rune, q); + n = u8_rune(&rune, (const char *) q); if ((ns = get_gototab(f, s, rune)) != 0) s = ns; else @@ -774,7 +774,7 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */ s = 2; if (*p == 0) break; - n = u8_rune(&rune, p); + n = u8_rune(&rune, (const char *) p); p += n; } while (1); /* was *p++ */ return (0); @@ -799,7 +799,7 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */ if (f->out[s]) /* final state */ patlen = q-p; /* assert(*q < NCHARS); */ - n = u8_rune(&rune, q); + n = u8_rune(&rune, (const char *) q); if ((ns = get_gototab(f, s, rune)) != 0) s = ns; else @@ -887,7 +887,7 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) } } - j += u8_rune(&c, (uschar *)j); + j += u8_rune(&c, j); if ((ns = get_gototab(pfa, s, c)) != 0) s = ns; @@ -907,7 +907,7 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) break; /* best match found */ /* no match at origin i, next i and start over */ - i += u8_rune(&c, (uschar *)i); + i += u8_rune(&c, i); if (c == 0) break; /* no match */ j = i; @@ -1229,8 +1229,6 @@ static int repeat(const uschar *reptok, int reptoklen, const uschar *atom, return 0; } -extern int u8_rune(int *, const uschar *); /* run.c; should be in header file */ - int relex(void) /* lexical analyzer for reparse */ { int c, n; @@ -1248,7 +1246,7 @@ int relex(void) /* lexical analyzer for reparse */ rescan: starttok = prestr; - if ((n = u8_rune(&rlxval, prestr)) > 1) { + if ((n = u8_rune(&rlxval, (const char *) prestr)) > 1) { prestr += n; starttok = prestr; return CHAR; @@ -1295,7 +1293,7 @@ rescan: if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1")) FATAL("out of space for reg expr %.10s...", lastre); for (; ; ) { - if ((n = u8_rune(&rlxval, prestr)) > 1) { + if ((n = u8_rune(&rlxval, (const char *) prestr)) > 1) { for (i = 0; i < n; i++) *bp++ = *prestr++; continue; diff --git a/main.c b/main.c index 58f1541..73af89e 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20231228"; +const char *version = "version 20240122"; #define DEBUG #include diff --git a/run.c b/run.c index 098afbb..799e998 100644 --- a/run.c +++ b/run.c @@ -1300,7 +1300,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co if (bs == NULL) { // invalid character // use unicode invalid character, 0xFFFD - bs = "\357\277\275"; + static char invalid_char[] = "\357\277\275"; + bs = invalid_char; count = 3; } t = bs; @@ -2448,7 +2449,7 @@ Cell *dosub(Node **a, int subop) /* sub and gsub */ start = getsval(x); while (pmatch(pfa, start)) { if (buf == NULL) { - if ((pb = buf = malloc(bufsz)) == NULL) + if ((pb = buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in dosub"); tempstat = pfa->initstat; pfa->initstat = 2; -- cgit v1.2.3 From 6a07a6d3bb6313714625f667470290e71545b270 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Mon, 5 Feb 2024 08:49:22 +0200 Subject: Fix some typos in b.c; Improve prose in README.md and FIXES. --- FIXES | 39 +++++++++++++++++++-------------------- README.md | 5 ++++- b.c | 6 +++--- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/FIXES b/FIXES index 3b05925..a043b35 100644 --- a/FIXES +++ b/FIXES @@ -30,14 +30,14 @@ Jan 22, 2024: Arnold Robbins. Dec 24, 2023: - matchop dereference after free problem fix when the first - argument is a function call. thanks to Oguz Ismail Uysal. + Matchop dereference after free problem fix when the first + argument is a function call. Thanks to Oguz Ismail Uysal. Fix inconsistent handling of --csv and FS set in the command line. Thanks to Wilbert van der Poel. - casting changes to int for is* functions. + Casting changes to int for is* functions. Nov 27, 2023: - Fix exit status of system on MacOS. update to REGRESS. + Fix exit status of system on MacOS. Update to REGRESS. Thanks to Arnold Robbins. Fix inconsistent handling of -F and --csv, and loss of csv mode when FS is set. @@ -45,7 +45,7 @@ Nov 27, 2023: Nov 24, 2023: Fix issue #199: gototab improvements to dynamically resize the table, qsort and bsearch to improve the lookup speed as the - table gets larger for multibyte input. thanks to Arnold Robbins. + table gets larger for multibyte input. Thanks to Arnold Robbins. Nov 23, 2023: Fix Issue #169, related to escape sequences in strings. @@ -54,29 +54,29 @@ Nov 23, 2023: by Miguel Pineiro Jr. Nov 20, 2023: - rewrite of fnematch to fix a number of issues, including + Rewrite of fnematch to fix a number of issues, including extraneous output, out-of-bounds access, number of bytes to push back after a failed match etc. - thanks to Miguel Pineiro Jr. + Thanks to Miguel Pineiro Jr. Nov 15, 2023: - Man page edit, regression test fixes. thanks to Arnold Robbins - consolidation of sub and gsub into dosub, removing duplicate - code. thanks to Miguel Pineiro Jr. + Man page edit, regression test fixes. Thanks to Arnold Robbins + Consolidation of sub and gsub into dosub, removing duplicate + code. Thanks to Miguel Pineiro Jr. gcc replaced with cc everywhere. Oct 30, 2023: - multiple fixes and a minor code cleanup. - disabled utf-8 for non-multibyte locales, such as C or POSIX. - fixed a bad char * cast that causes incorrect results on big-endian - systems. also fixed an out-of-bounds read for empty CCL. - fixed a buffer overflow in substr with utf-8 strings. - many thanks to Todd C Miller. + Multiple fixes and a minor code cleanup. + Disabled utf-8 for non-multibyte locales, such as C or POSIX. + Fixed a bad char * cast that causes incorrect results on big-endian + systems. Also fixed an out-of-bounds read for empty CCL. + Fixed a buffer overflow in substr with utf-8 strings. + Many thanks to Todd C Miller. Sep 24, 2023: fnematch and getrune have been overhauled to solve issues around - unicode FS and RS. also fixed gsub null match issue with unicode. - big thanks to Arnold Robbins. + unicode FS and RS. Also fixed gsub null match issue with unicode. + Big thanks to Arnold Robbins. Sep 12, 2023: Fixed a length error in u8_byte2char that set RSTART to @@ -101,9 +101,8 @@ Sep 12, 2023: of a string of 3 emojis is 3, not 12 as it would be if bytes were counted. - Regular expressions are processes as UTF-8. + Regular expressions are processed as UTF-8. Unicode literals can be written as \u followed by one to eight hexadecimal digits. These may appear in strings and regular expressions. - diff --git a/README.md b/README.md index 84fb06e..a41fb3c 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ Regular expressions may include UTF-8 code points, including `\u`. The option `--csv` turns on CSV processing of input: fields are separated by commas, fields may be quoted with double-quote (`"`) characters, quoted fields may contain embedded newlines. +Double-quotes in fields have to be doubled and enclosed in quoted fields. In CSV mode, `FS` is ignored. If no explicit separator argument is provided, @@ -117,6 +118,8 @@ move this to some place like `/usr/bin/awk`. If your system does not have `yacc` or `bison` (the GNU equivalent), you need to install one of them first. +The default in the `makefile` is `bison`; you will have +to edit the `makefile` to use `yacc`. NOTE: This version uses ISO/IEC C99, as you should also. We have compiled this without any changes using `gcc -Wall` and/or local C @@ -143,4 +146,4 @@ is not at the top of our priority list. #### Last Updated -Mon 16 Oct 2023 11:23:08 IDT +Mon 05 Feb 2024 08:46:55 IST diff --git a/b.c b/b.c index 0ac7b9f..4c438fa 100644 --- a/b.c +++ b/b.c @@ -607,11 +607,11 @@ static void resize_gototab(fa *f, int state) size_t orig_size = f->gototab[state].allocated; // 2nd half of new mem is this size memset(p + orig_size, 0, orig_size * sizeof(gtte)); // clean it out - f->gototab[state].allocated = new_size; // update gotottab info + f->gototab[state].allocated = new_size; // update gototab info f->gototab[state].entries = p; } -static int get_gototab(fa *f, int state, int ch) /* hide gototab inplementation */ +static int get_gototab(fa *f, int state, int ch) /* hide gototab implementation */ { gtte key; gtte *item; @@ -638,7 +638,7 @@ static int entry_cmp(const void *l, const void *r) return left->ch - right->ch; } -static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab inplementation */ +static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implementation */ { if (f->gototab[state].inuse == 0) { f->gototab[state].entries[0].ch = ch; -- cgit v1.2.3 From 77c8ffa70db4e9819881ec676d3f13f550d02055 Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Mon, 11 Mar 2024 19:46:47 -0400 Subject: fix for fnematch use-after-free bug resulting from adjbuf invalidating the pointers to buf. this is Miguel Pineiro Jr.'s code. thanks to github user caffe for spotting this. this code demonstrates the problem: printf "%8192s\n" | tr " " "=" | ./a.out 'BEGIN{RS="th[^h]+"}{}' MAX_UTF_BYTES in fnematch has been replaced with awk_mb_cur_max. --- b.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/b.c b/b.c index 4c438fa..870eecf 100644 --- a/b.c +++ b/b.c @@ -830,8 +830,6 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */ } -#define MAX_UTF_BYTES 4 // UTF-8 is up to 4 bytes long - /* * NAME * fnematch @@ -868,16 +866,28 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) do { /* - * Call u8_rune with at least MAX_UTF_BYTES ahead in + * Call u8_rune with at least awk_mb_cur_max ahead in * the buffer until EOF interferes. */ - if (k - j < MAX_UTF_BYTES) { - if (k + MAX_UTF_BYTES > buf + bufsize) { + if (k - j < awk_mb_cur_max) { + if (k + awk_mb_cur_max > buf + bufsize) { + char *obuf = buf; adjbuf((char **) &buf, &bufsize, - bufsize + MAX_UTF_BYTES, + bufsize + awk_mb_cur_max, quantum, 0, "fnematch"); + + /* buf resized, maybe moved. update pointers */ + *pbufsize = bufsize; + if (obuf != buf) { + i = buf + (i - obuf); + j = buf + (j - obuf); + k = buf + (k - obuf); + *pbuf = buf; + if (patlen) + patbeg = buf + (patbeg - obuf); + } } - for (n = MAX_UTF_BYTES ; n > 0; n--) { + for (n = awk_mb_cur_max ; n > 0; n--) { *k++ = (c = getc(f)) != EOF ? c : 0; if (c == EOF) { if (ferror(f)) @@ -914,10 +924,6 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) s = 2; } while (1); - /* adjbuf() may have relocated a resized buffer. Inform the world. */ - *pbuf = buf; - *pbufsize = bufsize; - if (patlen) { /* * Under no circumstances is the last character fed to -- cgit v1.2.3 From b1d9d2818b21df8234d305b91d2c8e21bea78b6b Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Mon, 11 Mar 2024 19:54:10 -0400 Subject: updated. --- FIXES | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/FIXES b/FIXES index a043b35..33a36fc 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,14 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the second edition of the AWK book was published in September 2023. +Mar 10, 2024: + fixed use-after-free bug in fnematch due to adjbuf invalidating + the pointers to buf. thanks to github user caffe3 for spotting + the issue and providing a fix, and to Miguel Pineiro Jr. + for the alternative fix. + MAX_UTF_BYTES in fnematch has been replaced with awk_mb_cur_max. + thanks to Miguel Pineiro Jr. + Jan 22, 2024: Restore the ability to compile with g++. Thanks to Arnold Robbins. -- cgit v1.2.3 From eb57744daf460cc9149c731a4448c52d476798eb Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Mon, 11 Mar 2024 19:54:41 -0400 Subject: fix error messages. --- bugs-fixed/REGRESS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugs-fixed/REGRESS b/bugs-fixed/REGRESS index acdbeeb..30bdc7c 100755 --- a/bugs-fixed/REGRESS +++ b/bugs-fixed/REGRESS @@ -27,6 +27,6 @@ do then rm -f $OUT else - echo '++++ $i failed!' + echo "+++ $i failed!" fi done -- cgit v1.2.3 From 4d46214525a3b75879ad56fb0105ee01afa7c9dd Mon Sep 17 00:00:00 2001 From: ozan yigit Date: Mon, 11 Mar 2024 19:55:50 -0400 Subject: adjust version date: 20240311 --- main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.c b/main.c index 73af89e..5bc1272 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20240122"; +const char *version = "version 20240311"; #define DEBUG #include -- cgit v1.2.3