diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2023-11-17 12:51:20 +0200 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2023-11-17 12:51:20 +0200 |
commit | 12793c04b28c84e0af830c81ea51c90ab65eb585 (patch) | |
tree | 3dfcc4c784f2da755a115b2ede01253ed963b32f | |
parent | cf7cbbb1583eed2c961366f6ab62f14548052091 (diff) | |
parent | 9e254e503f844e122870e9488db3d7b0233e554c (diff) | |
download | one-true-awk-12793c04b28c84e0af830c81ea51c90ab65eb585.tar.gz |
Merge branch 'master' into improve-gototab
-rw-r--r-- | FIXES | 6 | ||||
-rw-r--r-- | awk.1 | 3 | ||||
-rwxr-xr-x | bugs-fixed/REGRESS | 2 | ||||
-rw-r--r-- | main.c | 2 | ||||
-rw-r--r-- | makefile | 8 | ||||
-rw-r--r-- | maketab.c | 4 | ||||
-rw-r--r-- | proto.h | 3 | ||||
-rw-r--r-- | run.c | 270 | ||||
-rwxr-xr-x | testdir/Compare.tt | 2 | ||||
-rwxr-xr-x | testdir/REGRESS | 2 | ||||
-rwxr-xr-x | testdir/T.csv | 1 | ||||
-rwxr-xr-x | testdir/T.flags | 5 |
12 files changed, 145 insertions, 163 deletions
@@ -25,6 +25,12 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the second edition of the AWK book was published in September 2023. +Nov 15, 2023 + Man page edit, regression test fixes. thanks to Arnold Robbins + consolidation of sub and gsub into dosub, removing duplicate + code. thanks to Miguel Pineiro Jr. + gcc replaced with cc everywhere. + Oct 30, 2023: multiple fixes and a minor code cleanup. disabled utf-8 for non-multibyte locales, such as C or POSIX. @@ -586,6 +586,9 @@ the syntax is worse. .PP Input is expected to be UTF-8 encoded. Other multibyte character sets are not handled. +However, in eight-bit locales, +.I awk +treats each input byte as a separate character. .SH UNUSUAL FLOATING-POINT VALUES .I Awk was designed before IEEE 754 arithmetic defined Not-A-Number (NaN) diff --git a/bugs-fixed/REGRESS b/bugs-fixed/REGRESS index 0716003..98d578a 100755 --- a/bugs-fixed/REGRESS +++ b/bugs-fixed/REGRESS @@ -1,4 +1,4 @@ -#! /bin/bash +#! /bin/sh if [ ! -f ../a.out ] then @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20231030"; +const char *version = "version 20231116"; #define DEBUG #include <stdio.h> @@ -28,10 +28,10 @@ CFLAGS = CFLAGS = -O2 # compiler options -#CC = gcc -Wall -g -Wwrite-strings -#CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing -#CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov -HOSTCC = gcc -g -Wall -pedantic -Wcast-qual +#CC = cc -Wall -g -Wwrite-strings +#CC = cc -O4 -Wall -pedantic -fno-strict-aliasing +#CC = cc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov +HOSTCC = cc -g -Wall -pedantic -Wcast-qual CC = $(HOSTCC) # change this is cross-compiling. # By fiat, to make our lives easier, yacc is now defined to be bison. @@ -52,8 +52,8 @@ struct xx { ARRAY, "array", NULL }, { INDIRECT, "indirect", "$(" }, { SUBSTR, "substr", "substr" }, - { SUB, "sub", "sub" }, - { GSUB, "gsub", "gsub" }, + { SUB, "dosub", "sub" }, + { GSUB, "dosub", "gsub" }, { INDEX, "sindex", "sindex" }, { SPRINTF, "awksprintf", "sprintf " }, { ADD, "arith", " + " }, @@ -196,8 +196,7 @@ extern FILE *openfile(int, const char *, bool *); extern const char *filename(FILE *); extern Cell *closefile(Node **, int); extern void closeall(void); -extern Cell *sub(Node **, int); -extern Cell *gsub(Node **, int); +extern Cell *dosub(Node **, int); extern FILE *popen(const char *, const char *); extern int pclose(FILE *); @@ -2397,169 +2397,143 @@ static void flush_all(void) void backsub(char **pb_ptr, const char **sptr_ptr); -Cell *sub(Node **a, int nnn) /* substitute command */ +Cell *dosub(Node **a, int subop) /* sub and gsub */ { - const char *sptr, *q; - Cell *x, *y, *result; - char *t, *buf, *pb; fa *pfa; + int tempstat; + char *repl; + Cell *x; + + char *buf = NULL; + char *pb = NULL; int bufsz = recsize; - if ((buf = (char *) malloc(bufsz)) == NULL) - FATAL("out of memory in sub"); - x = execute(a[3]); /* target string */ - t = getsval(x); - if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ - pfa = (fa *) a[1]; /* regular expression */ - else { - y = execute(a[1]); - pfa = makedfa(getsval(y), 1); - tempfree(y); + const char *r, *s; + const char *start; + const char *noempty = NULL; /* empty match disallowed here */ + size_t m = 0; /* match count */ + size_t whichm; /* which match to select, 0 = global */ + int mtype; /* match type */ + + if (a[0] == NULL) { /* 0 => a[1] is already-compiled regexpr */ + pfa = (fa *) a[1]; + } else { + x = execute(a[1]); + pfa = makedfa(getsval(x), 1); + tempfree(x); } - y = execute(a[2]); /* replacement string */ - result = False; - if (pmatch(pfa, t)) { - sptr = t; - adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); - pb = buf; - while (sptr < patbeg) - *pb++ = *sptr++; - sptr = getsval(y); - while (*sptr != '\0') { - adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); - if (*sptr == '\\') { - backsub(&pb, &sptr); - } else if (*sptr == '&') { - sptr++; - adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); - for (q = patbeg; q < patbeg+patlen; ) - *pb++ = *q++; - } else - *pb++ = *sptr++; + + x = execute(a[2]); /* replacement string */ + repl = tostring(getsval(x)); + tempfree(x); + + switch (subop) { + case SUB: + whichm = 1; + x = execute(a[3]); /* source string */ + break; + case GSUB: + whichm = 0; + x = execute(a[3]); /* source string */ + break; + default: + FATAL("dosub: unrecognized subop: %d", subop); + } + + start = getsval(x); + while (pmatch(pfa, start)) { + if (buf == NULL) { + if ((pb = buf = malloc(bufsz)) == NULL) + FATAL("out of memory in dosub"); + tempstat = pfa->initstat; + pfa->initstat = 2; } - *pb = '\0'; - if (pb > buf + bufsz) - FATAL("sub result1 %.30s too big; can't happen", buf); - sptr = patbeg + patlen; - if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { - adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); - while ((*pb++ = *sptr++) != '\0') - continue; + + /* match types */ + #define MT_IGNORE 0 /* unselected or invalid */ + #define MT_INSERT 1 /* selected, empty */ + #define MT_REPLACE 2 /* selected, not empty */ + + /* an empty match just after replacement is invalid */ + + if (patbeg == noempty && patlen == 0) { + mtype = MT_IGNORE; /* invalid, not counted */ + } else if (whichm == ++m || whichm == 0) { + mtype = patlen ? MT_REPLACE : MT_INSERT; + } else { + mtype = MT_IGNORE; /* unselected, but counted */ } - if (pb > buf + bufsz) - FATAL("sub result2 %.30s too big; can't happen", buf); - setsval(x, buf); /* BUG: should be able to avoid copy */ - result = True; - } - tempfree(x); - tempfree(y); - free(buf); - return result; -} -Cell *gsub(Node **a, int nnn) /* global substitute */ -{ - Cell *x, *y; - char *rptr, *pb; - const char *q, *t, *sptr; - char *buf; - fa *pfa; - int mflag, tempstat, num; - int bufsz = recsize; - int charlen = 0; + /* leading text: */ + if (patbeg > start) { + adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start), + recsize, &pb, "dosub"); + s = start; + while (s < patbeg) + *pb++ = *s++; + } - if ((buf = (char *) malloc(bufsz)) == NULL) - FATAL("out of memory in gsub"); - mflag = 0; /* if mflag == 0, can replace empty string */ - num = 0; - x = execute(a[3]); /* target string */ - t = getsval(x); - if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ - pfa = (fa *) a[1]; /* regular expression */ - else { - y = execute(a[1]); - pfa = makedfa(getsval(y), 1); - tempfree(y); - } - y = execute(a[2]); /* replacement string */ - if (pmatch(pfa, t)) { - tempstat = pfa->initstat; - pfa->initstat = 2; - pb = buf; - rptr = getsval(y); - do { - if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ - if (mflag == 0) { /* can replace empty */ - num++; - sptr = rptr; - while (*sptr != '\0') { - adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); - if (*sptr == '\\') { - backsub(&pb, &sptr); - } else if (*sptr == '&') { - sptr++; - adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); - for (q = patbeg; q < patbeg+patlen; ) - *pb++ = *q++; - } else - *pb++ = *sptr++; - } - } - if (*t == '\0') /* at end */ - goto done; - adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); - charlen = u8_nextlen(t); - while (charlen-- > 0) - *pb++ = *t++; - if (pb > buf + bufsz) /* BUG: not sure of this test */ - FATAL("gsub result0 %.30s too big; can't happen", buf); - mflag = 0; + if (mtype == MT_IGNORE) + goto matching_text; /* skip replacement text */ + + r = repl; + while (*r != 0) { + adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub"); + if (*r == '\\') { + backsub(&pb, &r); + } else if (*r == '&') { + r++; + adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, + &pb, "dosub"); + for (s = patbeg; s < patbeg+patlen; ) + *pb++ = *s++; + } else { + *pb++ = *r++; } - else { /* matched nonempty string */ - num++; - sptr = t; - adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); - while (sptr < patbeg) - *pb++ = *sptr++; - sptr = rptr; - while (*sptr != '\0') { - adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); - if (*sptr == '\\') { - backsub(&pb, &sptr); - } else if (*sptr == '&') { - sptr++; - adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); - for (q = patbeg; q < patbeg+patlen; ) - *pb++ = *q++; - } else - *pb++ = *sptr++; - } - t = patbeg + patlen; - if (patlen == 0 || *t == '\0' || *(t-1) == '\0') - goto done; - if (pb > buf + bufsz) - FATAL("gsub result1 %.30s too big; can't happen", buf); - mflag = 1; - } - } while (pmatch(pfa,t)); - sptr = t; - adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); - while ((*pb++ = *sptr++) != '\0') - continue; - done: if (pb < buf + bufsz) - *pb = '\0'; - else if (*(pb-1) != '\0') - FATAL("gsub result2 %.30s truncated; can't happen", buf); - setsval(x, buf); /* BUG: should be able to avoid copy + free */ + } + +matching_text: + if (mtype == MT_REPLACE || *patbeg == '\0') + goto next_search; /* skip matching text */ + + if (patlen == 0) + patlen = u8_nextlen(patbeg); + adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub"); + s = patbeg; + while (s < patbeg + patlen) + *pb++ = *s++; + +next_search: + start = patbeg + patlen; + if (m == whichm || *patbeg == '\0') + break; + if (mtype == MT_REPLACE) + noempty = start; + + #undef MT_IGNORE + #undef MT_INSERT + #undef MT_REPLACE + } + + xfree(repl); + + if (buf != NULL) { pfa->initstat = tempstat; + + /* trailing text */ + adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub"); + while ((*pb++ = *start++) != '\0') + ; + + setsval(x, buf); + free(buf); } + tempfree(x); - tempfree(y); x = gettemp(); x->tval = NUM; - x->fval = num; - free(buf); - return(x); + x->fval = m; + return x; } void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ diff --git a/testdir/Compare.tt b/testdir/Compare.tt index ca828d2..4b297d7 100755 --- a/testdir/Compare.tt +++ b/testdir/Compare.tt @@ -4,7 +4,7 @@ oldawk=${oldawk-awk} awk=${awk-../a.out} echo compiling time.c -gcc time.c -o time +cc time.c -o time time=./time echo time command = $time diff --git a/testdir/REGRESS b/testdir/REGRESS index 5c3667f..b54ce3f 100755 --- a/testdir/REGRESS +++ b/testdir/REGRESS @@ -1,7 +1,7 @@ #!/bin/sh uname -a -gcc echo.c -o echo && echo echo compiled +cc echo.c -o echo && echo echo compiled oldawk=${oldawk-awk} awk=${awk-../a.out} diff --git a/testdir/T.csv b/testdir/T.csv index 10da1ea..79c1510 100755 --- a/testdir/T.csv +++ b/testdir/T.csv @@ -77,5 +77,4 @@ a''b [a''b] a, [a][] "", [][] , [][] -a"b [a"b] !!!! diff --git a/testdir/T.flags b/testdir/T.flags index 33d7c8d..17ce561 100755 --- a/testdir/T.flags +++ b/testdir/T.flags @@ -20,5 +20,6 @@ grep 'unknown option' foo >/dev/null || echo 'T.flags: bad unknown option' $awk -F >foo 2>&1 grep 'no field separator' foo >/dev/null || echo 'T.flags: bad missing field separator' -$awk -F '' >foo 2>&1 -grep 'field separator FS is empty' foo >/dev/null || echo 'T.flags: bad empty field separator' +### Awk is now like gawk and splits into separate characters if FS = "" +# $awk -F '' >foo 2>&1 +# grep 'field separator FS is empty' foo >/dev/null || echo 'T.flags: bad empty field separator' |