diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-11-09 00:08:56 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-11-09 00:08:56 +0000 |
commit | 13f5893f3addb30e157763ad236e7c2156e4b198 (patch) | |
tree | cf19b7bc55e300532fc24f31a0a42ad5a072d521 | |
parent | d83cfdbff9cf794d7925cd5349fcca0717ce4916 (diff) | |
parent | d3e0227f71c19c1d9639a0fcc4ac143d32a4aff2 (diff) | |
download | one-true-awk-13f5893f3addb30e157763ad236e7c2156e4b198.tar.gz |
Snap for 11071763 from d3e0227f71c19c1d9639a0fcc4ac143d32a4aff2 to 24Q1-release
Change-Id: I0ae2045c484a869f1408fc8bd19d98c2ce7f0f61
-rw-r--r-- | FIXES | 9 |
-rw-r--r-- | METADATA | 6 |
-rw-r--r-- | README.md | 4 |
-rw-r--r-- | awk.h | 2 |
-rw-r--r-- | b.c | 8 |
-rw-r--r-- | main.c | 5 |
-rw-r--r-- | run.c | 16 |
7 files changed, 32 insertions, 18 deletions
@@ -25,6 +25,15 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the second edition of the AWK book was published in September 2023. +Oct 30, 2023: + multiple fixes and a minor code cleanup. + disabled utf-8 for non-multibyte locales, such as C or POSIX. + fixed a bad char * cast that causes incorrect results on big-endian + systems. also fixed an out-of-bounds read for empty CCL. + fixed a buffer overflow in substr with utf-8 strings. + many thanks to Todd C Miller. + + Sep 24, 2023: fnematch and getrune have been overhauled to solve issues around unicode FS and RS. also fixed gsub null match issue with unicode. @@ -9,11 +9,11 @@ third_party { type: GIT value: "https://github.com/onetrueawk/awk.git" } - version: "d8e4368964e4471a54a755823004f2b1aabc0f80" + version: "d801514094d1140dfc9f8571b9821082ddddf107" license_type: NOTICE last_upgrade_date { year: 2023 - month: 10 - day: 5 + month: 11 + day: 6 } } @@ -28,7 +28,7 @@ when expanded. The option `--csv` turns on CSV processing of input: fields are separated by commas, fields may be quoted with -double-quote (`"`) characters, fields may contain embedded newlines. +double-quote (`"`) characters, quoted fields may contain embedded newlines. In CSV mode, `FS` is ignored. If no explicit separator argument is provided, @@ -145,4 +145,4 @@ is not at the top of our priority list. #### Last Updated -Sun Sep 3 09:26:43 EDT 2023 +Sun 15 Oct 2023 06:28:36 IDT @@ -64,6 +64,8 @@ extern bool safe; /* false => unsafe, true => safe */ #define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. 
*/ extern int recsize; /* size of current record, orig RECSIZE */ +extern size_t awk_mb_cur_max; /* max size of a multi-byte character */ + extern char EMPTY[]; /* this avoid -Wwritable-strings issues */ extern char **FS; extern char **RS; @@ -529,7 +529,7 @@ int first(Node *p) /* collects initially active leaves of p into setvec */ setvec[lp] = 1; setcnt++; } - if (type(p) == CCL && (*(char *) right(p)) == '\0') + if (type(p) == CCL && (*(int *) right(p)) == 0) return(0); /* empty CCL */ return(1); case PLUS: @@ -778,7 +778,7 @@ struct runedata getrune(FILE *fp) c = getc(fp); if (c == EOF) return result; // result.rune == 0 --> EOF - else if (c < 128) { + else if (c < 128 || awk_mb_cur_max == 1) { result.bytes[0] = c; result.len = 1; result.rune = c; @@ -970,7 +970,7 @@ Node *primary(void) rtok = relex(); if (rtok == ')') { /* special pleading for () */ rtok = relex(); - return unary(op2(CCL, NIL, (Node *) tostring(""))); + return unary(op2(CCL, NIL, (Node *) cclenter(""))); } np = regexp(); if (rtok == ')') { @@ -993,7 +993,7 @@ Node *concat(Node *np) return (concat(op2(CAT, np, primary()))); case EMPTYRE: rtok = relex(); - return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")), + return (concat(op2(CAT, op2(CCL, NIL, (Node *) cclenter("")), primary()))); } return (np); @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
****************************************************************/ -const char *version = "version 20231001"; +const char *version = "version 20231030"; #define DEBUG #include <stdio.h> @@ -53,6 +53,8 @@ bool CSV = false; /* true for csv input */ bool safe = false; /* true => "safe" mode */ +size_t awk_mb_cur_max = 1; + static noreturn void fpecatch(int n #ifdef SA_SIGINFO , siginfo_t *si, void *uc @@ -116,6 +118,7 @@ int main(int argc, char *argv[]) setlocale(LC_CTYPE, ""); setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */ + awk_mb_cur_max = MB_CUR_MAX; cmdname = argv[0]; if (argc == 1) { fprintf(stderr, @@ -605,7 +605,7 @@ int u8_isutf(const char *s) unsigned char c; c = s[0]; - if (c < 128) + if (c < 128 || awk_mb_cur_max == 1) return 1; /* what if it's 0? */ n = strlen(s); @@ -632,7 +632,7 @@ int u8_rune(int *rune, const char *s) unsigned char c; c = s[0]; - if (c < 128) { + if (c < 128 || awk_mb_cur_max == 1) { *rune = c; return 1; } @@ -679,7 +679,7 @@ int u8_strlen(const char *s) totlen = 0; for (i = 0; i < n; i += len) { c = s[i]; - if (c < 128) { + if (c < 128 || awk_mb_cur_max == 1) { len = 1; } else { len = u8_nextlen(&s[i]); @@ -985,7 +985,7 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ if (a[2] != NULL) z = execute(a[2]); s = getsval(x); - k = strlen(s) + 1; + k = u8_strlen(s) + 1; if (k <= 1) { tempfree(x); tempfree(y); @@ -1289,7 +1289,7 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co int charval = (int) getfval(x); if (charval != 0) { - if (charval < 128) + if (charval < 128 || awk_mb_cur_max == 1) snprintf(p, BUFSZ(p), fmt, charval); else { // possible unicode character @@ -1349,7 +1349,7 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co int i; if (ljust) { // print one char from t, then pad blanks - for (int i = 0; i < n; i++) + for (i = 0; i < n; i++) *p++ = t[i]; for (i = 0; i < pad; i++) { //printf(" "); @@ -1360,7 +1360,7 @@ int format(char 
**pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co //printf(" "); *p++ = ' '; } - for (int i = 0; i < n; i++) + for (i = 0; i < n; i++) *p++ = t[i]; } *p = 0; @@ -1977,7 +1977,7 @@ static char *nawk_convert(const char *s, int (*fun_c)(int), const char *ps = NULL; size_t n = 0; wchar_t wc; - size_t sz = MB_CUR_MAX; + const size_t sz = awk_mb_cur_max; int unused; if (sz == 1) { |