diff options
author | ozan yigit <ozan.yigit@gmail.com> | 2023-10-30 19:54:12 -0400 |
---|---|---|
committer | ozan yigit <ozan.yigit@gmail.com> | 2023-10-30 19:54:12 -0400 |
commit | 0609870c7a2ffb86ad2fe286c20ddd6bc9908e28 (patch) | |
tree | ca78693de671295827f442353030f89a22a6b534 | |
parent | b9b8a321d5353f8b43b22bb0595fa2de137530aa (diff) | |
parent | 1087d463e9debc7a984b9b7123e72b3dc70c9819 (diff) | |
download | one-true-awk-0609870c7a2ffb86ad2fe286c20ddd6bc9908e28.tar.gz |
todd miller's change to disable utf-8 for non-multibyte locales
such as C or POSIX
-rw-r--r-- | awk.h | 2 | ||||
-rw-r--r-- | b.c | 2 | ||||
-rw-r--r-- | main.c | 3 | ||||
-rw-r--r-- | run.c | 10 |
4 files changed, 11 insertions, 6 deletions
--- a/awk.h
+++ b/awk.h
@@ -64,6 +64,8 @@ extern bool safe; /* false => unsafe, true => safe */
 #define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */
 extern int recsize; /* size of current record, orig RECSIZE */
 
+extern size_t awk_mb_cur_max; /* max size of a multi-byte character */
+
 extern char EMPTY[]; /* this avoid -Wwritable-strings issues */
 extern char **FS;
 extern char **RS;

--- a/b.c
+++ b/b.c
@@ -778,7 +778,7 @@ struct runedata getrune(FILE *fp)
 	c = getc(fp);
 	if (c == EOF)
 		return result; // result.rune == 0 --> EOF
-	else if (c < 128) {
+	else if (c < 128 || awk_mb_cur_max == 1) {
 		result.bytes[0] = c;
 		result.len = 1;
 		result.rune = c;

--- a/main.c
+++ b/main.c
@@ -53,6 +53,8 @@ bool CSV = false; /* true for csv input */
 
 bool safe = false; /* true => "safe" mode */
 
+size_t awk_mb_cur_max = 1;
+
 static noreturn void fpecatch(int n
 #ifdef SA_SIGINFO
 	, siginfo_t *si, void *uc
@@ -116,6 +118,7 @@ int main(int argc, char *argv[])
 
 	setlocale(LC_CTYPE, "");
 	setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
+	awk_mb_cur_max = MB_CUR_MAX;
 	cmdname = argv[0];
 	if (argc == 1) {
 		fprintf(stderr,

--- a/run.c
+++ b/run.c
@@ -605,7 +605,7 @@ int u8_isutf(const char *s)
 	unsigned char c;
 
 	c = s[0];
-	if (c < 128)
+	if (c < 128 || awk_mb_cur_max == 1)
 		return 1; /* what if it's 0? */
 
 	n = strlen(s);
@@ -632,7 +632,7 @@ int u8_rune(int *rune, const char *s)
 	unsigned char c;
 
 	c = s[0];
-	if (c < 128) {
+	if (c < 128 || awk_mb_cur_max == 1) {
 		*rune = c;
 		return 1;
 	}
@@ -679,7 +679,7 @@ int u8_strlen(const char *s)
 	totlen = 0;
 	for (i = 0; i < n; i += len) {
 		c = s[i];
-		if (c < 128) {
+		if (c < 128 || awk_mb_cur_max == 1) {
 			len = 1;
 		} else {
 			len = u8_nextlen(&s[i]);
@@ -1289,7 +1289,7 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
 				int charval = (int) getfval(x);
 
 				if (charval != 0) {
-					if (charval < 128)
+					if (charval < 128 || awk_mb_cur_max == 1)
 						snprintf(p, BUFSZ(p), fmt, charval);
 					else {
 						// possible unicode character
@@ -1977,7 +1977,7 @@ static char *nawk_convert(const char *s, int (*fun_c)(int),
 	const char *ps = NULL;
 	size_t n = 0;
 	wchar_t wc;
-	size_t sz = MB_CUR_MAX;
+	const size_t sz = awk_mb_cur_max;
 	int unused;
 
 	if (sz == 1) {