about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ozan yigit <ozan.yigit@gmail.com> 2023-10-30 19:54:12 -0400
committer: ozan yigit <ozan.yigit@gmail.com> 2023-10-30 19:54:12 -0400
commit: 0609870c7a2ffb86ad2fe286c20ddd6bc9908e28 (patch)
tree: ca78693de671295827f442353030f89a22a6b534
parent: b9b8a321d5353f8b43b22bb0595fa2de137530aa (diff)
parent: 1087d463e9debc7a984b9b7123e72b3dc70c9819 (diff)
download: one-true-awk-0609870c7a2ffb86ad2fe286c20ddd6bc9908e28.tar.gz
Todd Miller's change to disable UTF-8 for non-multibyte locales
such as C or POSIX.
-rw-r--r--  awk.h   2
-rw-r--r--  b.c     2
-rw-r--r--  main.c  3
-rw-r--r--  run.c   10
4 files changed, 11 insertions, 6 deletions
diff --git a/awk.h b/awk.h
index 49b5dfc..217319c 100644
--- a/awk.h
+++ b/awk.h
@@ -64,6 +64,8 @@ extern bool safe; /* false => unsafe, true => safe */
#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */
extern int recsize; /* size of current record, orig RECSIZE */
+extern size_t awk_mb_cur_max; /* max size of a multi-byte character */
+
extern char EMPTY[]; /* this avoid -Wwritable-strings issues */
extern char **FS;
extern char **RS;
diff --git a/b.c b/b.c
index 55b320e..9792a97 100644
--- a/b.c
+++ b/b.c
@@ -778,7 +778,7 @@ struct runedata getrune(FILE *fp)
c = getc(fp);
if (c == EOF)
return result; // result.rune == 0 --> EOF
- else if (c < 128) {
+ else if (c < 128 || awk_mb_cur_max == 1) {
result.bytes[0] = c;
result.len = 1;
result.rune = c;
diff --git a/main.c b/main.c
index d500729..8f03963 100644
--- a/main.c
+++ b/main.c
@@ -53,6 +53,8 @@ bool CSV = false; /* true for csv input */
bool safe = false; /* true => "safe" mode */
+size_t awk_mb_cur_max = 1;
+
static noreturn void fpecatch(int n
#ifdef SA_SIGINFO
, siginfo_t *si, void *uc
@@ -116,6 +118,7 @@ int main(int argc, char *argv[])
setlocale(LC_CTYPE, "");
setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
+ awk_mb_cur_max = MB_CUR_MAX;
cmdname = argv[0];
if (argc == 1) {
fprintf(stderr,
diff --git a/run.c b/run.c
index bb64e51..bbf149c 100644
--- a/run.c
+++ b/run.c
@@ -605,7 +605,7 @@ int u8_isutf(const char *s)
unsigned char c;
c = s[0];
- if (c < 128)
+ if (c < 128 || awk_mb_cur_max == 1)
return 1; /* what if it's 0? */
n = strlen(s);
@@ -632,7 +632,7 @@ int u8_rune(int *rune, const char *s)
unsigned char c;
c = s[0];
- if (c < 128) {
+ if (c < 128 || awk_mb_cur_max == 1) {
*rune = c;
return 1;
}
@@ -679,7 +679,7 @@ int u8_strlen(const char *s)
totlen = 0;
for (i = 0; i < n; i += len) {
c = s[i];
- if (c < 128) {
+ if (c < 128 || awk_mb_cur_max == 1) {
len = 1;
} else {
len = u8_nextlen(&s[i]);
@@ -1289,7 +1289,7 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
int charval = (int) getfval(x);
if (charval != 0) {
- if (charval < 128)
+ if (charval < 128 || awk_mb_cur_max == 1)
snprintf(p, BUFSZ(p), fmt, charval);
else {
// possible unicode character
@@ -1977,7 +1977,7 @@ static char *nawk_convert(const char *s, int (*fun_c)(int),
const char *ps = NULL;
size_t n = 0;
wchar_t wc;
- size_t sz = MB_CUR_MAX;
+ const size_t sz = awk_mb_cur_max;
int unused;
if (sz == 1) {