about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Joel Galenson <jgalenson@google.com> 2021-06-21 14:18:49 -0700
committer Joel Galenson <jgalenson@google.com> 2021-06-21 14:18:49 -0700
commit 674441f567f41603a4d237346faa19b3014afefc (patch)
tree 5af5371df8aa1fd30c5811becaccbbbd86179dab
parent d43d2d3679fb3b658e1322c1d2f0a636e5dd6a4e (diff)
download unicode-normalization-android-s-beta-4.tar.gz
Test: make
Change-Id: I60d1eb3d36a4fab926b676d9a8d2e5d9f85b926c
-rw-r--r-- .cargo_vcs_info.json 2
-rw-r--r-- .github/workflows/rust.yml 4
-rw-r--r-- Cargo.toml 2
-rw-r--r-- Cargo.toml.orig 2
-rw-r--r-- METADATA 8
-rw-r--r-- README.md 2
-rw-r--r-- TEST_MAPPING 11
-rw-r--r-- scripts/unicode.py 53
-rw-r--r-- [-rwxr-xr-x] src/no_std_prelude.rs 0
-rw-r--r-- src/lib.rs 7
-rw-r--r-- src/tables.rs 684
11 files changed, 762 insertions, 13 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 4c8700c..2ac32ce 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
{
"git": {
- "sha1": "4e05cc1cc6435d8e1d5a412ed45a544c0f214ee8"
+ "sha1": "cc28b8ca9f9ef38173793573c14c1a6106908ea2"
}
}
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index c83071c..2cbb9a6 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -34,6 +34,6 @@ jobs:
- name: Package
run: cargo package
- name: Test package
- run: cd target/package/unicode-normalization-* && cargo test
+ run: cd $(find target/package/ -maxdepth 1 -mindepth 1 -type d) && cargo test
- name: Test package without features
- run: cd target/package/unicode-normalization-* && cargo test --no-default-features
+ run: cd $(find target/package/ -maxdepth 1 -mindepth 1 -type d) && cargo test --no-default-features
diff --git a/Cargo.toml b/Cargo.toml
index 92de010..d941fb3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@
[package]
edition = "2018"
name = "unicode-normalization"
-version = "0.1.17"
+version = "0.1.19"
authors = ["kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>"]
exclude = ["target/*", "Cargo.lock", "scripts/tmp", "*.txt", "tests/*"]
description = "This crate provides functions for normalization of\nUnicode strings, including Canonical and Compatible\nDecomposition and Recomposition, as described in\nUnicode Standard Annex #15.\n"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 01aa7cd..51d9444 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,7 +1,7 @@
[package]
name = "unicode-normalization"
-version = "0.1.17"
+version = "0.1.19"
authors = ["kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>"]
homepage = "https://github.com/unicode-rs/unicode-normalization"
diff --git a/METADATA b/METADATA
index 711f72f..bcb028d 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@ third_party {
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/unicode-normalization/unicode-normalization-0.1.17.crate"
+ value: "https://static.crates.io/crates/unicode-normalization/unicode-normalization-0.1.19.crate"
}
- version: "0.1.17"
+ version: "0.1.19"
license_type: NOTICE
last_upgrade_date {
year: 2021
- month: 2
- day: 9
+ month: 6
+ day: 21
}
}
diff --git a/README.md b/README.md
index c0e2a71..746a7e0 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ to your `Cargo.toml`:
```toml
[dependencies]
-unicode-normalization = "0.1.16"
+unicode-normalization = "0.1.19"
```
## `no_std` + `alloc` support
diff --git a/TEST_MAPPING b/TEST_MAPPING
index ef16d49..bf91080 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -2,13 +2,22 @@
{
"presubmit": [
{
+ "name": "doh_unit_test"
+ },
+ {
"name": "quiche_device_test_src_lib"
},
{
+ "name": "unicode-normalization_device_test_src_lib"
+ },
+ {
"name": "url_device_test_src_lib"
},
{
- "name": "unicode-normalization_device_test_src_lib"
+ "name": "url_device_test_tests_data"
+ },
+ {
+ "name": "url_device_test_tests_unit"
}
]
}
diff --git a/scripts/unicode.py b/scripts/unicode.py
index 74164fe..c4a1a2b 100644
--- a/scripts/unicode.py
+++ b/scripts/unicode.py
@@ -98,12 +98,17 @@ class UnicodeData(object):
self.compat_decomp = {}
self.canon_decomp = {}
self.general_category_mark = []
+ self.general_category_public_assigned = []
+
+ assigned_start = 0;
+ prev_char_int = -1;
+ prev_name = "";
for line in self._fetch("UnicodeData.txt").splitlines():
# See ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
pieces = line.split(';')
assert len(pieces) == 15
- char, category, cc, decomp = pieces[0], pieces[2], pieces[3], pieces[5]
+ char, name, category, cc, decomp = pieces[0], pieces[1], pieces[2], pieces[3], pieces[5]
char_int = int(char, 16)
name = pieces[1].strip()
@@ -120,6 +125,16 @@ class UnicodeData(object):
if category == 'M' or 'M' in expanded_categories.get(category, []):
self.general_category_mark.append(char_int)
+ assert category != 'Cn', "Unexpected: Unassigned codepoint in UnicodeData.txt"
+ if category not in ['Co', 'Cs']:
+ if char_int != prev_char_int + 1 and not is_first_and_last(prev_name, name):
+ self.general_category_public_assigned.append((assigned_start, prev_char_int))
+ assigned_start = char_int
+ prev_char_int = char_int
+ prev_name = name;
+
+ self.general_category_public_assigned.append((assigned_start, prev_char_int))
+
def _load_cjk_compat_ideograph_variants(self):
for line in self._fetch("StandardizedVariants.txt").splitlines():
strip_comments = line.split('#', 1)[0].strip()
@@ -330,6 +345,15 @@ class UnicodeData(object):
hexify = lambda c: '{:04X}'.format(c)
+# Test whether `first` and `last` are corresponding "<..., First>" and
+# "<..., Last>" markers.
+def is_first_and_last(first, last):
+ if not first.startswith('<') or not first.endswith(', First>'):
+ return False
+ if not last.startswith('<') or not last.endswith(', Last>'):
+ return False
+ return first[1:-8] == last[1:-7]
+
def gen_mph_data(name, d, kv_type, kv_callback):
(salt, keys) = minimal_perfect_hash(d)
out.write("pub(crate) const %s_SALT: &[u16] = &[\n" % name.upper())
@@ -418,6 +442,30 @@ def gen_combining_mark(general_category_mark, out):
gen_mph_data('combining_mark', general_category_mark, 'u32',
lambda k: '0x{:04x}'.format(k))
+def gen_public_assigned(general_category_public_assigned, out):
+ # This could be done as a hash but the table is somewhat small.
+ out.write("#[inline]\n")
+ out.write("pub fn is_public_assigned(c: char) -> bool {\n")
+ out.write(" match c {\n")
+
+ start = True
+ for first, last in general_category_public_assigned:
+ if start:
+ out.write(" ")
+ start = False
+ else:
+ out.write(" | ")
+ if first == last:
+ out.write("'\\u{%s}'\n" % hexify(first))
+ else:
+ out.write("'\\u{%s}'..='\\u{%s}'\n" % (hexify(first), hexify(last)))
+ out.write(" => true,\n")
+
+ out.write(" _ => false,\n")
+ out.write(" }\n")
+ out.write("}\n")
+ out.write("\n")
+
def gen_stream_safe(leading, trailing, out):
# This could be done as a hash but the table is very small.
out.write("#[inline]\n")
@@ -540,6 +588,9 @@ if __name__ == '__main__':
gen_combining_mark(data.general_category_mark, out)
out.write("\n")
+ gen_public_assigned(data.general_category_public_assigned, out)
+ out.write("\n")
+
gen_nfc_qc(data.norm_props, out)
out.write("\n")
diff --git a/src/lib.rs b/src/lib.rs
index cb623ba..2c3a090 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -34,7 +34,7 @@
//!
//! ```toml
//! [dependencies]
-//! unicode-normalization = "0.1.8"
+//! unicode-normalization = "0.1.19"
//! ```
#![deny(missing_docs, unsafe_code)]
@@ -90,6 +90,11 @@ pub mod char {
};
pub use crate::lookups::{canonical_combining_class, is_combining_mark};
+
+ /// Return whether the given character is assigned (`General_Category` != `Unassigned`)
+ /// and not Private-Use (`General_Category` != `Private_Use`), in the supported version
+ /// of Unicode.
+ pub use crate::tables::is_public_assigned;
}
/// Methods for iterating over strings while applying Unicode normalizations
diff --git a/src/no_std_prelude.rs b/src/no_std_prelude.rs
index 838d122..838d122 100755..100644
--- a/src/no_std_prelude.rs
+++ b/src/no_std_prelude.rs
diff --git a/src/tables.rs b/src/tables.rs
index 81ebf23..0477b50 100644
--- a/src/tables.rs
+++ b/src/tables.rs
@@ -21767,6 +21767,690 @@ pub(crate) const COMBINING_MARK_KV: &[u32] = &[
#[inline]
+pub fn is_public_assigned(c: char) -> bool {
+ match c {
+ '\u{0000}'..='\u{0377}'
+ | '\u{037A}'..='\u{037F}'
+ | '\u{0384}'..='\u{038A}'
+ | '\u{038C}'
+ | '\u{038E}'..='\u{03A1}'
+ | '\u{03A3}'..='\u{052F}'
+ | '\u{0531}'..='\u{0556}'
+ | '\u{0559}'..='\u{058A}'
+ | '\u{058D}'..='\u{058F}'
+ | '\u{0591}'..='\u{05C7}'
+ | '\u{05D0}'..='\u{05EA}'
+ | '\u{05EF}'..='\u{05F4}'
+ | '\u{0600}'..='\u{061C}'
+ | '\u{061E}'..='\u{070D}'
+ | '\u{070F}'..='\u{074A}'
+ | '\u{074D}'..='\u{07B1}'
+ | '\u{07C0}'..='\u{07FA}'
+ | '\u{07FD}'..='\u{082D}'
+ | '\u{0830}'..='\u{083E}'
+ | '\u{0840}'..='\u{085B}'
+ | '\u{085E}'
+ | '\u{0860}'..='\u{086A}'
+ | '\u{08A0}'..='\u{08B4}'
+ | '\u{08B6}'..='\u{08C7}'
+ | '\u{08D3}'..='\u{0983}'
+ | '\u{0985}'..='\u{098C}'
+ | '\u{098F}'..='\u{0990}'
+ | '\u{0993}'..='\u{09A8}'
+ | '\u{09AA}'..='\u{09B0}'
+ | '\u{09B2}'
+ | '\u{09B6}'..='\u{09B9}'
+ | '\u{09BC}'..='\u{09C4}'
+ | '\u{09C7}'..='\u{09C8}'
+ | '\u{09CB}'..='\u{09CE}'
+ | '\u{09D7}'
+ | '\u{09DC}'..='\u{09DD}'
+ | '\u{09DF}'..='\u{09E3}'
+ | '\u{09E6}'..='\u{09FE}'
+ | '\u{0A01}'..='\u{0A03}'
+ | '\u{0A05}'..='\u{0A0A}'
+ | '\u{0A0F}'..='\u{0A10}'
+ | '\u{0A13}'..='\u{0A28}'
+ | '\u{0A2A}'..='\u{0A30}'
+ | '\u{0A32}'..='\u{0A33}'
+ | '\u{0A35}'..='\u{0A36}'
+ | '\u{0A38}'..='\u{0A39}'
+ | '\u{0A3C}'
+ | '\u{0A3E}'..='\u{0A42}'
+ | '\u{0A47}'..='\u{0A48}'
+ | '\u{0A4B}'..='\u{0A4D}'
+ | '\u{0A51}'
+ | '\u{0A59}'..='\u{0A5C}'
+ | '\u{0A5E}'
+ | '\u{0A66}'..='\u{0A76}'
+ | '\u{0A81}'..='\u{0A83}'
+ | '\u{0A85}'..='\u{0A8D}'
+ | '\u{0A8F}'..='\u{0A91}'
+ | '\u{0A93}'..='\u{0AA8}'
+ | '\u{0AAA}'..='\u{0AB0}'
+ | '\u{0AB2}'..='\u{0AB3}'
+ | '\u{0AB5}'..='\u{0AB9}'
+ | '\u{0ABC}'..='\u{0AC5}'
+ | '\u{0AC7}'..='\u{0AC9}'
+ | '\u{0ACB}'..='\u{0ACD}'
+ | '\u{0AD0}'
+ | '\u{0AE0}'..='\u{0AE3}'
+ | '\u{0AE6}'..='\u{0AF1}'
+ | '\u{0AF9}'..='\u{0AFF}'
+ | '\u{0B01}'..='\u{0B03}'
+ | '\u{0B05}'..='\u{0B0C}'
+ | '\u{0B0F}'..='\u{0B10}'
+ | '\u{0B13}'..='\u{0B28}'
+ | '\u{0B2A}'..='\u{0B30}'
+ | '\u{0B32}'..='\u{0B33}'
+ | '\u{0B35}'..='\u{0B39}'
+ | '\u{0B3C}'..='\u{0B44}'
+ | '\u{0B47}'..='\u{0B48}'
+ | '\u{0B4B}'..='\u{0B4D}'
+ | '\u{0B55}'..='\u{0B57}'
+ | '\u{0B5C}'..='\u{0B5D}'
+ | '\u{0B5F}'..='\u{0B63}'
+ | '\u{0B66}'..='\u{0B77}'
+ | '\u{0B82}'..='\u{0B83}'
+ | '\u{0B85}'..='\u{0B8A}'
+ | '\u{0B8E}'..='\u{0B90}'
+ | '\u{0B92}'..='\u{0B95}'
+ | '\u{0B99}'..='\u{0B9A}'
+ | '\u{0B9C}'
+ | '\u{0B9E}'..='\u{0B9F}'
+ | '\u{0BA3}'..='\u{0BA4}'
+ | '\u{0BA8}'..='\u{0BAA}'
+ | '\u{0BAE}'..='\u{0BB9}'
+ | '\u{0BBE}'..='\u{0BC2}'
+ | '\u{0BC6}'..='\u{0BC8}'
+ | '\u{0BCA}'..='\u{0BCD}'
+ | '\u{0BD0}'
+ | '\u{0BD7}'
+ | '\u{0BE6}'..='\u{0BFA}'
+ | '\u{0C00}'..='\u{0C0C}'
+ | '\u{0C0E}'..='\u{0C10}'
+ | '\u{0C12}'..='\u{0C28}'
+ | '\u{0C2A}'..='\u{0C39}'
+ | '\u{0C3D}'..='\u{0C44}'
+ | '\u{0C46}'..='\u{0C48}'
+ | '\u{0C4A}'..='\u{0C4D}'
+ | '\u{0C55}'..='\u{0C56}'
+ | '\u{0C58}'..='\u{0C5A}'
+ | '\u{0C60}'..='\u{0C63}'
+ | '\u{0C66}'..='\u{0C6F}'
+ | '\u{0C77}'..='\u{0C8C}'
+ | '\u{0C8E}'..='\u{0C90}'
+ | '\u{0C92}'..='\u{0CA8}'
+ | '\u{0CAA}'..='\u{0CB3}'
+ | '\u{0CB5}'..='\u{0CB9}'
+ | '\u{0CBC}'..='\u{0CC4}'
+ | '\u{0CC6}'..='\u{0CC8}'
+ | '\u{0CCA}'..='\u{0CCD}'
+ | '\u{0CD5}'..='\u{0CD6}'
+ | '\u{0CDE}'
+ | '\u{0CE0}'..='\u{0CE3}'
+ | '\u{0CE6}'..='\u{0CEF}'
+ | '\u{0CF1}'..='\u{0CF2}'
+ | '\u{0D00}'..='\u{0D0C}'
+ | '\u{0D0E}'..='\u{0D10}'
+ | '\u{0D12}'..='\u{0D44}'
+ | '\u{0D46}'..='\u{0D48}'
+ | '\u{0D4A}'..='\u{0D4F}'
+ | '\u{0D54}'..='\u{0D63}'
+ | '\u{0D66}'..='\u{0D7F}'
+ | '\u{0D81}'..='\u{0D83}'
+ | '\u{0D85}'..='\u{0D96}'
+ | '\u{0D9A}'..='\u{0DB1}'
+ | '\u{0DB3}'..='\u{0DBB}'
+ | '\u{0DBD}'
+ | '\u{0DC0}'..='\u{0DC6}'
+ | '\u{0DCA}'
+ | '\u{0DCF}'..='\u{0DD4}'
+ | '\u{0DD6}'
+ | '\u{0DD8}'..='\u{0DDF}'
+ | '\u{0DE6}'..='\u{0DEF}'
+ | '\u{0DF2}'..='\u{0DF4}'
+ | '\u{0E01}'..='\u{0E3A}'
+ | '\u{0E3F}'..='\u{0E5B}'
+ | '\u{0E81}'..='\u{0E82}'
+ | '\u{0E84}'
+ | '\u{0E86}'..='\u{0E8A}'
+ | '\u{0E8C}'..='\u{0EA3}'
+ | '\u{0EA5}'
+ | '\u{0EA7}'..='\u{0EBD}'
+ | '\u{0EC0}'..='\u{0EC4}'
+ | '\u{0EC6}'
+ | '\u{0EC8}'..='\u{0ECD}'
+ | '\u{0ED0}'..='\u{0ED9}'
+ | '\u{0EDC}'..='\u{0EDF}'
+ | '\u{0F00}'..='\u{0F47}'
+ | '\u{0F49}'..='\u{0F6C}'
+ | '\u{0F71}'..='\u{0F97}'
+ | '\u{0F99}'..='\u{0FBC}'
+ | '\u{0FBE}'..='\u{0FCC}'
+ | '\u{0FCE}'..='\u{0FDA}'
+ | '\u{1000}'..='\u{10C5}'
+ | '\u{10C7}'
+ | '\u{10CD}'
+ | '\u{10D0}'..='\u{1248}'
+ | '\u{124A}'..='\u{124D}'
+ | '\u{1250}'..='\u{1256}'
+ | '\u{1258}'
+ | '\u{125A}'..='\u{125D}'
+ | '\u{1260}'..='\u{1288}'
+ | '\u{128A}'..='\u{128D}'
+ | '\u{1290}'..='\u{12B0}'
+ | '\u{12B2}'..='\u{12B5}'
+ | '\u{12B8}'..='\u{12BE}'
+ | '\u{12C0}'
+ | '\u{12C2}'..='\u{12C5}'
+ | '\u{12C8}'..='\u{12D6}'
+ | '\u{12D8}'..='\u{1310}'
+ | '\u{1312}'..='\u{1315}'
+ | '\u{1318}'..='\u{135A}'
+ | '\u{135D}'..='\u{137C}'
+ | '\u{1380}'..='\u{1399}'
+ | '\u{13A0}'..='\u{13F5}'
+ | '\u{13F8}'..='\u{13FD}'
+ | '\u{1400}'..='\u{169C}'
+ | '\u{16A0}'..='\u{16F8}'
+ | '\u{1700}'..='\u{170C}'
+ | '\u{170E}'..='\u{1714}'
+ | '\u{1720}'..='\u{1736}'
+ | '\u{1740}'..='\u{1753}'
+ | '\u{1760}'..='\u{176C}'
+ | '\u{176E}'..='\u{1770}'
+ | '\u{1772}'..='\u{1773}'
+ | '\u{1780}'..='\u{17DD}'
+ | '\u{17E0}'..='\u{17E9}'
+ | '\u{17F0}'..='\u{17F9}'
+ | '\u{1800}'..='\u{180E}'
+ | '\u{1810}'..='\u{1819}'
+ | '\u{1820}'..='\u{1878}'
+ | '\u{1880}'..='\u{18AA}'
+ | '\u{18B0}'..='\u{18F5}'
+ | '\u{1900}'..='\u{191E}'
+ | '\u{1920}'..='\u{192B}'
+ | '\u{1930}'..='\u{193B}'
+ | '\u{1940}'
+ | '\u{1944}'..='\u{196D}'
+ | '\u{1970}'..='\u{1974}'
+ | '\u{1980}'..='\u{19AB}'
+ | '\u{19B0}'..='\u{19C9}'
+ | '\u{19D0}'..='\u{19DA}'
+ | '\u{19DE}'..='\u{1A1B}'
+ | '\u{1A1E}'..='\u{1A5E}'
+ | '\u{1A60}'..='\u{1A7C}'
+ | '\u{1A7F}'..='\u{1A89}'
+ | '\u{1A90}'..='\u{1A99}'
+ | '\u{1AA0}'..='\u{1AAD}'
+ | '\u{1AB0}'..='\u{1AC0}'
+ | '\u{1B00}'..='\u{1B4B}'
+ | '\u{1B50}'..='\u{1B7C}'
+ | '\u{1B80}'..='\u{1BF3}'
+ | '\u{1BFC}'..='\u{1C37}'
+ | '\u{1C3B}'..='\u{1C49}'
+ | '\u{1C4D}'..='\u{1C88}'
+ | '\u{1C90}'..='\u{1CBA}'
+ | '\u{1CBD}'..='\u{1CC7}'
+ | '\u{1CD0}'..='\u{1CFA}'
+ | '\u{1D00}'..='\u{1DF9}'
+ | '\u{1DFB}'..='\u{1F15}'
+ | '\u{1F18}'..='\u{1F1D}'
+ | '\u{1F20}'..='\u{1F45}'
+ | '\u{1F48}'..='\u{1F4D}'
+ | '\u{1F50}'..='\u{1F57}'
+ | '\u{1F59}'
+ | '\u{1F5B}'
+ | '\u{1F5D}'
+ | '\u{1F5F}'..='\u{1F7D}'
+ | '\u{1F80}'..='\u{1FB4}'
+ | '\u{1FB6}'..='\u{1FC4}'
+ | '\u{1FC6}'..='\u{1FD3}'
+ | '\u{1FD6}'..='\u{1FDB}'
+ | '\u{1FDD}'..='\u{1FEF}'
+ | '\u{1FF2}'..='\u{1FF4}'
+ | '\u{1FF6}'..='\u{1FFE}'
+ | '\u{2000}'..='\u{2064}'
+ | '\u{2066}'..='\u{2071}'
+ | '\u{2074}'..='\u{208E}'
+ | '\u{2090}'..='\u{209C}'
+ | '\u{20A0}'..='\u{20BF}'
+ | '\u{20D0}'..='\u{20F0}'
+ | '\u{2100}'..='\u{218B}'
+ | '\u{2190}'..='\u{2426}'
+ | '\u{2440}'..='\u{244A}'
+ | '\u{2460}'..='\u{2B73}'
+ | '\u{2B76}'..='\u{2B95}'
+ | '\u{2B97}'..='\u{2C2E}'
+ | '\u{2C30}'..='\u{2C5E}'
+ | '\u{2C60}'..='\u{2CF3}'
+ | '\u{2CF9}'..='\u{2D25}'
+ | '\u{2D27}'
+ | '\u{2D2D}'
+ | '\u{2D30}'..='\u{2D67}'
+ | '\u{2D6F}'..='\u{2D70}'
+ | '\u{2D7F}'..='\u{2D96}'
+ | '\u{2DA0}'..='\u{2DA6}'
+ | '\u{2DA8}'..='\u{2DAE}'
+ | '\u{2DB0}'..='\u{2DB6}'
+ | '\u{2DB8}'..='\u{2DBE}'
+ | '\u{2DC0}'..='\u{2DC6}'
+ | '\u{2DC8}'..='\u{2DCE}'
+ | '\u{2DD0}'..='\u{2DD6}'
+ | '\u{2DD8}'..='\u{2DDE}'
+ | '\u{2DE0}'..='\u{2E52}'
+ | '\u{2E80}'..='\u{2E99}'
+ | '\u{2E9B}'..='\u{2EF3}'
+ | '\u{2F00}'..='\u{2FD5}'
+ | '\u{2FF0}'..='\u{2FFB}'
+ | '\u{3000}'..='\u{303F}'
+ | '\u{3041}'..='\u{3096}'
+ | '\u{3099}'..='\u{30FF}'
+ | '\u{3105}'..='\u{312F}'
+ | '\u{3131}'..='\u{318E}'
+ | '\u{3190}'..='\u{31E3}'
+ | '\u{31F0}'..='\u{321E}'
+ | '\u{3220}'..='\u{9FFC}'
+ | '\u{A000}'..='\u{A48C}'
+ | '\u{A490}'..='\u{A4C6}'
+ | '\u{A4D0}'..='\u{A62B}'
+ | '\u{A640}'..='\u{A6F7}'
+ | '\u{A700}'..='\u{A7BF}'
+ | '\u{A7C2}'..='\u{A7CA}'
+ | '\u{A7F5}'..='\u{A82C}'
+ | '\u{A830}'..='\u{A839}'
+ | '\u{A840}'..='\u{A877}'
+ | '\u{A880}'..='\u{A8C5}'
+ | '\u{A8CE}'..='\u{A8D9}'
+ | '\u{A8E0}'..='\u{A953}'
+ | '\u{A95F}'..='\u{A97C}'
+ | '\u{A980}'..='\u{A9CD}'
+ | '\u{A9CF}'..='\u{A9D9}'
+ | '\u{A9DE}'..='\u{A9FE}'
+ | '\u{AA00}'..='\u{AA36}'
+ | '\u{AA40}'..='\u{AA4D}'
+ | '\u{AA50}'..='\u{AA59}'
+ | '\u{AA5C}'..='\u{AAC2}'
+ | '\u{AADB}'..='\u{AAF6}'
+ | '\u{AB01}'..='\u{AB06}'
+ | '\u{AB09}'..='\u{AB0E}'
+ | '\u{AB11}'..='\u{AB16}'
+ | '\u{AB20}'..='\u{AB26}'
+ | '\u{AB28}'..='\u{AB2E}'
+ | '\u{AB30}'..='\u{AB6B}'
+ | '\u{AB70}'..='\u{ABED}'
+ | '\u{ABF0}'..='\u{ABF9}'
+ | '\u{AC00}'..='\u{D7A3}'
+ | '\u{D7B0}'..='\u{D7C6}'
+ | '\u{D7CB}'..='\u{D7FB}'
+ | '\u{F900}'..='\u{FA6D}'
+ | '\u{FA70}'..='\u{FAD9}'
+ | '\u{FB00}'..='\u{FB06}'
+ | '\u{FB13}'..='\u{FB17}'
+ | '\u{FB1D}'..='\u{FB36}'
+ | '\u{FB38}'..='\u{FB3C}'
+ | '\u{FB3E}'
+ | '\u{FB40}'..='\u{FB41}'
+ | '\u{FB43}'..='\u{FB44}'
+ | '\u{FB46}'..='\u{FBC1}'
+ | '\u{FBD3}'..='\u{FD3F}'
+ | '\u{FD50}'..='\u{FD8F}'
+ | '\u{FD92}'..='\u{FDC7}'
+ | '\u{FDF0}'..='\u{FDFD}'
+ | '\u{FE00}'..='\u{FE19}'
+ | '\u{FE20}'..='\u{FE52}'
+ | '\u{FE54}'..='\u{FE66}'
+ | '\u{FE68}'..='\u{FE6B}'
+ | '\u{FE70}'..='\u{FE74}'
+ | '\u{FE76}'..='\u{FEFC}'
+ | '\u{FEFF}'
+ | '\u{FF01}'..='\u{FFBE}'
+ | '\u{FFC2}'..='\u{FFC7}'
+ | '\u{FFCA}'..='\u{FFCF}'
+ | '\u{FFD2}'..='\u{FFD7}'
+ | '\u{FFDA}'..='\u{FFDC}'
+ | '\u{FFE0}'..='\u{FFE6}'
+ | '\u{FFE8}'..='\u{FFEE}'
+ | '\u{FFF9}'..='\u{FFFD}'
+ | '\u{10000}'..='\u{1000B}'
+ | '\u{1000D}'..='\u{10026}'
+ | '\u{10028}'..='\u{1003A}'
+ | '\u{1003C}'..='\u{1003D}'
+ | '\u{1003F}'..='\u{1004D}'
+ | '\u{10050}'..='\u{1005D}'
+ | '\u{10080}'..='\u{100FA}'
+ | '\u{10100}'..='\u{10102}'
+ | '\u{10107}'..='\u{10133}'
+ | '\u{10137}'..='\u{1018E}'
+ | '\u{10190}'..='\u{1019C}'
+ | '\u{101A0}'
+ | '\u{101D0}'..='\u{101FD}'
+ | '\u{10280}'..='\u{1029C}'
+ | '\u{102A0}'..='\u{102D0}'
+ | '\u{102E0}'..='\u{102FB}'
+ | '\u{10300}'..='\u{10323}'
+ | '\u{1032D}'..='\u{1034A}'
+ | '\u{10350}'..='\u{1037A}'
+ | '\u{10380}'..='\u{1039D}'
+ | '\u{1039F}'..='\u{103C3}'
+ | '\u{103C8}'..='\u{103D5}'
+ | '\u{10400}'..='\u{1049D}'
+ | '\u{104A0}'..='\u{104A9}'
+ | '\u{104B0}'..='\u{104D3}'
+ | '\u{104D8}'..='\u{104FB}'
+ | '\u{10500}'..='\u{10527}'
+ | '\u{10530}'..='\u{10563}'
+ | '\u{1056F}'
+ | '\u{10600}'..='\u{10736}'
+ | '\u{10740}'..='\u{10755}'
+ | '\u{10760}'..='\u{10767}'
+ | '\u{10800}'..='\u{10805}'
+ | '\u{10808}'
+ | '\u{1080A}'..='\u{10835}'
+ | '\u{10837}'..='\u{10838}'
+ | '\u{1083C}'
+ | '\u{1083F}'..='\u{10855}'
+ | '\u{10857}'..='\u{1089E}'
+ | '\u{108A7}'..='\u{108AF}'
+ | '\u{108E0}'..='\u{108F2}'
+ | '\u{108F4}'..='\u{108F5}'
+ | '\u{108FB}'..='\u{1091B}'
+ | '\u{1091F}'..='\u{10939}'
+ | '\u{1093F}'
+ | '\u{10980}'..='\u{109B7}'
+ | '\u{109BC}'..='\u{109CF}'
+ | '\u{109D2}'..='\u{10A03}'
+ | '\u{10A05}'..='\u{10A06}'
+ | '\u{10A0C}'..='\u{10A13}'
+ | '\u{10A15}'..='\u{10A17}'
+ | '\u{10A19}'..='\u{10A35}'
+ | '\u{10A38}'..='\u{10A3A}'
+ | '\u{10A3F}'..='\u{10A48}'
+ | '\u{10A50}'..='\u{10A58}'
+ | '\u{10A60}'..='\u{10A9F}'
+ | '\u{10AC0}'..='\u{10AE6}'
+ | '\u{10AEB}'..='\u{10AF6}'
+ | '\u{10B00}'..='\u{10B35}'
+ | '\u{10B39}'..='\u{10B55}'
+ | '\u{10B58}'..='\u{10B72}'
+ | '\u{10B78}'..='\u{10B91}'
+ | '\u{10B99}'..='\u{10B9C}'
+ | '\u{10BA9}'..='\u{10BAF}'
+ | '\u{10C00}'..='\u{10C48}'
+ | '\u{10C80}'..='\u{10CB2}'
+ | '\u{10CC0}'..='\u{10CF2}'
+ | '\u{10CFA}'..='\u{10D27}'
+ | '\u{10D30}'..='\u{10D39}'
+ | '\u{10E60}'..='\u{10E7E}'
+ | '\u{10E80}'..='\u{10EA9}'
+ | '\u{10EAB}'..='\u{10EAD}'
+ | '\u{10EB0}'..='\u{10EB1}'
+ | '\u{10F00}'..='\u{10F27}'
+ | '\u{10F30}'..='\u{10F59}'
+ | '\u{10FB0}'..='\u{10FCB}'
+ | '\u{10FE0}'..='\u{10FF6}'
+ | '\u{11000}'..='\u{1104D}'
+ | '\u{11052}'..='\u{1106F}'
+ | '\u{1107F}'..='\u{110C1}'
+ | '\u{110CD}'
+ | '\u{110D0}'..='\u{110E8}'
+ | '\u{110F0}'..='\u{110F9}'
+ | '\u{11100}'..='\u{11134}'
+ | '\u{11136}'..='\u{11147}'
+ | '\u{11150}'..='\u{11176}'
+ | '\u{11180}'..='\u{111DF}'
+ | '\u{111E1}'..='\u{111F4}'
+ | '\u{11200}'..='\u{11211}'
+ | '\u{11213}'..='\u{1123E}'
+ | '\u{11280}'..='\u{11286}'
+ | '\u{11288}'
+ | '\u{1128A}'..='\u{1128D}'
+ | '\u{1128F}'..='\u{1129D}'
+ | '\u{1129F}'..='\u{112A9}'
+ | '\u{112B0}'..='\u{112EA}'
+ | '\u{112F0}'..='\u{112F9}'
+ | '\u{11300}'..='\u{11303}'
+ | '\u{11305}'..='\u{1130C}'
+ | '\u{1130F}'..='\u{11310}'
+ | '\u{11313}'..='\u{11328}'
+ | '\u{1132A}'..='\u{11330}'
+ | '\u{11332}'..='\u{11333}'
+ | '\u{11335}'..='\u{11339}'
+ | '\u{1133B}'..='\u{11344}'
+ | '\u{11347}'..='\u{11348}'
+ | '\u{1134B}'..='\u{1134D}'
+ | '\u{11350}'
+ | '\u{11357}'
+ | '\u{1135D}'..='\u{11363}'
+ | '\u{11366}'..='\u{1136C}'
+ | '\u{11370}'..='\u{11374}'
+ | '\u{11400}'..='\u{1145B}'
+ | '\u{1145D}'..='\u{11461}'
+ | '\u{11480}'..='\u{114C7}'
+ | '\u{114D0}'..='\u{114D9}'
+ | '\u{11580}'..='\u{115B5}'
+ | '\u{115B8}'..='\u{115DD}'
+ | '\u{11600}'..='\u{11644}'
+ | '\u{11650}'..='\u{11659}'
+ | '\u{11660}'..='\u{1166C}'
+ | '\u{11680}'..='\u{116B8}'
+ | '\u{116C0}'..='\u{116C9}'
+ | '\u{11700}'..='\u{1171A}'
+ | '\u{1171D}'..='\u{1172B}'
+ | '\u{11730}'..='\u{1173F}'
+ | '\u{11800}'..='\u{1183B}'
+ | '\u{118A0}'..='\u{118F2}'
+ | '\u{118FF}'..='\u{11906}'
+ | '\u{11909}'
+ | '\u{1190C}'..='\u{11913}'
+ | '\u{11915}'..='\u{11916}'
+ | '\u{11918}'..='\u{11935}'
+ | '\u{11937}'..='\u{11938}'
+ | '\u{1193B}'..='\u{11946}'
+ | '\u{11950}'..='\u{11959}'
+ | '\u{119A0}'..='\u{119A7}'
+ | '\u{119AA}'..='\u{119D7}'
+ | '\u{119DA}'..='\u{119E4}'
+ | '\u{11A00}'..='\u{11A47}'
+ | '\u{11A50}'..='\u{11AA2}'
+ | '\u{11AC0}'..='\u{11AF8}'
+ | '\u{11C00}'..='\u{11C08}'
+ | '\u{11C0A}'..='\u{11C36}'
+ | '\u{11C38}'..='\u{11C45}'
+ | '\u{11C50}'..='\u{11C6C}'
+ | '\u{11C70}'..='\u{11C8F}'
+ | '\u{11C92}'..='\u{11CA7}'
+ | '\u{11CA9}'..='\u{11CB6}'
+ | '\u{11D00}'..='\u{11D06}'
+ | '\u{11D08}'..='\u{11D09}'
+ | '\u{11D0B}'..='\u{11D36}'
+ | '\u{11D3A}'
+ | '\u{11D3C}'..='\u{11D3D}'
+ | '\u{11D3F}'..='\u{11D47}'
+ | '\u{11D50}'..='\u{11D59}'
+ | '\u{11D60}'..='\u{11D65}'
+ | '\u{11D67}'..='\u{11D68}'
+ | '\u{11D6A}'..='\u{11D8E}'
+ | '\u{11D90}'..='\u{11D91}'
+ | '\u{11D93}'..='\u{11D98}'
+ | '\u{11DA0}'..='\u{11DA9}'
+ | '\u{11EE0}'..='\u{11EF8}'
+ | '\u{11FB0}'
+ | '\u{11FC0}'..='\u{11FF1}'
+ | '\u{11FFF}'..='\u{12399}'
+ | '\u{12400}'..='\u{1246E}'
+ | '\u{12470}'..='\u{12474}'
+ | '\u{12480}'..='\u{12543}'
+ | '\u{13000}'..='\u{1342E}'
+ | '\u{13430}'..='\u{13438}'
+ | '\u{14400}'..='\u{14646}'
+ | '\u{16800}'..='\u{16A38}'
+ | '\u{16A40}'..='\u{16A5E}'
+ | '\u{16A60}'..='\u{16A69}'
+ | '\u{16A6E}'..='\u{16A6F}'
+ | '\u{16AD0}'..='\u{16AED}'
+ | '\u{16AF0}'..='\u{16AF5}'
+ | '\u{16B00}'..='\u{16B45}'
+ | '\u{16B50}'..='\u{16B59}'
+ | '\u{16B5B}'..='\u{16B61}'
+ | '\u{16B63}'..='\u{16B77}'
+ | '\u{16B7D}'..='\u{16B8F}'
+ | '\u{16E40}'..='\u{16E9A}'
+ | '\u{16F00}'..='\u{16F4A}'
+ | '\u{16F4F}'..='\u{16F87}'
+ | '\u{16F8F}'..='\u{16F9F}'
+ | '\u{16FE0}'..='\u{16FE4}'
+ | '\u{16FF0}'..='\u{16FF1}'
+ | '\u{17000}'..='\u{187F7}'
+ | '\u{18800}'..='\u{18CD5}'
+ | '\u{18D00}'..='\u{18D08}'
+ | '\u{1B000}'..='\u{1B11E}'
+ | '\u{1B150}'..='\u{1B152}'
+ | '\u{1B164}'..='\u{1B167}'
+ | '\u{1B170}'..='\u{1B2FB}'
+ | '\u{1BC00}'..='\u{1BC6A}'
+ | '\u{1BC70}'..='\u{1BC7C}'
+ | '\u{1BC80}'..='\u{1BC88}'
+ | '\u{1BC90}'..='\u{1BC99}'
+ | '\u{1BC9C}'..='\u{1BCA3}'
+ | '\u{1D000}'..='\u{1D0F5}'
+ | '\u{1D100}'..='\u{1D126}'
+ | '\u{1D129}'..='\u{1D1E8}'
+ | '\u{1D200}'..='\u{1D245}'
+ | '\u{1D2E0}'..='\u{1D2F3}'
+ | '\u{1D300}'..='\u{1D356}'
+ | '\u{1D360}'..='\u{1D378}'
+ | '\u{1D400}'..='\u{1D454}'
+ | '\u{1D456}'..='\u{1D49C}'
+ | '\u{1D49E}'..='\u{1D49F}'
+ | '\u{1D4A2}'
+ | '\u{1D4A5}'..='\u{1D4A6}'
+ | '\u{1D4A9}'..='\u{1D4AC}'
+ | '\u{1D4AE}'..='\u{1D4B9}'
+ | '\u{1D4BB}'
+ | '\u{1D4BD}'..='\u{1D4C3}'
+ | '\u{1D4C5}'..='\u{1D505}'
+ | '\u{1D507}'..='\u{1D50A}'
+ | '\u{1D50D}'..='\u{1D514}'
+ | '\u{1D516}'..='\u{1D51C}'
+ | '\u{1D51E}'..='\u{1D539}'
+ | '\u{1D53B}'..='\u{1D53E}'
+ | '\u{1D540}'..='\u{1D544}'
+ | '\u{1D546}'
+ | '\u{1D54A}'..='\u{1D550}'
+ | '\u{1D552}'..='\u{1D6A5}'
+ | '\u{1D6A8}'..='\u{1D7CB}'
+ | '\u{1D7CE}'..='\u{1DA8B}'
+ | '\u{1DA9B}'..='\u{1DA9F}'
+ | '\u{1DAA1}'..='\u{1DAAF}'
+ | '\u{1E000}'..='\u{1E006}'
+ | '\u{1E008}'..='\u{1E018}'
+ | '\u{1E01B}'..='\u{1E021}'
+ | '\u{1E023}'..='\u{1E024}'
+ | '\u{1E026}'..='\u{1E02A}'
+ | '\u{1E100}'..='\u{1E12C}'
+ | '\u{1E130}'..='\u{1E13D}'
+ | '\u{1E140}'..='\u{1E149}'
+ | '\u{1E14E}'..='\u{1E14F}'
+ | '\u{1E2C0}'..='\u{1E2F9}'
+ | '\u{1E2FF}'
+ | '\u{1E800}'..='\u{1E8C4}'
+ | '\u{1E8C7}'..='\u{1E8D6}'
+ | '\u{1E900}'..='\u{1E94B}'
+ | '\u{1E950}'..='\u{1E959}'
+ | '\u{1E95E}'..='\u{1E95F}'
+ | '\u{1EC71}'..='\u{1ECB4}'
+ | '\u{1ED01}'..='\u{1ED3D}'
+ | '\u{1EE00}'..='\u{1EE03}'
+ | '\u{1EE05}'..='\u{1EE1F}'
+ | '\u{1EE21}'..='\u{1EE22}'
+ | '\u{1EE24}'
+ | '\u{1EE27}'
+ | '\u{1EE29}'..='\u{1EE32}'
+ | '\u{1EE34}'..='\u{1EE37}'
+ | '\u{1EE39}'
+ | '\u{1EE3B}'
+ | '\u{1EE42}'
+ | '\u{1EE47}'
+ | '\u{1EE49}'
+ | '\u{1EE4B}'
+ | '\u{1EE4D}'..='\u{1EE4F}'
+ | '\u{1EE51}'..='\u{1EE52}'
+ | '\u{1EE54}'
+ | '\u{1EE57}'
+ | '\u{1EE59}'
+ | '\u{1EE5B}'
+ | '\u{1EE5D}'
+ | '\u{1EE5F}'
+ | '\u{1EE61}'..='\u{1EE62}'
+ | '\u{1EE64}'
+ | '\u{1EE67}'..='\u{1EE6A}'
+ | '\u{1EE6C}'..='\u{1EE72}'
+ | '\u{1EE74}'..='\u{1EE77}'
+ | '\u{1EE79}'..='\u{1EE7C}'
+ | '\u{1EE7E}'
+ | '\u{1EE80}'..='\u{1EE89}'
+ | '\u{1EE8B}'..='\u{1EE9B}'
+ | '\u{1EEA1}'..='\u{1EEA3}'
+ | '\u{1EEA5}'..='\u{1EEA9}'
+ | '\u{1EEAB}'..='\u{1EEBB}'
+ | '\u{1EEF0}'..='\u{1EEF1}'
+ | '\u{1F000}'..='\u{1F02B}'
+ | '\u{1F030}'..='\u{1F093}'
+ | '\u{1F0A0}'..='\u{1F0AE}'
+ | '\u{1F0B1}'..='\u{1F0BF}'
+ | '\u{1F0C1}'..='\u{1F0CF}'
+ | '\u{1F0D1}'..='\u{1F0F5}'
+ | '\u{1F100}'..='\u{1F1AD}'
+ | '\u{1F1E6}'..='\u{1F202}'
+ | '\u{1F210}'..='\u{1F23B}'
+ | '\u{1F240}'..='\u{1F248}'
+ | '\u{1F250}'..='\u{1F251}'
+ | '\u{1F260}'..='\u{1F265}'
+ | '\u{1F300}'..='\u{1F6D7}'
+ | '\u{1F6E0}'..='\u{1F6EC}'
+ | '\u{1F6F0}'..='\u{1F6FC}'
+ | '\u{1F700}'..='\u{1F773}'
+ | '\u{1F780}'..='\u{1F7D8}'
+ | '\u{1F7E0}'..='\u{1F7EB}'
+ | '\u{1F800}'..='\u{1F80B}'
+ | '\u{1F810}'..='\u{1F847}'
+ | '\u{1F850}'..='\u{1F859}'
+ | '\u{1F860}'..='\u{1F887}'
+ | '\u{1F890}'..='\u{1F8AD}'
+ | '\u{1F8B0}'..='\u{1F8B1}'
+ | '\u{1F900}'..='\u{1F978}'
+ | '\u{1F97A}'..='\u{1F9CB}'
+ | '\u{1F9CD}'..='\u{1FA53}'
+ | '\u{1FA60}'..='\u{1FA6D}'
+ | '\u{1FA70}'..='\u{1FA74}'
+ | '\u{1FA78}'..='\u{1FA7A}'
+ | '\u{1FA80}'..='\u{1FA86}'
+ | '\u{1FA90}'..='\u{1FAA8}'
+ | '\u{1FAB0}'..='\u{1FAB6}'
+ | '\u{1FAC0}'..='\u{1FAC2}'
+ | '\u{1FAD0}'..='\u{1FAD6}'
+ | '\u{1FB00}'..='\u{1FB92}'
+ | '\u{1FB94}'..='\u{1FBCA}'
+ | '\u{1FBF0}'..='\u{1FBF9}'
+ | '\u{20000}'..='\u{2A6DD}'
+ | '\u{2A700}'..='\u{2B734}'
+ | '\u{2B740}'..='\u{2B81D}'
+ | '\u{2B820}'..='\u{2CEA1}'
+ | '\u{2CEB0}'..='\u{2EBE0}'
+ | '\u{2F800}'..='\u{2FA1D}'
+ | '\u{30000}'..='\u{3134A}'
+ | '\u{E0001}'
+ | '\u{E0020}'..='\u{E007F}'
+ | '\u{E0100}'..='\u{E01EF}'
+ => true,
+ _ => false,
+ }
+}
+
+
+#[inline]
#[allow(ellipsis_inclusive_range_patterns)]
pub fn qc_nfc(c: char) -> IsNormalized {
match c {