diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 05:08:36 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 05:08:36 +0000 |
commit | 17c2fb15e206a9b8c416fe0eebe982c7f3b086dc (patch) | |
tree | 6512be3ea7d7276bbfd5e8d0aaae89a0031ad34f | |
parent | f08197036b7dcddcbe4a32a60e5fa505b57b400d (diff) | |
parent | a7203a368f3ae6eeb1ea563c2d47fed8f815dca8 (diff) | |
download | regex-syntax-android14-mainline-permission-release.tar.gz |
Snap for 10453563 from a7203a368f3ae6eeb1ea563c2d47fed8f815dca8 to mainline-permission-releaseaml_per_341614000aml_per_341510010aml_per_341410020aml_per_341311000aml_per_341110020aml_per_341110010aml_per_341011100aml_per_341011020aml_per_340916010android14-mainline-permission-release
Change-Id: I7a621a773502e87f6bc837618d94e112766b398f
36 files changed, 2733 insertions, 1480 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 3d660d5..734f7be 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,5 +1,6 @@ { "git": { - "sha1": "3ea9e3eca7b762c30fbc09205522e3935cd70052" - } -} + "sha1": "72d482f911c4057f9a31f7f434dfe27c929a8913" + }, + "path_in_vcs": "regex-syntax" +}
\ No newline at end of file @@ -43,7 +43,7 @@ rust_library { host_supported: true, crate_name: "regex_syntax", cargo_env_compat: true, - cargo_pkg_version: "0.6.25", + cargo_pkg_version: "0.6.29", srcs: ["src/lib.rs"], edition: "2018", features: [ @@ -62,6 +62,8 @@ rust_library { "com.android.compos", "com.android.virt", ], + product_available: true, + vendor_available: true, } rust_test { @@ -69,7 +71,7 @@ rust_test { host_supported: true, crate_name: "regex_syntax", cargo_env_compat: true, - cargo_pkg_version: "0.6.25", + cargo_pkg_version: "0.6.29", srcs: ["src/lib.rs"], test_suites: ["general-tests"], auto_gen_config: true, @@ -3,27 +3,35 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "regex-syntax" -version = "0.6.25" +version = "0.6.29" authors = ["The Rust Project Developers"] description = "A regular expression parser." homepage = "https://github.com/rust-lang/regex" documentation = "https://docs.rs/regex-syntax" -license = "MIT/Apache-2.0" +readme = "README.md" +license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex" [features] default = ["unicode"] -unicode = ["unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] +unicode = [ + "unicode-age", + "unicode-bool", + "unicode-case", + "unicode-gencat", + "unicode-perl", + "unicode-script", + "unicode-segment", +] unicode-age = [] unicode-bool = [] unicode-case = [] diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 1359aa1..be9aeb5 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,8 +1,8 @@ [package] name = "regex-syntax" -version = "0.6.25" #:version +version = "0.6.29" #:version authors = ["The Rust Project Developers"] -license = "MIT/Apache-2.0" +license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex" documentation = "https://docs.rs/regex-syntax" homepage = "https://github.com/rust-lang/regex" @@ -1,3 +1,7 @@ +# This project was upgraded with external_updater. +# Usage: tools/external_updater/updater.sh update rust/crates/regex-syntax +# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md + name: "regex-syntax" description: "A regular expression parser." third_party { @@ -7,13 +11,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/regex-syntax/regex-syntax-0.6.25.crate" + value: "https://static.crates.io/crates/regex-syntax/regex-syntax-0.6.29.crate" } - version: "0.6.25" + version: "0.6.29" license_type: NOTICE last_upgrade_date { - year: 2021 - month: 5 - day: 19 + year: 2023 + month: 4 + day: 3 } } @@ -2,9 +2,8 @@ regex-syntax ============ This crate provides a robust regular expression parser. -[![Build status](https://travis-ci.com/rust-lang/regex.svg?branch=master)](https://travis-ci.com/rust-lang/regex) -[![Build status](https://ci.appveyor.com/api/projects/status/github/rust-lang/regex?svg=true)](https://ci.appveyor.com/project/rust-lang-libs/regex) -[![](https://meritbadge.herokuapp.com/regex-syntax)](https://crates.io/crates/regex-syntax) +[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions) +[![Crates.io](https://img.shields.io/crates/v/regex-syntax.svg)](https://crates.io/crates/regex-syntax) [![Rust](https://img.shields.io/badge/rust-1.28.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex) @@ -53,7 +52,7 @@ for extreme optimization, and therefore, use of `unsafe`. The standard for using `unsafe` in this crate is extremely high because this crate is intended to be reasonably safe to use with user supplied regular -expressions. Therefore, while their may be bugs in the regex parser itself, +expressions. Therefore, while there may be bugs in the regex parser itself, they should _never_ result in memory unsafety unless there is either a bug in the compiler or the standard library. (Since `regex-syntax` has zero dependencies.) diff --git a/TEST_MAPPING b/TEST_MAPPING index d661318..afa287b 100644 --- a/TEST_MAPPING +++ b/TEST_MAPPING @@ -5,6 +5,9 @@ "path": "external/rust/crates/base64" }, { + "path": "external/rust/crates/clap/2.33.3" + }, + { "path": "external/rust/crates/libsqlite3-sys" }, { @@ -21,34 +24,28 @@ }, { "path": "external/rust/crates/unicode-xid" - } - ], - "presubmit": [ + }, { - "name": "keystore2_test" + "path": "packages/modules/Virtualization/virtualizationmanager" }, { - "name": "legacykeystore_test" + "path": "system/keymint/hal" }, { - "name": "regex-syntax_test_src_lib" + "path": "system/security/keystore2" }, { - "name": "virtualizationservice_device_test" + "path": "system/security/keystore2/legacykeystore" } ], - "presubmit-rust": [ - { - "name": "keystore2_test" - }, - { - "name": "legacykeystore_test" - }, + "presubmit": [ { "name": "regex-syntax_test_src_lib" - }, + } + ], + "presubmit-rust": [ { - "name": "virtualizationservice_device_test" + "name": "regex-syntax_test_src_lib" } ] } diff --git a/cargo2android.json b/cargo2android.json index 0e54308..bef74ca 100644 --- a/cargo2android.json +++ b/cargo2android.json @@ -7,5 +7,6 @@ "dependencies": true, "device": true, "run": true, - "tests": true + "tests": true, + "vendor-available": true } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9b9127b..9db9afa 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -15,7 +15,7 @@ mod visitor; /// An error that occurred while parsing a regular expression into an abstract /// syntax tree. /// -/// Note that note all ASTs represents a valid regular expression. For example, +/// Note that not all ASTs represents a valid regular expression. For example, /// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a /// valid Unicode property name. That particular error is reported when /// translating an AST to the high-level intermediate representation (`HIR`). @@ -385,7 +385,7 @@ impl PartialOrd for Position { impl Span { /// Create a new span with the given positions. pub fn new(start: Position, end: Position) -> Span { - Span { start: start, end: end } + Span { start, end } } /// Create a new span using the given position as the start and end. @@ -427,7 +427,7 @@ impl Position { /// /// `column` is the approximate column number, starting at `1`. pub fn new(offset: usize, line: usize, column: usize) -> Position { - Position { offset: offset, line: line, column: column } + Position { offset, line, column } } } @@ -1492,8 +1492,19 @@ mod tests { // We run our test on a thread with a small stack size so we can // force the issue more easily. + // + // NOTE(2023-03-21): It turns out that some platforms (like FreeBSD) + // will just barf with very small stack sizes. So we bump this up a bit + // to give more room to breath. When I did this, I confirmed that if + // I remove the custom `Drop` impl for `Ast`, then this test does + // indeed still fail with a stack overflow. (At the time of writing, I + // had to bump it all the way up to 32K before the test would pass even + // without the custom `Drop` impl. So 16K seems like a safe number + // here.) + // + // See: https://github.com/rust-lang/regex/issues/967 thread::Builder::new() - .stack_size(1 << 10) + .stack_size(16 << 10) .spawn(run) .unwrap() .join() diff --git a/src/ast/parse.rs b/src/ast/parse.rs index e62a7c2..6e9c9ac 100644 --- a/src/ast/parse.rs +++ b/src/ast/parse.rs @@ -167,7 +167,7 @@ impl ParserBuilder { /// they should impose a limit on the length, in bytes, of the concrete /// pattern string. In particular, this is viable since this parser /// implementation will limit itself to heap space proportional to the - /// lenth of the pattern string. + /// length of the pattern string. /// /// Note that a nest limit of `0` will return a nest limit error for most /// patterns but not all. For example, a nest limit of `0` permits `a` but @@ -202,7 +202,7 @@ impl ParserBuilder { /// Enable verbose mode in the regular expression. /// - /// When enabled, verbose mode permits insigificant whitespace in many + /// When enabled, verbose mode permits insignificant whitespace in many /// places in the regular expression, as well as comments. Comments are /// started using `#` and continue until the end of the line. /// @@ -236,7 +236,7 @@ pub struct Parser { /// supported. octal: bool, /// The initial setting for `ignore_whitespace` as provided by - /// Th`ParserBuilder`. is is used when reseting the parser's state. + /// `ParserBuilder`. It is used when resetting the parser's state. initial_ignore_whitespace: bool, /// Whether whitespace should be ignored. When enabled, comments are /// also permitted. @@ -366,7 +366,7 @@ impl Parser { impl<'s, P: Borrow<Parser>> ParserI<'s, P> { /// Build an internal parser from a parser configuration and a pattern. fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> { - ParserI { parser: parser, pattern: pattern } + ParserI { parser, pattern } } /// Return a reference to the parser state. @@ -381,11 +381,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { /// Create a new error with the given span and error type. fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error { - ast::Error { - kind: kind, - pattern: self.pattern().to_string(), - span: span, - } + ast::Error { kind, pattern: self.pattern().to_string(), span } } /// Return the current offset of the parser. @@ -481,11 +477,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { column = column.checked_add(1).unwrap(); } offset += self.char().len_utf8(); - self.parser().pos.set(Position { - offset: offset, - line: line, - column: column, - }); + self.parser().pos.set(Position { offset, line, column }); self.pattern()[self.offset()..].chars().next().is_some() } @@ -703,8 +695,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { .unwrap_or(old_ignore_whitespace); self.parser().stack_group.borrow_mut().push( GroupState::Group { - concat: concat, - group: group, + concat, + group, ignore_whitespace: old_ignore_whitespace, }, ); @@ -899,12 +891,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { #[inline(never)] fn unclosed_class_error(&self) -> ast::Error { for state in self.parser().stack_class.borrow().iter().rev() { - match *state { - ClassState::Open { ref set, .. } => { - return self - .error(set.span, ast::ErrorKind::ClassUnclosed); - } - _ => {} + if let ClassState::Open { ref set, .. } = *state { + return self.error(set.span, ast::ErrorKind::ClassUnclosed); } } // We are guaranteed to have a non-empty stack with at least @@ -950,8 +938,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { }; let span = Span::new(lhs.span().start, rhs.span().end); ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span: span, - kind: kind, + span, + kind, lhs: Box::new(lhs), rhs: Box::new(rhs), }) @@ -1010,7 +998,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { let ast = self.pop_group_end(concat)?; NestLimiter::new(self).check(&ast)?; Ok(ast::WithComments { - ast: ast, + ast, comments: mem::replace( &mut *self.parser().comments.borrow_mut(), vec![], @@ -1023,7 +1011,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { /// The given `kind` should correspond to the operator observed by the /// caller. /// - /// This assumes that the paser is currently positioned at the repetition + /// This assumes that the parser is currently positioned at the repetition /// operator and advances the parser to the first character after the /// operator. (Note that the operator may include a single additional `?`, /// which makes the operator ungreedy.) @@ -1066,9 +1054,9 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { span: ast.span().with_end(self.pos()), op: ast::RepetitionOp { span: Span::new(op_start, self.pos()), - kind: kind, + kind, }, - greedy: greedy, + greedy, ast: Box::new(ast), })); Ok(concat) @@ -1078,7 +1066,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { /// corresponds to the {m,n} syntax, and does not include the ?, * or + /// operators. /// - /// This assumes that the paser is currently positioned at the opening `{` + /// This assumes that the parser is currently positioned at the opening `{` /// and advances the parser to the first character after the operator. /// (Note that the operator may include a single additional `?`, which /// makes the operator ungreedy.) @@ -1170,7 +1158,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { span: op_span, kind: ast::RepetitionKind::Range(range), }, - greedy: greedy, + greedy, ast: Box::new(ast), })); Ok(concat) @@ -1235,7 +1223,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { } Ok(Either::Left(ast::SetFlags { span: Span { end: self.pos(), ..open_span }, - flags: flags, + flags, })) } else { assert_eq!(char_end, ':'); @@ -1428,7 +1416,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { let ast = Primitive::Literal(ast::Literal { span: self.span_char(), kind: ast::LiteralKind::Verbatim, - c: c, + c, }); self.bump(); Ok(ast) @@ -1494,16 +1482,16 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { let span = Span::new(start, self.pos()); if is_meta_character(c) { return Ok(Primitive::Literal(ast::Literal { - span: span, + span, kind: ast::LiteralKind::Punctuation, - c: c, + c, })); } let special = |kind, c| { Ok(Primitive::Literal(ast::Literal { - span: span, + span, kind: ast::LiteralKind::Special(kind), - c: c, + c, })) }; match c { @@ -1517,19 +1505,19 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { special(ast::SpecialLiteralKind::Space, ' ') } 'A' => Ok(Primitive::Assertion(ast::Assertion { - span: span, + span, kind: ast::AssertionKind::StartText, })), 'z' => Ok(Primitive::Assertion(ast::Assertion { - span: span, + span, kind: ast::AssertionKind::EndText, })), 'b' => Ok(Primitive::Assertion(ast::Assertion { - span: span, + span, kind: ast::AssertionKind::WordBoundary, })), 'B' => Ok(Primitive::Assertion(ast::Assertion { - span: span, + span, kind: ast::AssertionKind::NotWordBoundary, })), _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)), @@ -1569,7 +1557,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { ast::Literal { span: Span::new(start, end), kind: ast::LiteralKind::Octal, - c: c, + c, } } @@ -1645,7 +1633,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { Some(c) => Ok(ast::Literal { span: Span::new(start, end), kind: ast::LiteralKind::HexFixed(kind), - c: c, + c, }), } } @@ -1700,7 +1688,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { Some(c) => Ok(ast::Literal { span: Span::new(start, self.pos()), kind: ast::LiteralKind::HexBrace(kind), - c: c, + c, }), } } @@ -1927,7 +1915,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { })); if !self.bump_and_bump_space() { return Err(self.error( - Span::new(start, self.pos()), + Span::new(start, start), ast::ErrorKind::ClassUnclosed, )); } @@ -1949,7 +1937,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { } let set = ast::ClassBracketed { span: Span::new(start, self.pos()), - negated: negated, + negated, kind: ast::ClassSet::union(ast::ClassSetUnion { span: Span::new(union.span.start, union.span.start), items: vec![], @@ -2026,8 +2014,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { }; Some(ast::ClassAscii { span: Span::new(start, self.pos()), - kind: kind, - negated: negated, + kind, + negated, }) } @@ -2108,8 +2096,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { }; Ok(ast::ClassUnicode { span: Span::new(start, self.pos()), - negated: negated, - kind: kind, + negated, + kind, }) } @@ -2130,7 +2118,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { 'W' => (true, ast::ClassPerlKind::Word), c => panic!("expected valid Perl class but got '{}'", c), }; - ast::ClassPerl { span: span, kind: kind, negated: negated } + ast::ClassPerl { span, kind, negated } } } @@ -2146,7 +2134,7 @@ struct NestLimiter<'p, 's, P> { impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> { fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> { - NestLimiter { p: p, depth: 0 } + NestLimiter { p, depth: 0 } } #[inline(never)] @@ -2429,18 +2417,18 @@ mod tests { /// Create a punctuation literal starting at the given position. fn punct_lit(c: char, span: Span) -> Ast { Ast::Literal(ast::Literal { - span: span, + span, kind: ast::LiteralKind::Punctuation, - c: c, + c, }) } /// Create a verbatim literal with the given span. fn lit_with(c: char, span: Span) -> Ast { Ast::Literal(ast::Literal { - span: span, + span, kind: ast::LiteralKind::Verbatim, - c: c, + c, }) } @@ -2451,12 +2439,12 @@ mod tests { /// Create a concatenation with the given span. fn concat_with(span: Span, asts: Vec<Ast>) -> Ast { - Ast::Concat(ast::Concat { span: span, asts: asts }) + Ast::Concat(ast::Concat { span, asts }) } /// Create an alternation with the given span. fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast { - Ast::Alternation(ast::Alternation { span: span(range), asts: asts }) + Ast::Alternation(ast::Alternation { span: span(range), asts }) } /// Create a capturing group with the given span. @@ -2498,7 +2486,7 @@ mod tests { span: span_range(pat, range.clone()), flags: ast::Flags { span: span_range(pat, (range.start + 2)..(range.end - 1)), - items: items, + items, }, }) } @@ -4208,7 +4196,7 @@ bar Ok(Primitive::Literal(ast::Literal { span: span(0..2), kind: ast::LiteralKind::Special(kind.clone()), - c: c, + c, })) ); } @@ -4402,7 +4390,7 @@ bar kind: ast::LiteralKind::HexFixed( ast::HexLiteralKind::UnicodeShort ), - c: c, + c, })) ); } @@ -4466,7 +4454,7 @@ bar kind: ast::LiteralKind::HexFixed( ast::HexLiteralKind::UnicodeLong ), - c: c, + c, })) ); } @@ -4667,10 +4655,7 @@ bar #[test] fn parse_set_class() { fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet { - ast::ClassSet::union(ast::ClassSetUnion { - span: span, - items: items, - }) + ast::ClassSet::union(ast::ClassSetUnion { span, items }) } fn intersection( @@ -4679,7 +4664,7 @@ bar rhs: ast::ClassSet, ) -> ast::ClassSet { ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span: span, + span, kind: ast::ClassSetBinaryOpKind::Intersection, lhs: Box::new(lhs), rhs: Box::new(rhs), @@ -4692,7 +4677,7 @@ bar rhs: ast::ClassSet, ) -> ast::ClassSet { ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span: span, + span, kind: ast::ClassSetBinaryOpKind::Difference, lhs: Box::new(lhs), rhs: Box::new(rhs), @@ -4705,7 +4690,7 @@ bar rhs: ast::ClassSet, ) -> ast::ClassSet { ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span: span, + span, kind: ast::ClassSetBinaryOpKind::SymmetricDifference, lhs: Box::new(lhs), rhs: Box::new(rhs), @@ -4734,9 +4719,9 @@ bar fn lit(span: Span, c: char) -> ast::ClassSetItem { ast::ClassSetItem::Literal(ast::Literal { - span: span, + span, kind: ast::LiteralKind::Verbatim, - c: c, + c, }) } @@ -4756,7 +4741,7 @@ bar ..span.end }; ast::ClassSetItem::Range(ast::ClassSetRange { - span: span, + span, start: ast::Literal { span: Span { end: pos1, ..span }, kind: ast::LiteralKind::Verbatim, @@ -4771,19 +4756,11 @@ bar } fn alnum(span: Span, negated: bool) -> ast::ClassAscii { - ast::ClassAscii { - span: span, - kind: ast::ClassAsciiKind::Alnum, - negated: negated, - } + ast::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated } } fn lower(span: Span, negated: bool) -> ast::ClassAscii { - ast::ClassAscii { - span: span, - kind: ast::ClassAsciiKind::Lower, - negated: negated, - } + ast::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated } } assert_eq!( @@ -5515,14 +5492,23 @@ bar assert_eq!( parser("[-").parse_set_class_open().unwrap_err(), TestError { - span: span(0..2), + span: span(0..0), kind: ast::ErrorKind::ClassUnclosed, } ); assert_eq!( parser("[--").parse_set_class_open().unwrap_err(), TestError { - span: span(0..3), + span: span(0..0), + kind: ast::ErrorKind::ClassUnclosed, + } + ); + + // See: https://github.com/rust-lang/regex/issues/792 + assert_eq!( + parser("(?x)[-#]").parse_with_comments().unwrap_err(), + TestError { + span: span(4..4), kind: ast::ErrorKind::ClassUnclosed, } ); diff --git a/src/ast/print.rs b/src/ast/print.rs index 283ce4c..045de2e 100644 --- a/src/ast/print.rs +++ b/src/ast/print.rs @@ -57,17 +57,16 @@ impl Printer { /// here are a `fmt::Formatter` (which is available in `fmt::Display` /// implementations) or a `&mut String`. pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result { - visitor::visit(ast, Writer { printer: self, wtr: wtr }) + visitor::visit(ast, Writer { wtr }) } } #[derive(Debug)] -struct Writer<'p, W> { - printer: &'p mut Printer, +struct Writer<W> { wtr: W, } -impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { +impl<W: fmt::Write> Visitor for Writer<W> { type Output = (); type Err = fmt::Error; @@ -153,7 +152,7 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { } } -impl<'p, W: fmt::Write> Writer<'p, W> { +impl<W: fmt::Write> Writer<W> { fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result { use crate::ast::GroupKind::*; match ast.kind { diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index a0d1e7d..78ee487 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -388,7 +388,7 @@ impl<'a> HeapVisitor<'a> { Some(ClassFrame::Union { head: item, tail: &[] }) } ast::ClassSet::BinaryOp(ref op) => { - Some(ClassFrame::Binary { op: op }) + Some(ClassFrame::Binary { op }) } } } @@ -402,11 +402,9 @@ impl<'a> HeapVisitor<'a> { }) } } - ClassInduct::BinaryOp(op) => Some(ClassFrame::BinaryLHS { - op: op, - lhs: &op.lhs, - rhs: &op.rhs, - }), + ClassInduct::BinaryOp(op) => { + Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs }) + } _ => None, } } @@ -427,7 +425,7 @@ impl<'a> HeapVisitor<'a> { } ClassFrame::Binary { .. } => None, ClassFrame::BinaryLHS { op, rhs, .. } => { - Some(ClassFrame::BinaryRHS { op: op, rhs: rhs }) + Some(ClassFrame::BinaryRHS { op, rhs }) } ClassFrame::BinaryRHS { .. } => None, } diff --git a/src/error.rs b/src/error.rs index 71cfa42..1230d2f 100644 --- a/src/error.rs +++ b/src/error.rs @@ -182,7 +182,7 @@ impl<'p> Spans<'p> { if line_count <= 1 { 0 } else { line_count.to_string().len() }; let mut spans = Spans { pattern: &fmter.pattern, - line_number_width: line_number_width, + line_number_width, by_line: vec![vec![]; line_count], multi_line: vec![], }; @@ -288,7 +288,7 @@ fn repeat_char(c: char, count: usize) -> String { mod tests { use crate::ast::parse::Parser; - fn assert_panic_message(pattern: &str, expected_msg: &str) -> () { + fn assert_panic_message(pattern: &str, expected_msg: &str) { let result = Parser::new().parse(pattern); match result { Ok(_) => { diff --git a/src/hir/interval.rs b/src/hir/interval.rs index cfaa2cb..56698c5 100644 --- a/src/hir/interval.rs +++ b/src/hir/interval.rs @@ -114,8 +114,8 @@ impl<I: Interval> IntervalSet<I> { // we're done. let drain_end = self.ranges.len(); - let mut ita = (0..drain_end).into_iter(); - let mut itb = (0..other.ranges.len()).into_iter(); + let mut ita = 0..drain_end; + let mut itb = 0..other.ranges.len(); let mut a = ita.next().unwrap(); let mut b = itb.next().unwrap(); loop { diff --git a/src/hir/literal/mod.rs b/src/hir/literal/mod.rs index 25ee88b..fbc5d3c 100644 --- a/src/hir/literal/mod.rs +++ b/src/hir/literal/mod.rs @@ -225,7 +225,7 @@ impl Literals { if self.lits.is_empty() { return self.to_empty(); } - let mut old: Vec<Literal> = self.lits.iter().cloned().collect(); + let mut old = self.lits.to_vec(); let mut new = self.to_empty(); 'OUTER: while let Some(mut candidate) = old.pop() { if candidate.is_empty() { @@ -256,15 +256,13 @@ impl Literals { old.push(lit3); lit2.clear(); } - } else { - if let Some(i) = position(&lit2, &candidate) { - lit2.cut(); - let mut new_candidate = candidate.clone(); - new_candidate.truncate(i); - new_candidate.cut(); - old.push(new_candidate); - candidate.clear(); - } + } else if let Some(i) = position(&lit2, &candidate) { + lit2.cut(); + let mut new_candidate = candidate.clone(); + new_candidate.truncate(i); + new_candidate.cut(); + old.push(new_candidate); + candidate.clear(); } // Oops, the candidate is already represented in the set. if candidate.is_empty() { @@ -735,18 +733,18 @@ fn repeat_zero_or_one_literals<F: FnMut(&Hir, &mut Literals)>( lits: &mut Literals, mut f: F, ) { - let (mut lits2, mut lits3) = (lits.clone(), lits.to_empty()); - lits3.set_limit_size(lits.limit_size() / 2); - f(e, &mut lits3); - - if lits3.is_empty() || !lits2.cross_product(&lits3) { - lits.cut(); - return; - } - lits2.add(Literal::empty()); - if !lits.union(lits2) { - lits.cut(); - } + f( + &Hir::repetition(hir::Repetition { + kind: hir::RepetitionKind::ZeroOrMore, + // FIXME: Our literal extraction doesn't care about greediness. + // Which is partially why we're treating 'e?' as 'e*'. Namely, + // 'ab??' yields [Complete(ab), Complete(a)], but it should yield + // [Complete(a), Complete(ab)] because of the non-greediness. + greedy: true, + hir: Box::new(e.clone()), + }), + lits, + ); } fn repeat_zero_or_more_literals<F: FnMut(&Hir, &mut Literals)>( @@ -793,7 +791,7 @@ fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>( f( &Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::ZeroOrMore, - greedy: greedy, + greedy, hir: Box::new(e.clone()), }), lits, @@ -932,12 +930,10 @@ fn escape_unicode(bytes: &[u8]) -> String { if c.is_whitespace() { let escaped = if c as u32 <= 0x7F { escape_byte(c as u8) + } else if c as u32 <= 0xFFFF { + format!(r"\u{{{:04x}}}", c as u32) } else { - if c as u32 <= 0xFFFF { - format!(r"\u{{{:04x}}}", c as u32) - } else { - format!(r"\U{{{:08x}}}", c as u32) - } + format!(r"\U{{{:08x}}}", c as u32) }; space_escaped.push_str(&escaped); } else { @@ -1141,6 +1137,11 @@ mod tests { test_lit!(pfx_group1, prefixes, "(a)", M("a")); test_lit!(pfx_rep_zero_or_one1, prefixes, "a?"); test_lit!(pfx_rep_zero_or_one2, prefixes, "(?:abc)?"); + test_lit!(pfx_rep_zero_or_one_cat1, prefixes, "ab?", C("ab"), M("a")); + // FIXME: This should return [M("a"), M("ab")] because of the non-greedy + // repetition. As a work-around, we rewrite ab?? as ab*?, and thus we get + // a cut literal. + test_lit!(pfx_rep_zero_or_one_cat2, prefixes, "ab??", C("ab"), M("a")); test_lit!(pfx_rep_zero_or_more1, prefixes, "a*"); test_lit!(pfx_rep_zero_or_more2, prefixes, "(?:abc)*"); test_lit!(pfx_rep_one_or_more1, prefixes, "a+", C("a")); @@ -1249,8 +1250,8 @@ mod tests { pfx_crazy1, prefixes, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - C("Mo\\'am"), - C("Mu\\'am"), + C("Mo\\'"), + C("Mu\\'"), C("Moam"), C("Muam") ); diff --git a/src/hir/mod.rs b/src/hir/mod.rs index 4969f12..156bcc2 100644 --- a/src/hir/mod.rs +++ b/src/hir/mod.rs @@ -243,7 +243,7 @@ impl Hir { info.set_match_empty(true); info.set_literal(false); info.set_alternation_literal(false); - Hir { kind: HirKind::Empty, info: info } + Hir { kind: HirKind::Empty, info } } /// Creates a literal HIR expression. @@ -268,7 +268,7 @@ impl Hir { info.set_match_empty(false); info.set_literal(true); info.set_alternation_literal(true); - Hir { kind: HirKind::Literal(lit), info: info } + Hir { kind: HirKind::Literal(lit), info } } /// Creates a class HIR expression. @@ -285,7 +285,7 @@ impl Hir { info.set_match_empty(false); info.set_literal(false); info.set_alternation_literal(false); - Hir { kind: HirKind::Class(class), info: info } + Hir { kind: HirKind::Class(class), info } } /// Creates an anchor assertion HIR expression. @@ -318,7 +318,7 @@ impl Hir { if let Anchor::EndLine = anchor { info.set_line_anchored_end(true); } - Hir { kind: HirKind::Anchor(anchor), info: info } + Hir { kind: HirKind::Anchor(anchor), info } } /// Creates a word boundary assertion HIR expression. @@ -334,14 +334,18 @@ impl Hir { info.set_any_anchored_end(false); info.set_literal(false); info.set_alternation_literal(false); - // A negated word boundary matches the empty string, but a normal - // word boundary does not! - info.set_match_empty(word_boundary.is_negated()); + // A negated word boundary matches '', so that's fine. But \b does not + // match \b, so why do we say it can match the empty string? Well, + // because, if you search for \b against 'a', it will report [0, 0) and + // [1, 1) as matches, and both of those matches correspond to the empty + // string. Thus, only *certain* empty strings match \b, which similarly + // applies to \B. + info.set_match_empty(true); // Negated ASCII word boundaries can match invalid UTF-8. if let WordBoundary::AsciiNegate = word_boundary { info.set_always_utf8(false); } - Hir { kind: HirKind::WordBoundary(word_boundary), info: info } + Hir { kind: HirKind::WordBoundary(word_boundary), info } } /// Creates a repetition HIR expression. @@ -368,7 +372,7 @@ impl Hir { info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty()); info.set_literal(false); info.set_alternation_literal(false); - Hir { kind: HirKind::Repetition(rep), info: info } + Hir { kind: HirKind::Repetition(rep), info } } /// Creates a group HIR expression. @@ -385,7 +389,7 @@ impl Hir { info.set_match_empty(group.hir.is_match_empty()); info.set_literal(false); info.set_alternation_literal(false); - Hir { kind: HirKind::Group(group), info: info } + Hir { kind: HirKind::Group(group), info } } /// Returns the concatenation of the given expressions. @@ -476,7 +480,7 @@ impl Hir { }) .any(|e| e.is_line_anchored_end()), ); - Hir { kind: HirKind::Concat(exprs), info: info } + Hir { kind: HirKind::Concat(exprs), info } } } } @@ -538,7 +542,7 @@ impl Hir { let x = info.is_alternation_literal() && e.is_literal(); info.set_alternation_literal(x); } - Hir { kind: HirKind::Alternation(exprs), info: info } + Hir { kind: HirKind::Alternation(exprs), info } } } } @@ -661,8 +665,8 @@ impl Hir { /// Return true if and only if the empty string is part of the language /// matched by this regular expression. /// - /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\B`, - /// but not `a`, `a+` or `\b`. + /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\b` + /// and `\B`, but not `a` or `a+`. pub fn is_match_empty(&self) -> bool { self.info.is_match_empty() } @@ -2282,8 +2286,11 @@ mod tests { // We run our test on a thread with a small stack size so we can // force the issue more easily. + // + // NOTE(2023-03-21): See the corresponding test in 'crate::ast::tests' + // for context on the specific stack size chosen here. thread::Builder::new() - .stack_size(1 << 10) + .stack_size(16 << 10) .spawn(run) .unwrap() .join() diff --git a/src/hir/print.rs b/src/hir/print.rs index ff18c6e..b71f389 100644 --- a/src/hir/print.rs +++ b/src/hir/print.rs @@ -65,17 +65,16 @@ impl Printer { /// here are a `fmt::Formatter` (which is available in `fmt::Display` /// implementations) or a `&mut String`. pub fn print<W: fmt::Write>(&mut self, hir: &Hir, wtr: W) -> fmt::Result { - visitor::visit(hir, Writer { printer: self, wtr: wtr }) + visitor::visit(hir, Writer { wtr }) } } #[derive(Debug)] -struct Writer<'p, W> { - printer: &'p mut Printer, +struct Writer<W> { wtr: W, } -impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { +impl<W: fmt::Write> Visitor for Writer<W> { type Output = (); type Err = fmt::Error; @@ -209,7 +208,7 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { } } -impl<'p, W: fmt::Write> Writer<'p, W> { +impl<W: fmt::Write> Writer<W> { fn write_literal_char(&mut self, c: char) -> fmt::Result { if is_meta_character(c) { self.wtr.write_str("\\")?; diff --git a/src/hir/translate.rs b/src/hir/translate.rs index 99c9493..890e160 100644 --- a/src/hir/translate.rs +++ b/src/hir/translate.rs @@ -434,20 +434,14 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { } ast::ClassSetItem::Ascii(ref x) => { if self.flags().unicode() { + let xcls = self.hir_ascii_unicode_class(x)?; let mut cls = self.pop().unwrap().unwrap_class_unicode(); - for &(s, e) in ascii_class(&x.kind) { - cls.push(hir::ClassUnicodeRange::new(s, e)); - } - self.unicode_fold_and_negate( - &x.span, x.negated, &mut cls, - )?; + cls.union(&xcls); self.push(HirFrame::ClassUnicode(cls)); } else { + let xcls = self.hir_ascii_byte_class(x)?; let mut cls = self.pop().unwrap().unwrap_class_bytes(); - for &(s, e) in ascii_class(&x.kind) { - cls.push(hir::ClassBytesRange::new(s as u8, e as u8)); - } - self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?; + cls.union(&xcls); self.push(HirFrame::ClassBytes(cls)); } } @@ -595,7 +589,7 @@ struct TranslatorI<'t, 'p> { impl<'t, 'p> TranslatorI<'t, 'p> { /// Build a new internal translator. fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> { - TranslatorI { trans: trans, pattern: pattern } + TranslatorI { trans, pattern } } /// Return a reference to the underlying translator. @@ -615,7 +609,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { /// Create a new error with the given span and error type. fn error(&self, span: Span, kind: ErrorKind) -> Error { - Error { kind: kind, pattern: self.pattern.to_string(), span: span } + Error { kind, pattern: self.pattern.to_string(), span } } /// Return a copy of the active flags. @@ -785,7 +779,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { } ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing, }; - Hir::group(hir::Group { kind: kind, hir: Box::new(expr) }) + Hir::group(hir::Group { kind, hir: Box::new(expr) }) } fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { @@ -808,11 +802,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { }; let greedy = if self.flags().swap_greed() { !rep.greedy } else { rep.greedy }; - Hir::repetition(hir::Repetition { - kind: kind, - greedy: greedy, - hir: Box::new(expr), - }) + Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) }) } fn hir_unicode_class( @@ -853,6 +843,32 @@ impl<'t, 'p> TranslatorI<'t, 'p> { result } + fn hir_ascii_unicode_class( + &self, + ast: &ast::ClassAscii, + ) -> Result<hir::ClassUnicode> { + let mut cls = hir::ClassUnicode::new( + ascii_class(&ast.kind) + .iter() + .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)), + ); + self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?; + Ok(cls) + } + + fn hir_ascii_byte_class( + &self, + ast: &ast::ClassAscii, + ) -> Result<hir::ClassBytes> { + let mut cls = hir::ClassBytes::new( + ascii_class(&ast.kind) + .iter() + .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)), + ); + self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?; + Ok(cls) + } + fn hir_perl_unicode_class( &self, ast_class: &ast::ClassPerl, @@ -948,7 +964,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { class: &mut hir::ClassBytes, ) -> Result<()> { // Note that we must apply case folding before negation! - // Consider `(?i)[^x]`. If we applied negation field, then + // Consider `(?i)[^x]`. If we applied negation first, then // the result would be the character class that matched any // Unicode scalar value. if self.flags().case_insensitive() { @@ -1218,7 +1234,7 @@ mod tests { fn hir_quest(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::ZeroOrOne, - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1226,7 +1242,7 @@ mod tests { fn hir_star(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::ZeroOrMore, - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1234,7 +1250,7 @@ mod tests { fn hir_plus(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::OneOrMore, - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1242,7 +1258,7 @@ mod tests { fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::Range(range), - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1944,6 +1960,25 @@ mod tests { } #[test] + fn class_ascii_multiple() { + // See: https://github.com/rust-lang/regex/issues/680 + assert_eq!( + t("[[:alnum:][:^ascii:]]"), + hir_union( + hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)), + hir_uclass(&[('\u{80}', '\u{10FFFF}')]), + ), + ); + assert_eq!( + t_bytes("(?-u)[[:alnum:][:^ascii:]]"), + hir_union( + hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)), + hir_bclass(&[(0x80, 0xFF)]), + ), + ); + } + + #[test] #[cfg(feature = "unicode-perl")] fn class_perl() { // Unicode @@ -3100,6 +3135,9 @@ mod tests { assert!(t(r"\pL*").is_match_empty()); assert!(t(r"a*|b").is_match_empty()); assert!(t(r"b|a*").is_match_empty()); + assert!(t(r"a|").is_match_empty()); + assert!(t(r"|a").is_match_empty()); + assert!(t(r"a||b").is_match_empty()); assert!(t(r"a*a?(abcd)*").is_match_empty()); assert!(t(r"^").is_match_empty()); assert!(t(r"$").is_match_empty()); @@ -3109,6 +3147,8 @@ mod tests { assert!(t(r"\z").is_match_empty()); assert!(t(r"\B").is_match_empty()); assert!(t_bytes(r"(?-u)\B").is_match_empty()); + assert!(t(r"\b").is_match_empty()); + assert!(t(r"(?-u)\b").is_match_empty()); // Negative examples. assert!(!t(r"a+").is_match_empty()); @@ -3118,8 +3158,6 @@ mod tests { assert!(!t(r"a{1,10}").is_match_empty()); assert!(!t(r"b|a").is_match_empty()); assert!(!t(r"a*a+(abcd)*").is_match_empty()); - assert!(!t(r"\b").is_match_empty()); - assert!(!t(r"(?-u)\b").is_match_empty()); } #[test] @@ -195,7 +195,7 @@ pub fn escape_into(text: &str, buf: &mut String) { } } -/// Returns true if the give character has significance in a regex. +/// Returns true if the given character has significance in a regex. /// /// These are the only characters that are allowed to be escaped, with one /// exception: an ASCII space character may be escaped when extended mode (with diff --git a/src/parser.rs b/src/parser.rs index eb363ca..ded95b2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -45,7 +45,7 @@ impl ParserBuilder { /// they should impose a limit on the length, in bytes, of the concrete /// pattern string. In particular, this is viable since this parser /// implementation will limit itself to heap space proportional to the - /// lenth of the pattern string. + /// length of the pattern string. /// /// Note that a nest limit of `0` will return a nest limit error for most /// patterns but not all. For example, a nest limit of `0` permits `a` but @@ -96,7 +96,7 @@ impl ParserBuilder { /// Enable verbose mode in the regular expression. /// - /// When enabled, verbose mode permits insigificant whitespace in many + /// When enabled, verbose mode permits insignificant whitespace in many /// places in the regular expression, as well as comments. Comments are /// started using `#` and continue until the end of the line. /// diff --git a/src/unicode.rs b/src/unicode.rs index b894c7d..8194d7f 100644 --- a/src/unicode.rs +++ b/src/unicode.rs @@ -99,7 +99,7 @@ pub fn simple_fold( Ok(CASE_FOLDING_SIMPLE .binary_search_by_key(&c, |&(c1, _)| c1) - .map(|i| CASE_FOLDING_SIMPLE[i].1.iter().map(|&c| c)) + .map(|i| CASE_FOLDING_SIMPLE[i].1.iter().copied()) .map_err(|i| { if i >= CASE_FOLDING_SIMPLE.len() { None @@ -580,7 +580,7 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> { fn imp(canonical_age: &str) -> Result<impl Iterator<Item = Range>> { use crate::unicode_tables::age; - const AGES: &'static [(&'static str, Range)] = &[ + const AGES: &[(&str, Range)] = &[ ("V1_1", age::V1_1), ("V2_0", age::V2_0), ("V2_1", age::V2_1), @@ -604,13 +604,15 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> { ("V12_0", age::V12_0), ("V12_1", age::V12_1), ("V13_0", age::V13_0), + ("V14_0", age::V14_0), + ("V15_0", age::V15_0), ]; assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync"); let pos = AGES.iter().position(|&(age, _)| canonical_age == age); match pos { None => Err(Error::PropertyValueNotFound), - Some(i) => Ok(AGES[..i + 1].iter().map(|&(_, classes)| classes)), + Some(i) => Ok(AGES[..=i].iter().map(|&(_, classes)| classes)), } } diff --git a/src/unicode_tables/age.rs b/src/unicode_tables/age.rs index 7772919..71f4861 100644 --- a/src/unicode_tables/age.rs +++ b/src/unicode_tables/age.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate age ucd-13.0.0 --chars +// ucd-generate age ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("V10_0", V10_0), @@ -12,6 +12,8 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("V12_0", V12_0), ("V12_1", V12_1), ("V13_0", V13_0), + ("V14_0", V14_0), + ("V15_0", V15_0), ("V1_1", V1_1), ("V2_0", V2_0), ("V2_1", V2_1), @@ -203,69 +205,185 @@ pub const V12_0: &'static [(char, char)] = &[ pub const V12_1: &'static [(char, char)] = &[('ăż', 'ăż')]; pub const V13_0: &'static [(char, char)] = &[ - ('\u{8be}', '\u{8c7}'), + ('àąŸ', 'àŁ'), ('\u{b55}', '\u{b55}'), - ('\u{d04}', '\u{d04}'), + ('àŽ', 'àŽ'), ('\u{d81}', '\u{d81}'), ('\u{1abf}', '\u{1ac0}'), - ('\u{2b97}', '\u{2b97}'), - ('\u{2e50}', '\u{2e52}'), - ('\u{31bb}', '\u{31bf}'), - ('\u{4db6}', '\u{4dbf}'), - ('\u{9ff0}', '\u{9ffc}'), - ('\u{a7c7}', '\u{a7ca}'), - ('\u{a7f5}', '\u{a7f6}'), + ('âź', 'âź'), + ('âč', 'âč'), + ('ă»', 'ăż'), + ('䶶', '䶿'), + ('éż°', 'éżŒ'), + ('ê', 'ê'), + ('ê”', 'ê¶'), ('\u{a82c}', '\u{a82c}'), - ('\u{ab68}', '\u{ab6b}'), - ('\u{1019c}', '\u{1019c}'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eab}', '\u{10ead}'), - ('\u{10eb0}', '\u{10eb1}'), - ('\u{10fb0}', '\u{10fcb}'), - ('\u{11147}', '\u{11147}'), - ('\u{111ce}', '\u{111cf}'), - ('\u{1145a}', '\u{1145a}'), - ('\u{11460}', '\u{11461}'), - ('\u{11900}', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193b}', '\u{11946}'), - ('\u{11950}', '\u{11959}'), - ('\u{11fb0}', '\u{11fb0}'), + ('êš', 'ê«'), + ('đ', 'đ'), + ('đș', 'đș©'), + ('\u{10eab}', 'đș'), + ('đș°', 'đș±'), + ('đŸ°', 'đż'), + ('đ
', 'đ
'), + ('đ', '\u{111cf}'), + ('đ', 'đ'), + ('đ ', 'đĄ'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€”'), + ('đ€·', 'đ€ž'), + ('\u{1193b}', 'đ„'), + ('đ„', 'đ„'), + ('đŸ°', 'đŸ°'), ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('\u{18af3}', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('\u{1f10d}', '\u{1f10f}'), - ('\u{1f16d}', '\u{1f16f}'), - ('\u{1f1ad}', '\u{1f1ad}'), - ('\u{1f6d6}', '\u{1f6d7}'), - ('\u{1f6fb}', '\u{1f6fc}'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('\u{1f90c}', '\u{1f90c}'), - ('\u{1f972}', '\u{1f972}'), - ('\u{1f977}', '\u{1f978}'), - ('\u{1f9a3}', '\u{1f9a4}'), - ('\u{1f9ab}', '\u{1f9ad}'), - ('\u{1f9cb}', '\u{1f9cb}'), - ('\u{1fa74}', '\u{1fa74}'), - ('\u{1fa83}', '\u{1fa86}'), - ('\u{1fa96}', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), - ('\u{1fbf0}', '\u{1fbf9}'), - ('\u{2a6d7}', '\u{2a6dd}'), - ('\u{30000}', '\u{3134a}'), + ('đż°', 'đż±'), + ('đ«ł', 'đł'), + ('đŽ', 'đŽ'), + ('đ', 'đ'), + ('đ
', 'đ
Ż'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ»', 'đŒ'), + ('đą°', 'đą±'), + ('đ€', 'đ€'), + ('đ„Č', 'đ„Č'), + ('đ„·', 'đ„ž'), + ('đŠŁ', 'đŠ€'), + ('đŠ«', 'đŠ'), + ('đ§', 'đ§'), + ('đ©Ž', 'đ©Ž'), + ('đȘ', 'đȘ'), + ('đȘ', 'đȘš'), + ('đȘ°', 'đȘ¶'), + ('đ«', 'đ«'), + ('đ«', 'đ«'), + ('đŹ', 'đź'), + ('đź', 'đŻ'), + ('đŻ°', 'đŻč'), + ('đȘ', 'đȘ'), + ('đ°', 'đ±'), +]; + +pub const V14_0: &'static [(char, char)] = &[ + ('Ű', 'Ű'), + ('àĄ°', 'àą'), + ('\u{890}', '\u{891}'), + ('\u{898}', '\u{89f}'), + ('àą”', 'àą”'), + ('àŁ', '\u{8d2}'), + ('\u{c3c}', '\u{c3c}'), + ('à±', 'à±'), + ('àł', 'àł'), + ('á', 'á'), + ('á', 'á'), + ('á', 'á'), + ('\u{180f}', '\u{180f}'), + ('\u{1ac1}', '\u{1ace}'), + ('á', 'á'), + ('áœ', 'áŸ'), + ('\u{1dfa}', '\u{1dfa}'), + ('â', 'â'), + ('â°Ż', 'â°Ż'), + ('â±', 'â±'), + ('âč', 'âč'), + ('éżœ', 'éżż'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'êŽ'), + ('ïŻ', 'ïŻ'), + ('ï”', 'ï”'), + ('ï·', 'ï·'), + ('ï·Ÿ', 'ï·ż'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), + ('đœ°', 'đŸ'), + ('\u{11070}', 'đ”'), + ('\u{110c2}', '\u{110c2}'), + ('đč', 'đč'), + ('đ', 'đ'), + ('đȘ°', 'đȘż'), + ('đŸ', 'đżČ'), + ('đ©°', 'đȘŸ'), + ('đ«', 'đ«'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('đ', 'đą'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), + ('đœ', 'đż'), + ('đ©', 'đȘ'), + ('đŒ', 'đŒ'), + ('đ', '\u{1e2ae}'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), + ('đ', 'đ'), + ('đ°', 'đ°'), + ('đ„č', 'đ„č'), + ('đ§', 'đ§'), + ('đ©»', 'đ©Œ'), + ('đȘ©', 'đȘŹ'), + ('đȘ·', 'đȘș'), + ('đ«', 'đ«
'), + ('đ«', 'đ«'), + ('đ« ', 'đ«§'), + ('đ«°', 'đ«¶'), + ('đȘ', 'đȘ'), + ('đ«”', 'đ«ž'), +]; + +pub const V15_0: &'static [(char, char)] = &[ + ('àłł', 'àłł'), + ('\u{ece}', '\u{ece}'), + ('\u{10efd}', '\u{10eff}'), + ('đż', '\u{11241}'), + ('đŹ', 'đŹ'), + ('\u{11f00}', 'đŒ'), + ('đŒ', '\u{11f3a}'), + ('đŒŸ', 'đœ'), + ('đŻ', 'đŻ'), + ('\u{13439}', '\u{13455}'), + ('đČ', 'đČ'), + ('đ
', 'đ
'), + ('đ', 'đ'), + ('đŒ„', 'đŒȘ'), + ('đ°', 'đ'), + ('\u{1e08f}', '\u{1e08f}'), + ('đ', 'đč'), + ('đ', 'đ'), + ('đŽ', 'đ¶'), + ('đ»', 'đż'), + ('đ', 'đ'), + ('đ©”', 'đ©·'), + ('đȘ', 'đȘ'), + ('đȘ', 'đȘŻ'), + ('đȘ»', 'đȘœ'), + ('đȘż', 'đȘż'), + ('đ«', 'đ«'), + ('đ«', 'đ«'), + ('đ«š', 'đ«š'), + ('đ«·', 'đ«ž'), + ('đ«č', 'đ«č'), + ('đ±', 'đČŻ'), ]; pub const V1_1: &'static [(char, char)] = &[ - ('\u{0}', 'Ç”'), + ('\0', 'Ç”'), ('Çș', 'È'), ('É', 'Êš'), ('Ê°', 'Ë'), diff --git a/src/unicode_tables/case_folding_simple.rs b/src/unicode_tables/case_folding_simple.rs index cfb83f3..23f9364 100644 --- a/src/unicode_tables/case_folding_simple.rs +++ b/src/unicode_tables/case_folding_simple.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate case-folding-simple ucd-13.0.0 --chars --all-pairs +// ucd-generate case-folding-simple ucd-15.0.0 --chars --all-pairs // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('A', &['a']), @@ -1781,6 +1781,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('â°Ź', &['â±']), ('â°', &['â±']), ('â°ź', &['â±']), + ('â°Ż', &['â±']), ('â°°', &['â°']), ('â°±', &['â°']), ('â°Č', &['â°']), @@ -1828,6 +1829,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('â±', &['â°Ź']), ('â±', &['â°']), ('â±', &['â°ź']), + ('â±', &['â°Ż']), ('â± ', &['ⱥ']), ('ⱥ', &['â± ']), ('ⱹ', &['É«']), @@ -2211,17 +2213,25 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('êœ', &['êŒ']), ('êŸ', &['êż']), ('êż', &['êŸ']), + ('ê', &['ê']), + ('ê', &['ê']), ('ê', &['ê']), ('ê', &['ê']), ('ê', &['ê']), ('ê
', &['Ê']), ('ê', &['á¶']), - ('\u{a7c7}', &['\u{a7c8}']), - ('\u{a7c8}', &['\u{a7c7}']), - ('\u{a7c9}', &['\u{a7ca}']), - ('\u{a7ca}', &['\u{a7c9}']), - ('\u{a7f5}', &['\u{a7f6}']), - ('\u{a7f6}', &['\u{a7f5}']), + ('ê', &['ê']), + ('ê', &['ê']), + ('ê', &['ê']), + ('ê', &['ê']), + ('ê', &['ê']), + ('ê', &['ê']), + ('ê', &['ê']), + ('ê', &['ê']), + ('ê', &['ê']), + ('ê', &['ê']), + ('ê”', &['ê¶']), + ('ê¶', &['ê”']), ('ê', &['êł']), ('ê°', &['á ']), ('ê±', &['áĄ']), @@ -2507,6 +2517,76 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('đč', &['đ']), ('đș', &['đ']), ('đ»', &['đ']), + ('đ°', &['đ']), + ('đ±', &['đ']), + ('đČ', &['đ']), + ('đł', &['đ']), + ('đŽ', &['đ']), + ('đ”', &['đ']), + ('đ¶', &['đ']), + ('đ·', &['đ']), + ('đž', &['đ']), + ('đč', &['đ ']), + ('đș', &['đĄ']), + ('đŒ', &['đŁ']), + ('đœ', &['đ€']), + ('đŸ', &['đ„']), + ('đż', &['đŠ']), + ('đ', &['đ§']), + ('đ', &['đš']), + ('đ', &['đ©']), + ('đ', &['đȘ']), + ('đ', &['đ«']), + ('đ
', &['đŹ']), + ('đ', &['đ']), + ('đ', &['đź']), + ('đ', &['đŻ']), + ('đ', &['đ°']), + ('đ', &['đ±']), + ('đ', &['đł']), + ('đ', &['đŽ']), + ('đ', &['đ”']), + ('đ', &['đ¶']), + ('đ', &['đ·']), + ('đ', &['đž']), + ('đ', &['đč']), + ('đ', &['đ»']), + ('đ', &['đŒ']), + ('đ', &['đ°']), + ('đ', &['đ±']), + ('đ', &['đČ']), + ('đ', &['đł']), + ('đ', &['đŽ']), + ('đ', &['đ”']), + ('đ', &['đ¶']), + ('đ', &['đ·']), + ('đ', &['đž']), + ('đ ', &['đč']), + ('đĄ', &['đș']), + ('đŁ', &['đŒ']), + ('đ€', &['đœ']), + ('đ„', &['đŸ']), + ('đŠ', &['đż']), + ('đ§', &['đ']), + ('đš', &['đ']), + ('đ©', &['đ']), + ('đȘ', &['đ']), + ('đ«', &['đ']), + ('đŹ', &['đ
']), + ('đ', &['đ']), + ('đź', &['đ']), + ('đŻ', &['đ']), + ('đ°', &['đ']), + ('đ±', &['đ']), + ('đł', &['đ']), + ('đŽ', &['đ']), + ('đ”', &['đ']), + ('đ¶', &['đ']), + ('đ·', &['đ']), + ('đž', &['đ']), + ('đč', &['đ']), + ('đ»', &['đ']), + ('đŒ', &['đ']), ('đČ', &['đł']), ('đČ', &['đł']), ('đČ', &['đł']), diff --git a/src/unicode_tables/general_category.rs b/src/unicode_tables/general_category.rs index 33b7b7e..8fc9289 100644 --- a/src/unicode_tables/general_category.rs +++ b/src/unicode_tables/general_category.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate general-category ucd-13.0.0 --chars --exclude surrogate +// ucd-generate general-category ucd-15.0.0 --chars --exclude surrogate // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Cased_Letter", CASED_LETTER), @@ -116,9 +116,7 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('â
', 'â
'), ('â
', 'â
'), ('â', 'â'), - ('â°', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'â±»'), + ('â°', 'â±»'), ('ⱟ', 'Ⳁ'), ('âł«', 'âłź'), ('âłČ', 'âłł'), @@ -130,12 +128,14 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('êą', 'êŻ'), ('ê±', 'ê'), ('ê', 'ê'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', '\u{a7f6}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê”', 'ê¶'), ('êș', 'êș'), ('êŹ°', 'ê'), - ('ê ', '\u{ab68}'), + ('ê ', 'êš'), ('ê°', 'êźż'), ('ïŹ', 'ïŹ'), ('ïŹ', 'ïŹ'), @@ -144,6 +144,14 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ°', 'đ'), ('đ', 'đ»'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đČ', 'đČČ'), ('đł', 'đłČ'), ('đą ', 'đŁ'), @@ -178,6 +186,9 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('đ', 'đš'), ('đȘ', 'đ'), ('đ', 'đ'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), ('đ€', 'đ„'), ]; @@ -225,6 +236,10 @@ pub const CLOSE_PUNCTUATION: &'static [(char, char)] = &[ ('âž„', 'âž„'), ('➧', '➧'), ('âž©', 'âž©'), + ('âč', 'âč'), + ('âč', 'âč'), + ('âč', 'âč'), + ('âč', 'âč'), ('ă', 'ă'), ('ă', 'ă'), ('ă', 'ă'), @@ -266,7 +281,7 @@ pub const CONNECTOR_PUNCTUATION: &'static [(char, char)] = &[ ]; pub const CONTROL: &'static [(char, char)] = - &[('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}')]; + &[('\0', '\u{1f}'), ('\u{7f}', '\u{9f}')]; pub const CURRENCY_SYMBOL: &'static [(char, char)] = &[ ('$', '$'), @@ -280,7 +295,7 @@ pub const CURRENCY_SYMBOL: &'static [(char, char)] = &[ ('àŻč', 'àŻč'), ('àžż', 'àžż'), ('á', 'á'), - ('â ', 'âż'), + ('â ', 'â'), ('ê ž', 'ê ž'), ('ï·Œ', 'ï·Œ'), ('ïč©', 'ïč©'), @@ -303,6 +318,7 @@ pub const DASH_PUNCTUATION: &'static [(char, char)] = &[ ('âž', 'âž'), ('âžș', 'âž»'), ('âč', 'âč'), + ('âč', 'âč'), ('ă', 'ă'), ('ă°', 'ă°'), ('ă ', 'ă '), @@ -310,7 +326,7 @@ pub const DASH_PUNCTUATION: &'static [(char, char)] = &[ ('ïč', 'ïč'), ('ïčŁ', 'ïčŁ'), ('ïŒ', 'ïŒ'), - ('\u{10ead}', '\u{10ead}'), + ('đș', 'đș'), ]; pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ @@ -364,17 +380,20 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ°', 'đč'), ('đŁ ', 'đŁ©'), - ('\u{11950}', '\u{11959}'), + ('đ„', 'đ„'), ('đ±', 'đ±'), ('đ”', 'đ”'), ('đ¶ ', 'đ¶©'), + ('đœ', 'đœ'), ('đ© ', 'đ©©'), + ('đ«', 'đ«'), ('đ', 'đ'), ('đ', 'đż'), ('đ
', 'đ
'), ('đ°', 'đč'), + ('đ°', 'đč'), ('đ„', 'đ„'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('đŻ°', 'đŻč'), ]; pub const ENCLOSING_MARK: &'static [(char, char)] = &[ @@ -404,6 +423,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{61c}', '\u{61c}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), + ('\u{890}', '\u{891}'), ('\u{8e2}', '\u{8e2}'), ('\u{180e}', '\u{180e}'), ('\u{200b}', '\u{200f}'), @@ -414,7 +434,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{fff9}', '\u{fffb}'), ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), - ('\u{13430}', '\u{13438}'), + ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), ('\u{e0001}', '\u{e0001}'), @@ -485,8 +505,9 @@ pub const LETTER: &'static [(char, char)] = &[ ('à š', 'à š'), ('àĄ', 'àĄ'), ('àĄ ', 'àĄȘ'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), + ('àĄ°', 'àą'), + ('àą', 'àą'), + ('àą ', 'àŁ'), ('à€', 'à€č'), ('à€œ', 'à€œ'), ('à„', 'à„'), @@ -551,6 +572,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('à°Ș', 'à°č'), ('à°œ', 'à°œ'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', 'à±Ą'), ('àČ', 'àČ'), ('àČ
', 'àČ'), @@ -559,10 +581,10 @@ pub const LETTER: &'static [(char, char)] = &[ ('àČȘ', 'àČł'), ('àČ”', 'àČč'), ('àČœ', 'àČœ'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', 'àłĄ'), ('àł±', 'àłČ'), - ('\u{d04}', 'àŽ'), + ('àŽ', 'àŽ'), ('àŽ', 'àŽ'), ('àŽ', 'àŽș'), ('àŽœ', 'àŽœ'), @@ -630,9 +652,8 @@ pub const LETTER: &'static [(char, char)] = &[ ('á', 'á'), ('á ', 'áȘ'), ('á±', 'áž'), - ('á', 'á'), - ('á', 'á'), - ('á ', 'á±'), + ('á', 'á'), + ('á', 'á±'), ('á', 'á'), ('á ', 'áŹ'), ('áź', 'á°'), @@ -653,7 +674,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('áš ', 'á©'), ('áȘ§', 'áȘ§'), ('áŹ
', 'Ᏻ'), - ('á
', 'á'), + ('á
', 'á'), ('áź', 'áź '), ('áźź', '៯'), ('áźș', 'ᯄ'), @@ -704,9 +725,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('â
', 'â
'), ('â
', 'â
'), ('â', 'â'), - ('â°', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'Ⳁ'), + ('â°', 'Ⳁ'), ('âł«', 'âłź'), ('âłČ', 'âłł'), ('âŽ', '⎄'), @@ -733,11 +752,10 @@ pub const LETTER: &'static [(char, char)] = &[ ('ăŒ', 'ăż'), ('ă
', 'ăŻ'), ('ă±', 'ă'), - ('ă ', '\u{31bf}'), + ('ă ', 'ăż'), ('ă°', 'ăż'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), - ('ê', 'ê'), + ('ă', '䶿'), + ('äž', 'ê'), ('ê', 'êœ'), ('ê', 'ê'), ('ê', 'ê'), @@ -747,9 +765,11 @@ pub const LETTER: &'static [(char, char)] = &[ ('ê ', 'ê„'), ('ê', 'ê'), ('êą', 'ê'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', 'ê '), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'ê '), ('ê ', 'ê
'), ('ê ', 'ê '), ('ê ', 'ê ą'), @@ -786,7 +806,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), ('êŹ°', 'ê'), - ('ê', '\u{ab69}'), + ('ê', 'ê©'), ('ê°', 'êŻą'), ('ê°', 'íŁ'), ('í°', 'í'), @@ -837,9 +857,20 @@ pub const LETTER: &'static [(char, char)] = &[ ('đ', 'đ»'), ('đ', 'đ§'), ('đ°', 'đŁ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đ', 'đ¶'), ('đ', 'đ'), ('đ ', 'đ§'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đ ', 'đ
'), ('đ ', 'đ '), ('đ ', 'đ ”'), @@ -870,19 +901,22 @@ pub const LETTER: &'static [(char, char)] = &[ ('đČ', 'đČČ'), ('đł', 'đłČ'), ('đŽ', 'đŽŁ'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('đș', 'đș©'), + ('đș°', 'đș±'), ('đŒ', 'đŒ'), ('đŒ§', 'đŒ§'), ('đŒ°', 'đœ
'), - ('\u{10fb0}', '\u{10fc4}'), + ('đœ°', 'đŸ'), + ('đŸ°', 'đż'), ('đż ', 'đż¶'), ('đ', 'đ·'), + ('đ±', 'đČ'), + ('đ”', 'đ”'), ('đ', 'đŻ'), ('đ', 'đš'), ('đ', 'đŠ'), ('đ
', 'đ
'), - ('\u{11147}', '\u{11147}'), + ('đ
', 'đ
'), ('đ
', 'đ
Č'), ('đ
¶', 'đ
¶'), ('đ', 'đČ'), @@ -891,6 +925,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ«'), + ('đż', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), @@ -908,7 +943,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('đ', 'đĄ'), ('đ', 'đŽ'), ('đ', 'đ'), - ('đ', '\u{11461}'), + ('đ', 'đĄ'), ('đ', 'đŻ'), ('đ', 'đ
'), ('đ', 'đ'), @@ -919,15 +954,16 @@ pub const LETTER: &'static [(char, char)] = &[ ('đ', 'đȘ'), ('đž', 'đž'), ('đ', 'đ'), + ('đ', 'đ'), ('đ ', 'đ «'), ('đą ', 'đŁ'), - ('đŁż', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('đŁż', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€Ż'), + ('đ€ż', 'đ€ż'), + ('đ„', 'đ„'), ('đŠ ', 'đŠ§'), ('đŠȘ', 'đ§'), ('đ§Ą', 'đ§Ą'), @@ -938,7 +974,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('đ©', 'đ©'), ('đ©', 'đȘ'), ('đȘ', 'đȘ'), - ('đ«', 'đ«ž'), + ('đȘ°', 'đ«ž'), ('đ°', 'đ°'), ('đ°', 'đ°ź'), ('đ±', 'đ±'), @@ -952,13 +988,19 @@ pub const LETTER: &'static [(char, char)] = &[ ('đ”Ș', 'đ¶'), ('đ¶', 'đ¶'), ('đ» ', 'đ»Č'), - ('\u{11fb0}', '\u{11fb0}'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒł'), + ('đŸ°', 'đŸ°'), ('đ', 'đ'), ('đ', 'đ'), - ('đ', 'đź'), + ('đŸ', 'đż°'), + ('đ', 'đŻ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đšž'), ('đ©', 'đ©'), + ('đ©°', 'đȘŸ'), ('đ«', 'đ«'), ('đŹ', 'đŹŻ'), ('đ', 'đ'), @@ -971,10 +1013,15 @@ pub const LETTER: &'static [(char, char)] = &[ ('đż ', 'đżĄ'), ('đżŁ', 'đżŁ'), ('đ', 'đ·'), - ('đ ', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('đ', 'đ'), + ('đ ', 'đł'), + ('đŽ', 'đŽ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('đ', 'đą'), + ('đČ', 'đČ'), ('đ
', 'đ
'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ('đ
°', 'đ»'), ('đ°', 'đ±Ș'), @@ -1011,10 +1058,19 @@ pub const LETTER: &'static [(char, char)] = &[ ('đ', 'đš'), ('đȘ', 'đ'), ('đ', 'đ'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), + ('đ°', 'đ'), ('đ', 'đŹ'), ('đ·', 'đœ'), ('đ
', 'đ
'), + ('đ', 'đ'), ('đ', 'đ«'), + ('đ', 'đ«'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ('đ ', 'đŁ'), ('đ€', 'đ„'), ('đ„', 'đ„'), @@ -1051,13 +1107,14 @@ pub const LETTER: &'static [(char, char)] = &[ ('đșĄ', 'đșŁ'), ('đș„', 'đș©'), ('đș«', 'đș»'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ]; pub const LETTER_NUMBER: &'static [(char, char)] = &[ @@ -1510,7 +1567,7 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('â
', 'â
'), ('â
', 'â
'), ('â', 'â'), - ('â°°', 'â±'), + ('â°°', 'â±'), ('ⱥ', 'ⱥ'), ('ⱄ', 'ⱊ'), ('ⱚ', 'ⱚ'), @@ -1679,19 +1736,29 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('ê»', 'ê»'), ('êœ', 'êœ'), ('êż', 'êż'), + ('ê', 'ê'), ('ê', 'ê'), - ('\u{a7c8}', '\u{a7c8}'), - ('\u{a7ca}', '\u{a7ca}'), - ('\u{a7f6}', '\u{a7f6}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê¶', 'ê¶'), ('êș', 'êș'), ('êŹ°', 'ê'), - ('ê ', '\u{ab68}'), + ('ê ', 'êš'), ('ê°', 'êźż'), ('ïŹ', 'ïŹ'), ('ïŹ', 'ïŹ'), ('ïœ', 'ïœ'), ('đš', 'đ'), ('đ', 'đ»'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đł', 'đłČ'), ('đŁ', 'đŁ'), ('đč ', 'đčż'), @@ -1723,6 +1790,9 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('đȘ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), ('đ€ą', 'đ„'), ]; @@ -1751,7 +1821,8 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'à€'), ('\u{93a}', '\u{93c}'), ('à€Ÿ', 'à„'), @@ -1793,6 +1864,7 @@ pub const MARK: &'static [(char, char)] = &[ ('àŻ', '\u{bcd}'), ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', 'à±'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -1805,6 +1877,7 @@ pub const MARK: &'static [(char, char)] = &[ ('àł', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), + ('àłł', 'àłł'), ('\u{d00}', 'àŽ'), ('\u{d3b}', '\u{d3c}'), ('\u{d3e}', '\u{d44}'), @@ -1823,7 +1896,7 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -1844,13 +1917,14 @@ pub const MARK: &'static [(char, char)] = &[ ('á', 'á'), ('á', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1712}', 'á'), + ('\u{1732}', 'áŽ'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', 'ါ'), @@ -1859,7 +1933,7 @@ pub const MARK: &'static [(char, char)] = &[ ('á©', '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'áŹ'), ('\u{1b34}', 'á'), ('\u{1b6b}', '\u{1b73}'), @@ -1872,8 +1946,7 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('áł·', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), ('\u{2d7f}', '\u{2d7f}'), @@ -1925,11 +1998,16 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('đ', 'đ'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', 'đ'), ('đ°', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{11134}'), ('đ
', 'đ
'), @@ -1937,9 +2015,10 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{11180}', 'đ'), ('đł', 'đ'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', '\u{111cf}'), + ('đ', '\u{111cf}'), ('đŹ', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112ea}'), ('\u{11300}', 'đ'), ('\u{1133b}', '\u{1133c}'), @@ -1960,11 +2039,11 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{116ab}', '\u{116b7}'), ('\u{1171d}', '\u{1172b}'), ('đ Ź', '\u{1183a}'), - ('\u{11930}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('\u{11930}', 'đ€”'), + ('đ€·', 'đ€ž'), ('\u{1193b}', '\u{1193e}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11943}'), + ('đ„', 'đ„'), + ('đ„', '\u{11943}'), ('đ§', '\u{119d7}'), ('\u{119da}', '\u{119e0}'), ('đ§€', 'đ§€'), @@ -1987,14 +2066,22 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{11d90}', '\u{11d91}'), ('đ¶', '\u{11d97}'), ('\u{11ef3}', 'đ»¶'), + ('\u{11f00}', '\u{11f01}'), + ('đŒ', 'đŒ'), + ('đŒŽ', '\u{11f3a}'), + ('đŒŸ', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('đœ', 'đŸ'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('đ
', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -2012,8 +2099,11 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0100}', '\u{e01ef}'), @@ -2102,6 +2192,7 @@ pub const MODIFIER_LETTER: &'static [(char, char)] = &[ ('à ', 'à '), ('à €', 'à €'), ('à š', 'à š'), + ('àŁ', 'àŁ'), ('à„±', 'à„±'), ('àč', 'àč'), ('à»', 'à»'), @@ -2132,6 +2223,7 @@ pub const MODIFIER_LETTER: &'static [(char, char)] = &[ ('ê', 'ê'), ('ê°', 'ê°'), ('ê', 'ê'), + ('êČ', 'êŽ'), ('êž', 'êč'), ('ê§', 'ê§'), ('꧊', '꧊'), @@ -2139,14 +2231,22 @@ pub const MODIFIER_LETTER: &'static [(char, char)] = &[ ('ê«', 'ê«'), ('ê«ł', 'ê«Ž'), ('ê', 'ê'), - ('\u{ab69}', '\u{ab69}'), + ('ê©', 'ê©'), ('', ''), ('\u{ff9e}', '\u{ff9f}'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đ', 'đ'), ('đŸ', 'đŸ'), ('đż ', 'đżĄ'), ('đżŁ', 'đżŁ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('đ°', 'đ'), ('đ·', 'đœ'), + ('đ«', 'đ«'), ('đ„', 'đ„'), ]; @@ -2164,6 +2264,7 @@ pub const MODIFIER_SYMBOL: &'static [(char, char)] = &[ ('ËŻ', 'Ëż'), ('Í”', 'Í”'), ('Î', 'Î
'), + ('àą', 'àą'), ('ៜ', 'ៜ'), ('áŸż', 'áż'), ('áż', 'áż'), @@ -2175,8 +2276,8 @@ pub const MODIFIER_SYMBOL: &'static [(char, char)] = &[ ('ê ', 'êĄ'), ('ê', 'ê'), ('ê', 'ê'), - ('\u{ab6a}', '\u{ab6b}'), - ('ïźČ', 'ïŻ'), + ('êȘ', 'ê«'), + ('ïźČ', 'ïŻ'), ('', ''), ('ïœ', 'ïœ'), ('ïżŁ', 'ïżŁ'), @@ -2208,7 +2309,8 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), @@ -2249,6 +2351,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{bcd}', '\u{bcd}'), ('\u{c00}', '\u{c00}'), ('\u{c04}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -2274,7 +2377,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -2298,7 +2401,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1732}', '\u{1733}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -2307,6 +2410,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{17c9}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', '\u{1922}'), @@ -2323,7 +2427,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{1a73}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1abd}'), - ('\u{1abf}', '\u{1ac0}'), + ('\u{1abf}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), ('\u{1b34}', '\u{1b34}'), ('\u{1b36}', '\u{1b3a}'), @@ -2346,8 +2450,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{20d0}', '\u{20dc}'), ('\u{20e1}', '\u{20e1}'), ('\u{20e5}', '\u{20f0}'), @@ -2405,12 +2508,17 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', '\u{11081}'), ('\u{110b3}', '\u{110b6}'), ('\u{110b9}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{1112b}'), ('\u{1112d}', '\u{11134}'), @@ -2423,6 +2531,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{11234}', '\u{11234}'), ('\u{11236}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), ('\u{112e3}', '\u{112ea}'), ('\u{11300}', '\u{11301}'), @@ -2484,12 +2593,20 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{11d95}', '\u{11d95}'), ('\u{11d97}', '\u{11d97}'), ('\u{11ef3}', '\u{11ef4}'), + ('\u{11f00}', '\u{11f01}'), + ('\u{11f36}', '\u{11f3a}'), + ('\u{11f40}', '\u{11f40}'), + ('\u{11f42}', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), @@ -2506,8 +2623,11 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0100}', '\u{e01ef}'), @@ -2610,7 +2730,7 @@ pub const NUMBER: &'static [(char, char)] = &[ ('đč ', 'đčŸ'), ('đŒ', 'đŒŠ'), ('đœ', 'đœ'), - ('\u{10fc5}', '\u{10fcb}'), + ('đż
', 'đż'), ('đ', 'đŻ'), ('đ°', 'đč'), ('đ¶', 'đż'), @@ -2623,21 +2743,25 @@ pub const NUMBER: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ°', 'đ»'), ('đŁ ', 'đŁČ'), - ('\u{11950}', '\u{11959}'), + ('đ„', 'đ„'), ('đ±', 'đ±Ź'), ('đ”', 'đ”'), ('đ¶ ', 'đ¶©'), + ('đœ', 'đœ'), ('đż', 'đż'), ('đ', 'đź'), ('đ© ', 'đ©©'), + ('đ«', 'đ«'), ('đ', 'đ'), ('đ', 'đĄ'), ('đș', 'đș'), + ('đ', 'đ'), ('đ ', 'đł'), ('đ ', 'đž'), ('đ', 'đż'), ('đ
', 'đ
'), ('đ°', 'đč'), + ('đ°', 'đč'), ('đŁ', 'đŁ'), ('đ„', 'đ„'), ('đ±±', 'đČ«'), @@ -2646,7 +2770,7 @@ pub const NUMBER: &'static [(char, char)] = &[ ('đŽ', 'đŽ'), ('đŽŻ', 'đŽœ'), ('đ', 'đ'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('đŻ°', 'đŻč'), ]; pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[ @@ -2696,6 +2820,10 @@ pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[ ('➊', '➊'), ('âžš', 'âžš'), ('âč', 'âč'), + ('âč', 'âč'), + ('âč', 'âč'), + ('âč', 'âč'), + ('âč', 'âč'), ('ă', 'ă'), ('ă', 'ă'), ('ă', 'ă'), @@ -2728,7 +2856,7 @@ pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[ ]; pub const OTHER: &'static [(char, char)] = &[ - ('\u{0}', '\u{1f}'), + ('\0', '\u{1f}'), ('\u{7f}', '\u{9f}'), ('\u{ad}', '\u{ad}'), ('\u{378}', '\u{379}'), @@ -2743,7 +2871,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{5c8}', '\u{5cf}'), ('\u{5eb}', '\u{5ee}'), ('\u{5f5}', '\u{605}'), - ('\u{61c}', '\u{61d}'), + ('\u{61c}', '\u{61c}'), ('\u{6dd}', '\u{6dd}'), ('\u{70e}', '\u{70f}'), ('\u{74b}', '\u{74c}'), @@ -2753,9 +2881,8 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{83f}', '\u{83f}'), ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'), - ('\u{86b}', '\u{89f}'), - ('\u{8b5}', '\u{8b5}'), - ('\u{8c8}', '\u{8d2}'), + ('\u{86b}', '\u{86f}'), + ('\u{88f}', '\u{897}'), ('\u{8e2}', '\u{8e2}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'), @@ -2834,12 +2961,13 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{c0d}', '\u{c0d}'), ('\u{c11}', '\u{c11}'), ('\u{c29}', '\u{c29}'), - ('\u{c3a}', '\u{c3c}'), + ('\u{c3a}', '\u{c3b}'), ('\u{c45}', '\u{c45}'), ('\u{c49}', '\u{c49}'), ('\u{c4e}', '\u{c54}'), ('\u{c57}', '\u{c57}'), - ('\u{c5b}', '\u{c5f}'), + ('\u{c5b}', '\u{c5c}'), + ('\u{c5e}', '\u{c5f}'), ('\u{c64}', '\u{c65}'), ('\u{c70}', '\u{c76}'), ('\u{c8d}', '\u{c8d}'), @@ -2850,11 +2978,11 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'), ('\u{cce}', '\u{cd4}'), - ('\u{cd7}', '\u{cdd}'), + ('\u{cd7}', '\u{cdc}'), ('\u{cdf}', '\u{cdf}'), ('\u{ce4}', '\u{ce5}'), ('\u{cf0}', '\u{cf0}'), - ('\u{cf3}', '\u{cff}'), + ('\u{cf4}', '\u{cff}'), ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'), ('\u{d45}', '\u{d45}'), @@ -2884,7 +3012,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'), ('\u{ec7}', '\u{ec7}'), - ('\u{ece}', '\u{ecf}'), + ('\u{ecf}', '\u{ecf}'), ('\u{eda}', '\u{edb}'), ('\u{ee0}', '\u{eff}'), ('\u{f48}', '\u{f48}'), @@ -2918,8 +3046,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{13fe}', '\u{13ff}'), ('\u{169d}', '\u{169f}'), ('\u{16f9}', '\u{16ff}'), - ('\u{170d}', '\u{170d}'), - ('\u{1715}', '\u{171f}'), + ('\u{1716}', '\u{171e}'), ('\u{1737}', '\u{173f}'), ('\u{1754}', '\u{175f}'), ('\u{176d}', '\u{176d}'), @@ -2928,7 +3055,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{17de}', '\u{17df}'), ('\u{17ea}', '\u{17ef}'), ('\u{17fa}', '\u{17ff}'), - ('\u{180e}', '\u{180f}'), + ('\u{180e}', '\u{180e}'), ('\u{181a}', '\u{181f}'), ('\u{1879}', '\u{187f}'), ('\u{18ab}', '\u{18af}'), @@ -2948,9 +3075,9 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1a8a}', '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'), ('\u{1aae}', '\u{1aaf}'), - ('\u{1ac1}', '\u{1aff}'), - ('\u{1b4c}', '\u{1b4f}'), - ('\u{1b7d}', '\u{1b7f}'), + ('\u{1acf}', '\u{1aff}'), + ('\u{1b4d}', '\u{1b4f}'), + ('\u{1b7f}', '\u{1b7f}'), ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'), ('\u{1c4a}', '\u{1c4c}'), @@ -2958,7 +3085,6 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1cbb}', '\u{1cbc}'), ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfb}', '\u{1cff}'), - ('\u{1dfa}', '\u{1dfa}'), ('\u{1f16}', '\u{1f17}'), ('\u{1f1e}', '\u{1f1f}'), ('\u{1f46}', '\u{1f47}'), @@ -2981,15 +3107,13 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{2072}', '\u{2073}'), ('\u{208f}', '\u{208f}'), ('\u{209d}', '\u{209f}'), - ('\u{20c0}', '\u{20cf}'), + ('\u{20c1}', '\u{20cf}'), ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'), ('\u{2427}', '\u{243f}'), ('\u{244b}', '\u{245f}'), ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b96}'), - ('\u{2c2f}', '\u{2c2f}'), - ('\u{2c5f}', '\u{2c5f}'), ('\u{2cf4}', '\u{2cf8}'), ('\u{2d26}', '\u{2d26}'), ('\u{2d28}', '\u{2d2c}'), @@ -3005,7 +3129,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{2dcf}', '\u{2dcf}'), ('\u{2dd7}', '\u{2dd7}'), ('\u{2ddf}', '\u{2ddf}'), - ('\u{2e53}', '\u{2e7f}'), + ('\u{2e5e}', '\u{2e7f}'), ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'), ('\u{2fd6}', '\u{2fef}'), @@ -3017,13 +3141,14 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{318f}', '\u{318f}'), ('\u{31e4}', '\u{31ef}'), ('\u{321f}', '\u{321f}'), - ('\u{9ffd}', '\u{9fff}'), ('\u{a48d}', '\u{a48f}'), ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'), ('\u{a6f8}', '\u{a6ff}'), - ('\u{a7c0}', '\u{a7c1}'), - ('\u{a7cb}', '\u{a7f4}'), + ('\u{a7cb}', '\u{a7cf}'), + ('\u{a7d2}', '\u{a7d2}'), + ('\u{a7d4}', '\u{a7d4}'), + ('\u{a7da}', '\u{a7f1}'), ('\u{a82d}', '\u{a82f}'), ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'), @@ -3059,11 +3184,10 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{fb3f}', '\u{fb3f}'), ('\u{fb42}', '\u{fb42}'), ('\u{fb45}', '\u{fb45}'), - ('\u{fbc2}', '\u{fbd2}'), - ('\u{fd40}', '\u{fd4f}'), + ('\u{fbc3}', '\u{fbd2}'), ('\u{fd90}', '\u{fd91}'), - ('\u{fdc8}', '\u{fdef}'), - ('\u{fdfe}', '\u{fdff}'), + ('\u{fdc8}', '\u{fdce}'), + ('\u{fdd0}', '\u{fdef}'), ('\u{fe1a}', '\u{fe1f}'), ('\u{fe53}', '\u{fe53}'), ('\u{fe67}', '\u{fe67}'), @@ -3106,10 +3230,20 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{104fc}', '\u{104ff}'), ('\u{10528}', '\u{1052f}'), ('\u{10564}', '\u{1056e}'), - ('\u{10570}', '\u{105ff}'), + ('\u{1057b}', '\u{1057b}'), + ('\u{1058b}', '\u{1058b}'), + ('\u{10593}', '\u{10593}'), + ('\u{10596}', '\u{10596}'), + ('\u{105a2}', '\u{105a2}'), + ('\u{105b2}', '\u{105b2}'), + ('\u{105ba}', '\u{105ba}'), + ('\u{105bd}', '\u{105ff}'), ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'), - ('\u{10768}', '\u{107ff}'), + ('\u{10768}', '\u{1077f}'), + ('\u{10786}', '\u{10786}'), + ('\u{107b1}', '\u{107b1}'), + ('\u{107bb}', '\u{107ff}'), ('\u{10806}', '\u{10807}'), ('\u{10809}', '\u{10809}'), ('\u{10836}', '\u{10836}'), @@ -3150,15 +3284,16 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{10e7f}', '\u{10e7f}'), ('\u{10eaa}', '\u{10eaa}'), ('\u{10eae}', '\u{10eaf}'), - ('\u{10eb2}', '\u{10eff}'), + ('\u{10eb2}', '\u{10efc}'), ('\u{10f28}', '\u{10f2f}'), - ('\u{10f5a}', '\u{10faf}'), + ('\u{10f5a}', '\u{10f6f}'), + ('\u{10f8a}', '\u{10faf}'), ('\u{10fcc}', '\u{10fdf}'), ('\u{10ff7}', '\u{10fff}'), ('\u{1104e}', '\u{11051}'), - ('\u{11070}', '\u{1107e}'), + ('\u{11076}', '\u{1107e}'), ('\u{110bd}', '\u{110bd}'), - ('\u{110c2}', '\u{110cf}'), + ('\u{110c3}', '\u{110cf}'), ('\u{110e9}', '\u{110ef}'), ('\u{110fa}', '\u{110ff}'), ('\u{11135}', '\u{11135}'), @@ -3167,7 +3302,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{111e0}', '\u{111e0}'), ('\u{111f5}', '\u{111ff}'), ('\u{11212}', '\u{11212}'), - ('\u{1123f}', '\u{1127f}'), + ('\u{11242}', '\u{1127f}'), ('\u{11287}', '\u{11287}'), ('\u{11289}', '\u{11289}'), ('\u{1128e}', '\u{1128e}'), @@ -3199,11 +3334,11 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{11645}', '\u{1164f}'), ('\u{1165a}', '\u{1165f}'), ('\u{1166d}', '\u{1167f}'), - ('\u{116b9}', '\u{116bf}'), + ('\u{116ba}', '\u{116bf}'), ('\u{116ca}', '\u{116ff}'), ('\u{1171b}', '\u{1171c}'), ('\u{1172c}', '\u{1172f}'), - ('\u{11740}', '\u{117ff}'), + ('\u{11747}', '\u{117ff}'), ('\u{1183c}', '\u{1189f}'), ('\u{118f3}', '\u{118fe}'), ('\u{11907}', '\u{11908}'), @@ -3218,8 +3353,9 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{119d8}', '\u{119d9}'), ('\u{119e5}', '\u{119ff}'), ('\u{11a48}', '\u{11a4f}'), - ('\u{11aa3}', '\u{11abf}'), - ('\u{11af9}', '\u{11bff}'), + ('\u{11aa3}', '\u{11aaf}'), + ('\u{11af9}', '\u{11aff}'), + ('\u{11b0a}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'), ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'), @@ -3240,19 +3376,25 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{11d92}', '\u{11d92}'), ('\u{11d99}', '\u{11d9f}'), ('\u{11daa}', '\u{11edf}'), - ('\u{11ef9}', '\u{11faf}'), + ('\u{11ef9}', '\u{11eff}'), + ('\u{11f11}', '\u{11f11}'), + ('\u{11f3b}', '\u{11f3d}'), + ('\u{11f5a}', '\u{11faf}'), ('\u{11fb1}', '\u{11fbf}'), ('\u{11ff2}', '\u{11ffe}'), ('\u{1239a}', '\u{123ff}'), ('\u{1246f}', '\u{1246f}'), ('\u{12475}', '\u{1247f}'), - ('\u{12544}', '\u{12fff}'), - ('\u{1342f}', '\u{143ff}'), + ('\u{12544}', '\u{12f8f}'), + ('\u{12ff3}', '\u{12fff}'), + ('\u{13430}', '\u{1343f}'), + ('\u{13456}', '\u{143ff}'), ('\u{14647}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'), ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'), - ('\u{16a70}', '\u{16acf}'), + ('\u{16abf}', '\u{16abf}'), + ('\u{16aca}', '\u{16acf}'), ('\u{16aee}', '\u{16aef}'), ('\u{16af6}', '\u{16aff}'), ('\u{16b46}', '\u{16b4f}'), @@ -3268,21 +3410,30 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{16ff2}', '\u{16fff}'), ('\u{187f8}', '\u{187ff}'), ('\u{18cd6}', '\u{18cff}'), - ('\u{18d09}', '\u{1afff}'), - ('\u{1b11f}', '\u{1b14f}'), - ('\u{1b153}', '\u{1b163}'), + ('\u{18d09}', '\u{1afef}'), + ('\u{1aff4}', '\u{1aff4}'), + ('\u{1affc}', '\u{1affc}'), + ('\u{1afff}', '\u{1afff}'), + ('\u{1b123}', '\u{1b131}'), + ('\u{1b133}', '\u{1b14f}'), + ('\u{1b153}', '\u{1b154}'), + ('\u{1b156}', '\u{1b163}'), ('\u{1b168}', '\u{1b16f}'), ('\u{1b2fc}', '\u{1bbff}'), ('\u{1bc6b}', '\u{1bc6f}'), ('\u{1bc7d}', '\u{1bc7f}'), ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'), - ('\u{1bca0}', '\u{1cfff}'), + ('\u{1bca0}', '\u{1ceff}'), + ('\u{1cf2e}', '\u{1cf2f}'), + ('\u{1cf47}', '\u{1cf4f}'), + ('\u{1cfc4}', '\u{1cfff}'), ('\u{1d0f6}', '\u{1d0ff}'), ('\u{1d127}', '\u{1d128}'), ('\u{1d173}', '\u{1d17a}'), - ('\u{1d1e9}', '\u{1d1ff}'), - ('\u{1d246}', '\u{1d2df}'), + ('\u{1d1eb}', '\u{1d1ff}'), + ('\u{1d246}', '\u{1d2bf}'), + ('\u{1d2d4}', '\u{1d2df}'), ('\u{1d2f4}', '\u{1d2ff}'), ('\u{1d357}', '\u{1d35f}'), ('\u{1d379}', '\u{1d3ff}'), @@ -3308,18 +3459,28 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1d7cc}', '\u{1d7cd}'), ('\u{1da8c}', '\u{1da9a}'), ('\u{1daa0}', '\u{1daa0}'), - ('\u{1dab0}', '\u{1dfff}'), + ('\u{1dab0}', '\u{1deff}'), + ('\u{1df1f}', '\u{1df24}'), + ('\u{1df2b}', '\u{1dfff}'), ('\u{1e007}', '\u{1e007}'), ('\u{1e019}', '\u{1e01a}'), ('\u{1e022}', '\u{1e022}'), ('\u{1e025}', '\u{1e025}'), - ('\u{1e02b}', '\u{1e0ff}'), + ('\u{1e02b}', '\u{1e02f}'), + ('\u{1e06e}', '\u{1e08e}'), + ('\u{1e090}', '\u{1e0ff}'), ('\u{1e12d}', '\u{1e12f}'), ('\u{1e13e}', '\u{1e13f}'), ('\u{1e14a}', '\u{1e14d}'), - ('\u{1e150}', '\u{1e2bf}'), + ('\u{1e150}', '\u{1e28f}'), + ('\u{1e2af}', '\u{1e2bf}'), ('\u{1e2fa}', '\u{1e2fe}'), - ('\u{1e300}', '\u{1e7ff}'), + ('\u{1e300}', '\u{1e4cf}'), + ('\u{1e4fa}', '\u{1e7df}'), + ('\u{1e7e7}', '\u{1e7e7}'), + ('\u{1e7ec}', '\u{1e7ec}'), + ('\u{1e7ef}', '\u{1e7ef}'), + ('\u{1e7ff}', '\u{1e7ff}'), ('\u{1e8c5}', '\u{1e8c6}'), ('\u{1e8d7}', '\u{1e8ff}'), ('\u{1e94c}', '\u{1e94f}'), @@ -3373,39 +3534,39 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1f249}', '\u{1f24f}'), ('\u{1f252}', '\u{1f25f}'), ('\u{1f266}', '\u{1f2ff}'), - ('\u{1f6d8}', '\u{1f6df}'), + ('\u{1f6d8}', '\u{1f6db}'), ('\u{1f6ed}', '\u{1f6ef}'), ('\u{1f6fd}', '\u{1f6ff}'), - ('\u{1f774}', '\u{1f77f}'), - ('\u{1f7d9}', '\u{1f7df}'), - ('\u{1f7ec}', '\u{1f7ff}'), + ('\u{1f777}', '\u{1f77a}'), + ('\u{1f7da}', '\u{1f7df}'), + ('\u{1f7ec}', '\u{1f7ef}'), + ('\u{1f7f1}', '\u{1f7ff}'), ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}', '\u{1f84f}'), ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8af}'), ('\u{1f8b2}', '\u{1f8ff}'), - ('\u{1f979}', '\u{1f979}'), - ('\u{1f9cc}', '\u{1f9cc}'), ('\u{1fa54}', '\u{1fa5f}'), ('\u{1fa6e}', '\u{1fa6f}'), - ('\u{1fa75}', '\u{1fa77}'), - ('\u{1fa7b}', '\u{1fa7f}'), - ('\u{1fa87}', '\u{1fa8f}'), - ('\u{1faa9}', '\u{1faaf}'), - ('\u{1fab7}', '\u{1fabf}'), - ('\u{1fac3}', '\u{1facf}'), - ('\u{1fad7}', '\u{1faff}'), + ('\u{1fa7d}', '\u{1fa7f}'), + ('\u{1fa89}', '\u{1fa8f}'), + ('\u{1fabe}', '\u{1fabe}'), + ('\u{1fac6}', '\u{1facd}'), + ('\u{1fadc}', '\u{1fadf}'), + ('\u{1fae9}', '\u{1faef}'), + ('\u{1faf9}', '\u{1faff}'), ('\u{1fb93}', '\u{1fb93}'), ('\u{1fbcb}', '\u{1fbef}'), ('\u{1fbfa}', '\u{1ffff}'), - ('\u{2a6de}', '\u{2a6ff}'), - ('\u{2b735}', '\u{2b73f}'), + ('\u{2a6e0}', '\u{2a6ff}'), + ('\u{2b73a}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}', '\u{2ceaf}'), ('\u{2ebe1}', '\u{2f7ff}'), ('\u{2fa1e}', '\u{2ffff}'), - ('\u{3134b}', '\u{e00ff}'), + ('\u{3134b}', '\u{3134f}'), + ('\u{323b0}', '\u{e00ff}'), ('\u{e01f0}', '\u{10ffff}'), ]; @@ -3433,8 +3594,9 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('à ', 'à '), ('àĄ', 'àĄ'), ('àĄ ', 'àĄȘ'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), + ('àĄ°', 'àą'), + ('àą', 'àą'), + ('àą ', 'àŁ'), ('à€', 'à€č'), ('à€œ', 'à€œ'), ('à„', 'à„'), @@ -3499,6 +3661,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('à°Ș', 'à°č'), ('à°œ', 'à°œ'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', 'à±Ą'), ('àČ', 'àČ'), ('àČ
', 'àČ'), @@ -3507,10 +3670,10 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('àČȘ', 'àČł'), ('àČ”', 'àČč'), ('àČœ', 'àČœ'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', 'àłĄ'), ('àł±', 'àłČ'), - ('\u{d04}', 'àŽ'), + ('àŽ', 'àŽ'), ('àŽ', 'àŽ'), ('àŽ', 'àŽș'), ('àŽœ', 'àŽœ'), @@ -3571,9 +3734,8 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('á', 'á'), ('á ', 'áȘ'), ('á±', 'áž'), - ('á', 'á'), - ('á', 'á'), - ('á ', 'á±'), + ('á', 'á'), + ('á', 'á±'), ('á', 'á'), ('á ', 'áŹ'), ('áź', 'á°'), @@ -3593,7 +3755,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('áš', 'áš'), ('áš ', 'á©'), ('áŹ
', 'Ᏻ'), - ('á
', 'á'), + ('á
', 'á'), ('áź', 'áź '), ('áźź', '៯'), ('áźș', 'ᯄ'), @@ -3623,11 +3785,10 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('ăż', 'ăż'), ('ă
', 'ăŻ'), ('ă±', 'ă'), - ('ă ', '\u{31bf}'), + ('ă ', 'ăż'), ('ă°', 'ăż'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), - ('ê', 'ê'), + ('ă', '䶿'), + ('äž', 'ê'), ('ê', 'ê'), ('ê', 'ê·'), ('ê', 'ê'), @@ -3750,19 +3911,22 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('đź', 'đź'), ('đ°', 'đ±'), ('đŽ', 'đŽŁ'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('đș', 'đș©'), + ('đș°', 'đș±'), ('đŒ', 'đŒ'), ('đŒ§', 'đŒ§'), ('đŒ°', 'đœ
'), - ('\u{10fb0}', '\u{10fc4}'), + ('đœ°', 'đŸ'), + ('đŸ°', 'đż'), ('đż ', 'đż¶'), ('đ', 'đ·'), + ('đ±', 'đČ'), + ('đ”', 'đ”'), ('đ', 'đŻ'), ('đ', 'đš'), ('đ', 'đŠ'), ('đ
', 'đ
'), - ('\u{11147}', '\u{11147}'), + ('đ
', 'đ
'), ('đ
', 'đ
Č'), ('đ
¶', 'đ
¶'), ('đ', 'đČ'), @@ -3771,6 +3935,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ«'), + ('đż', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), @@ -3788,7 +3953,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('đ', 'đĄ'), ('đ', 'đŽ'), ('đ', 'đ'), - ('đ', '\u{11461}'), + ('đ', 'đĄ'), ('đ', 'đŻ'), ('đ', 'đ
'), ('đ', 'đ'), @@ -3799,14 +3964,15 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('đ', 'đȘ'), ('đž', 'đž'), ('đ', 'đ'), + ('đ', 'đ'), ('đ ', 'đ «'), - ('đŁż', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('đŁż', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€Ż'), + ('đ€ż', 'đ€ż'), + ('đ„', 'đ„'), ('đŠ ', 'đŠ§'), ('đŠȘ', 'đ§'), ('đ§Ą', 'đ§Ą'), @@ -3817,7 +3983,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('đ©', 'đ©'), ('đ©', 'đȘ'), ('đȘ', 'đȘ'), - ('đ«', 'đ«ž'), + ('đȘ°', 'đ«ž'), ('đ°', 'đ°'), ('đ°', 'đ°ź'), ('đ±', 'đ±'), @@ -3831,13 +3997,19 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('đ”Ș', 'đ¶'), ('đ¶', 'đ¶'), ('đ» ', 'đ»Č'), - ('\u{11fb0}', '\u{11fb0}'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒł'), + ('đŸ°', 'đŸ°'), ('đ', 'đ'), ('đ', 'đ'), - ('đ', 'đź'), + ('đŸ', 'đż°'), + ('đ', 'đŻ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đšž'), ('đ©', 'đ©'), + ('đ©°', 'đȘŸ'), ('đ«', 'đ«'), ('đŹ', 'đŹŻ'), ('đŁ', 'đ·'), @@ -3845,19 +4017,28 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('đŒ', 'đœ'), ('đœ', 'đœ'), ('đ', 'đ·'), - ('đ ', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('đ', 'đ'), + ('đ ', 'đł'), + ('đŽ', 'đŽ'), + ('đ', 'đą'), + ('đČ', 'đČ'), ('đ
', 'đ
'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ('đ
°', 'đ»'), ('đ°', 'đ±Ș'), ('đ±°', 'đ±Œ'), ('đČ', 'đČ'), ('đČ', 'đČ'), + ('đŒ', 'đŒ'), ('đ', 'đŹ'), ('đ
', 'đ
'), + ('đ', 'đ'), ('đ', 'đ«'), + ('đ', 'đȘ'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ('đ ', 'đŁ'), ('đž', 'đž'), ('đž
', 'đž'), @@ -3892,13 +4073,14 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('đșĄ', 'đșŁ'), ('đș„', 'đș©'), ('đș«', 'đș»'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ]; pub const OTHER_NUMBER: &'static [(char, char)] = &[ @@ -3955,7 +4137,7 @@ pub const OTHER_NUMBER: &'static [(char, char)] = &[ ('đč ', 'đčŸ'), ('đŒ', 'đŒŠ'), ('đœ', 'đœ'), - ('\u{10fc5}', '\u{10fcb}'), + ('đż
', 'đż'), ('đ', 'đ„'), ('đĄ', 'đŽ'), ('đș', 'đ»'), @@ -3964,6 +4146,7 @@ pub const OTHER_NUMBER: &'static [(char, char)] = &[ ('đż', 'đż'), ('đ', 'đĄ'), ('đș', 'đș'), + ('đ', 'đ'), ('đ ', 'đł'), ('đ ', 'đž'), ('đŁ', 'đŁ'), @@ -3999,7 +4182,7 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('Ű', 'Ű'), ('Ű', 'Ű'), ('Ű', 'Ű'), - ('Ű', 'Ű'), + ('Ű', 'Ű'), ('ÙȘ', 'Ù'), ('Û', 'Û'), ('Ü', 'Ü'), @@ -4036,6 +4219,7 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('áȘ ', 'áȘŠ'), ('áȘš', 'áȘ'), ('á', 'á '), + ('áœ', 'áŸ'), ('áŻŒ', '᯿'), ('á°»', 'á°ż'), ('ᱟ', '᱿'), @@ -4064,7 +4248,7 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('➌', 'âžż'), ('âč', 'âč'), ('âč', 'âč'), - ('\u{2e52}', '\u{2e52}'), + ('âč', 'âč'), ('ă', 'ă'), ('ăœ', 'ăœ'), ('ă»', 'ă»'), @@ -4118,6 +4302,7 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('đŹč', 'đŹż'), ('đź', 'đź'), ('đœ', 'đœ'), + ('đŸ', 'đŸ'), ('đ', 'đ'), ('đ»', 'đŒ'), ('đŸ', 'đ'), @@ -4130,24 +4315,28 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('đž', 'đœ'), ('đ©', 'đ©'), ('đ', 'đ'), - ('\u{1145a}', 'đ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đŹ'), + ('đč', 'đč'), ('đŒ', 'đŸ'), ('đ »', 'đ »'), - ('\u{11944}', '\u{11946}'), + ('đ„', 'đ„'), ('đ§ą', 'đ§ą'), ('đšż', 'đ©'), ('đȘ', 'đȘ'), ('đȘ', 'đȘą'), + ('đŹ', 'đŹ'), ('đ±', 'đ±
'), ('đ±°', 'đ±±'), ('đ»·', 'đ»ž'), + ('đœ', 'đœ'), ('đżż', 'đżż'), ('đ°', 'đŽ'), + ('đż±', 'đżČ'), ('đ©ź', 'đ©Ż'), ('đ«”', 'đ«”'), ('đŹ·', 'đŹ»'), @@ -4240,9 +4429,9 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('â
', 'â'), ('â', 'âł'), ('â¶', 'âź'), - ('\u{2b97}', '⯿'), + ('âź', '⯿'), ('âł„', 'âłȘ'), - ('\u{2e50}', '\u{2e51}'), + ('âč', 'âč'), ('âș', 'âș'), ('âș', '⻳'), ('âŒ', 'âż'), @@ -4267,7 +4456,9 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('ê ¶', 'ê ·'), ('ê č', 'ê č'), ('ê©·', 'ê©č'), - ('ï·œ', 'ï·œ'), + ('ï”', 'ï”'), + ('ï·', 'ï·'), + ('ï·œ', 'ï·ż'), ('ïż€', 'ïż€'), ('ïżš', 'ïżš'), ('ïż', 'ïżź'), @@ -4275,7 +4466,7 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('đ·', 'đż'), ('đ
č', 'đ'), ('đ', 'đ'), - ('đ', '\u{1019c}'), + ('đ', 'đ'), ('đ ', 'đ '), ('đ', 'đŒ'), ('đĄ·', 'đĄž'), @@ -4286,13 +4477,14 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('đŹŒ', 'đŹż'), ('đ
', 'đ
'), ('đČ', 'đČ'), + ('đœ', 'đż'), ('đ', 'đ”'), ('đ', 'đŠ'), ('đ©', 'đ
€'), ('đ
Ș', 'đ
Ź'), ('đ', 'đ'), ('đ', 'đ©'), - ('đź', 'đš'), + ('đź', 'đȘ'), ('đ', 'đ'), ('đ
', 'đ
'), ('đ', 'đ'), @@ -4310,38 +4502,37 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('đ±', 'đż'), ('đ', 'đ'), ('đ', 'đ”'), - ('\u{1f10d}', '\u{1f1ad}'), + ('đ', 'đ'), ('đŠ', 'đ'), ('đ', 'đ»'), ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đ„'), ('đ', 'đș'), - ('đ', '\u{1f6d7}'), - ('đ ', 'đŹ'), - ('đ°', '\u{1f6fc}'), - ('đ', 'đł'), - ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đŹ'), + ('đ°', 'đŒ'), + ('đ', 'đ¶'), + ('đ»', 'đ'), ('đ ', 'đ«'), + ('đ°', 'đ°'), ('đ ', 'đ '), ('đ ', 'đĄ'), ('đĄ', 'đĄ'), ('đĄ ', 'đą'), ('đą', 'đą'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('đ€', '\u{1f978}'), - ('đ„ș', '\u{1f9cb}'), - ('đ§', 'đ©'), + ('đą°', 'đą±'), + ('đ€', 'đ©'), ('đ© ', 'đ©'), - ('đ©°', '\u{1fa74}'), - ('đ©ž', 'đ©ș'), - ('đȘ', '\u{1fa86}'), - ('đȘ', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), + ('đ©°', 'đ©Œ'), + ('đȘ', 'đȘ'), + ('đȘ', 'đȘœ'), + ('đȘż', 'đ«
'), + ('đ«', 'đ«'), + ('đ« ', 'đ«š'), + ('đ«°', 'đ«ž'), + ('đŹ', 'đź'), + ('đź', 'đŻ'), ]; pub const PARAGRAPH_SEPARATOR: &'static [(char, char)] = @@ -4381,7 +4572,7 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('Ű', 'Ű'), ('Ű', 'Ű'), ('Ű', 'Ű'), - ('Ű', 'Ű'), + ('Ű', 'Ű'), ('ÙȘ', 'Ù'), ('Û', 'Û'), ('Ü', 'Ü'), @@ -4420,6 +4611,7 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('áȘ ', 'áȘŠ'), ('áȘš', 'áȘ'), ('á', 'á '), + ('áœ', 'áŸ'), ('áŻŒ', '᯿'), ('á°»', 'á°ż'), ('ᱟ', '᱿'), @@ -4444,7 +4636,7 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('â”°', 'â”°'), ('âž', 'âžź'), ('âž°', 'âč'), - ('\u{2e52}', '\u{2e52}'), + ('âč', 'âč'), ('ă', 'ă'), ('ă', 'ă'), ('ă', 'ă'), @@ -4498,8 +4690,9 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('đ«°', 'đ«¶'), ('đŹč', 'đŹż'), ('đź', 'đź'), - ('\u{10ead}', '\u{10ead}'), + ('đș', 'đș'), ('đœ', 'đœ'), + ('đŸ', 'đŸ'), ('đ', 'đ'), ('đ»', 'đŒ'), ('đŸ', 'đ'), @@ -4512,24 +4705,28 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('đž', 'đœ'), ('đ©', 'đ©'), ('đ', 'đ'), - ('\u{1145a}', 'đ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đŹ'), + ('đč', 'đč'), ('đŒ', 'đŸ'), ('đ »', 'đ »'), - ('\u{11944}', '\u{11946}'), + ('đ„', 'đ„'), ('đ§ą', 'đ§ą'), ('đšż', 'đ©'), ('đȘ', 'đȘ'), ('đȘ', 'đȘą'), + ('đŹ', 'đŹ'), ('đ±', 'đ±
'), ('đ±°', 'đ±±'), ('đ»·', 'đ»ž'), + ('đœ', 'đœ'), ('đżż', 'đżż'), ('đ°', 'đŽ'), + ('đż±', 'đżČ'), ('đ©ź', 'đ©Ż'), ('đ«”', 'đ«”'), ('đŹ·', 'đŹ»'), @@ -4598,6 +4795,7 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('àł', 'àł'), ('àł', 'àł'), ('\u{cd5}', '\u{cd6}'), + ('àłł', 'àłł'), ('àŽ', 'àŽ'), ('\u{d3e}', 'à”'), ('à”', 'à”'), @@ -4620,6 +4818,8 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('á', 'á'), ('á', 'á'), ('á', 'á'), + ('á', 'á'), + ('áŽ', 'áŽ'), ('á¶', 'á¶'), ('áŸ', 'á
'), ('á', 'á'), @@ -4682,7 +4882,7 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('đ', 'đ'), ('đł', 'đ”'), ('đż', 'đ'), - ('\u{111ce}', '\u{111ce}'), + ('đ', 'đ'), ('đŹ', 'đź'), ('đČ', 'đł'), ('đ”', 'đ”'), @@ -4714,11 +4914,11 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('đŠ', 'đŠ'), ('đ Ź', 'đ ź'), ('đ ž', 'đ ž'), - ('\u{11930}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193d}', '\u{1193d}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11942}'), + ('\u{11930}', 'đ€”'), + ('đ€·', 'đ€ž'), + ('đ€œ', 'đ€œ'), + ('đ„', 'đ„'), + ('đ„', 'đ„'), ('đ§', 'đ§'), ('đ§', 'đ§'), ('đ§€', 'đ§€'), @@ -4734,8 +4934,12 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('đ¶', 'đ¶'), ('đ¶', 'đ¶'), ('đ»”', 'đ»¶'), + ('đŒ', 'đŒ'), + ('đŒŽ', 'đŒ”'), + ('đŒŸ', 'đŒż'), + ('đœ', 'đœ'), ('đœ', 'đŸ'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), ('\u{1d165}', 'đ
Š'), ('đ
', '\u{1d172}'), ]; @@ -4774,6 +4978,7 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('Ûœ', 'ÛŸ'), ('߶', '߶'), ('ߟ', 'ßż'), + ('àą', 'àą'), ('à§Č', 'à§ł'), ('à§ș', '৻'), ('૱', '૱'), @@ -4812,7 +5017,7 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('â', 'â'), ('âș', 'âŒ'), ('â', 'â'), - ('â ', 'âż'), + ('â ', 'â'), ('â', 'â'), ('â', 'â'), ('â', 'â'), @@ -4841,9 +5046,9 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('â§', '⧻'), ('⧟', 'âł'), ('â¶', 'âź'), - ('\u{2b97}', '⯿'), + ('âź', '⯿'), ('âł„', 'âłȘ'), - ('\u{2e50}', '\u{2e51}'), + ('âč', 'âč'), ('âș', 'âș'), ('âș', '⻳'), ('âŒ', 'âż'), @@ -4872,10 +5077,12 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('ê ¶', 'ê č'), ('ê©·', 'ê©č'), ('ê', 'ê'), - ('\u{ab6a}', '\u{ab6b}'), + ('êȘ', 'ê«'), ('ïŹ©', 'ïŹ©'), - ('ïźČ', 'ïŻ'), - ('ï·Œ', 'ï·œ'), + ('ïźČ', 'ïŻ'), + ('ï”', 'ï”'), + ('ï·', 'ï·'), + ('ï·Œ', 'ï·ż'), ('ïčą', 'ïčą'), ('ïč€', 'ïčŠ'), ('ïč©', 'ïč©'), @@ -4892,7 +5099,7 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('đ·', 'đż'), ('đ
č', 'đ'), ('đ', 'đ'), - ('đ', '\u{1019c}'), + ('đ', 'đ'), ('đ ', 'đ '), ('đ', 'đŒ'), ('đĄ·', 'đĄž'), @@ -4902,13 +5109,14 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('đŹŒ', 'đŹż'), ('đ
', 'đ
'), ('đČ', 'đČ'), + ('đœ', 'đż'), ('đ', 'đ”'), ('đ', 'đŠ'), ('đ©', 'đ
€'), ('đ
Ș', 'đ
Ź'), ('đ', 'đ'), ('đ', 'đ©'), - ('đź', 'đš'), + ('đź', 'đȘ'), ('đ', 'đ'), ('đ
', 'đ
'), ('đ', 'đ'), @@ -4939,37 +5147,36 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('đ±', 'đż'), ('đ', 'đ'), ('đ', 'đ”'), - ('\u{1f10d}', '\u{1f1ad}'), + ('đ', 'đ'), ('đŠ', 'đ'), ('đ', 'đ»'), ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đ„'), - ('đ', '\u{1f6d7}'), - ('đ ', 'đŹ'), - ('đ°', '\u{1f6fc}'), - ('đ', 'đł'), - ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đŹ'), + ('đ°', 'đŒ'), + ('đ', 'đ¶'), + ('đ»', 'đ'), ('đ ', 'đ«'), + ('đ°', 'đ°'), ('đ ', 'đ '), ('đ ', 'đĄ'), ('đĄ', 'đĄ'), ('đĄ ', 'đą'), ('đą', 'đą'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('đ€', '\u{1f978}'), - ('đ„ș', '\u{1f9cb}'), - ('đ§', 'đ©'), + ('đą°', 'đą±'), + ('đ€', 'đ©'), ('đ© ', 'đ©'), - ('đ©°', '\u{1fa74}'), - ('đ©ž', 'đ©ș'), - ('đȘ', '\u{1fa86}'), - ('đȘ', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), + ('đ©°', 'đ©Œ'), + ('đȘ', 'đȘ'), + ('đȘ', 'đȘœ'), + ('đȘż', 'đ«
'), + ('đ«', 'đ«'), + ('đ« ', 'đ«š'), + ('đ«°', 'đ«ž'), + ('đŹ', 'đź'), + ('đź', 'đŻ'), ]; pub const TITLECASE_LETTER: &'static [(char, char)] = &[ @@ -4998,7 +5205,6 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{5c8}', '\u{5cf}'), ('\u{5eb}', '\u{5ee}'), ('\u{5f5}', '\u{5ff}'), - ('\u{61d}', '\u{61d}'), ('\u{70e}', '\u{70e}'), ('\u{74b}', '\u{74c}'), ('\u{7b2}', '\u{7bf}'), @@ -5007,9 +5213,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{83f}', '\u{83f}'), ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'), - ('\u{86b}', '\u{89f}'), - ('\u{8b5}', '\u{8b5}'), - ('\u{8c8}', '\u{8d2}'), + ('\u{86b}', '\u{86f}'), + ('\u{88f}', '\u{88f}'), + ('\u{892}', '\u{897}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'), ('\u{991}', '\u{992}'), @@ -5087,12 +5293,13 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{c0d}', '\u{c0d}'), ('\u{c11}', '\u{c11}'), ('\u{c29}', '\u{c29}'), - ('\u{c3a}', '\u{c3c}'), + ('\u{c3a}', '\u{c3b}'), ('\u{c45}', '\u{c45}'), ('\u{c49}', '\u{c49}'), ('\u{c4e}', '\u{c54}'), ('\u{c57}', '\u{c57}'), - ('\u{c5b}', '\u{c5f}'), + ('\u{c5b}', '\u{c5c}'), + ('\u{c5e}', '\u{c5f}'), ('\u{c64}', '\u{c65}'), ('\u{c70}', '\u{c76}'), ('\u{c8d}', '\u{c8d}'), @@ -5103,11 +5310,11 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'), ('\u{cce}', '\u{cd4}'), - ('\u{cd7}', '\u{cdd}'), + ('\u{cd7}', '\u{cdc}'), ('\u{cdf}', '\u{cdf}'), ('\u{ce4}', '\u{ce5}'), ('\u{cf0}', '\u{cf0}'), - ('\u{cf3}', '\u{cff}'), + ('\u{cf4}', '\u{cff}'), ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'), ('\u{d45}', '\u{d45}'), @@ -5137,7 +5344,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'), ('\u{ec7}', '\u{ec7}'), - ('\u{ece}', '\u{ecf}'), + ('\u{ecf}', '\u{ecf}'), ('\u{eda}', '\u{edb}'), ('\u{ee0}', '\u{eff}'), ('\u{f48}', '\u{f48}'), @@ -5171,8 +5378,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{13fe}', '\u{13ff}'), ('\u{169d}', '\u{169f}'), ('\u{16f9}', '\u{16ff}'), - ('\u{170d}', '\u{170d}'), - ('\u{1715}', '\u{171f}'), + ('\u{1716}', '\u{171e}'), ('\u{1737}', '\u{173f}'), ('\u{1754}', '\u{175f}'), ('\u{176d}', '\u{176d}'), @@ -5181,7 +5387,6 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{17de}', '\u{17df}'), ('\u{17ea}', '\u{17ef}'), ('\u{17fa}', '\u{17ff}'), - ('\u{180f}', '\u{180f}'), ('\u{181a}', '\u{181f}'), ('\u{1879}', '\u{187f}'), ('\u{18ab}', '\u{18af}'), @@ -5201,9 +5406,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1a8a}', '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'), ('\u{1aae}', '\u{1aaf}'), - ('\u{1ac1}', '\u{1aff}'), - ('\u{1b4c}', '\u{1b4f}'), - ('\u{1b7d}', '\u{1b7f}'), + ('\u{1acf}', '\u{1aff}'), + ('\u{1b4d}', '\u{1b4f}'), + ('\u{1b7f}', '\u{1b7f}'), ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'), ('\u{1c4a}', '\u{1c4c}'), @@ -5211,7 +5416,6 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1cbb}', '\u{1cbc}'), ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfb}', '\u{1cff}'), - ('\u{1dfa}', '\u{1dfa}'), ('\u{1f16}', '\u{1f17}'), ('\u{1f1e}', '\u{1f1f}'), ('\u{1f46}', '\u{1f47}'), @@ -5232,15 +5436,13 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{2072}', '\u{2073}'), ('\u{208f}', '\u{208f}'), ('\u{209d}', '\u{209f}'), - ('\u{20c0}', '\u{20cf}'), + ('\u{20c1}', '\u{20cf}'), ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'), ('\u{2427}', '\u{243f}'), ('\u{244b}', '\u{245f}'), ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b96}'), - ('\u{2c2f}', '\u{2c2f}'), - ('\u{2c5f}', '\u{2c5f}'), ('\u{2cf4}', '\u{2cf8}'), ('\u{2d26}', '\u{2d26}'), ('\u{2d28}', '\u{2d2c}'), @@ -5256,7 +5458,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{2dcf}', '\u{2dcf}'), ('\u{2dd7}', '\u{2dd7}'), ('\u{2ddf}', '\u{2ddf}'), - ('\u{2e53}', '\u{2e7f}'), + ('\u{2e5e}', '\u{2e7f}'), ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'), ('\u{2fd6}', '\u{2fef}'), @@ -5268,13 +5470,14 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{318f}', '\u{318f}'), ('\u{31e4}', '\u{31ef}'), ('\u{321f}', '\u{321f}'), - ('\u{9ffd}', '\u{9fff}'), ('\u{a48d}', '\u{a48f}'), ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'), ('\u{a6f8}', '\u{a6ff}'), - ('\u{a7c0}', '\u{a7c1}'), - ('\u{a7cb}', '\u{a7f4}'), + ('\u{a7cb}', '\u{a7cf}'), + ('\u{a7d2}', '\u{a7d2}'), + ('\u{a7d4}', '\u{a7d4}'), + ('\u{a7da}', '\u{a7f1}'), ('\u{a82d}', '\u{a82f}'), ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'), @@ -5310,11 +5513,10 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{fb3f}', '\u{fb3f}'), ('\u{fb42}', '\u{fb42}'), ('\u{fb45}', '\u{fb45}'), - ('\u{fbc2}', '\u{fbd2}'), - ('\u{fd40}', '\u{fd4f}'), + ('\u{fbc3}', '\u{fbd2}'), ('\u{fd90}', '\u{fd91}'), - ('\u{fdc8}', '\u{fdef}'), - ('\u{fdfe}', '\u{fdff}'), + ('\u{fdc8}', '\u{fdce}'), + ('\u{fdd0}', '\u{fdef}'), ('\u{fe1a}', '\u{fe1f}'), ('\u{fe53}', '\u{fe53}'), ('\u{fe67}', '\u{fe67}'), @@ -5358,10 +5560,20 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{104fc}', '\u{104ff}'), ('\u{10528}', '\u{1052f}'), ('\u{10564}', '\u{1056e}'), - ('\u{10570}', '\u{105ff}'), + ('\u{1057b}', '\u{1057b}'), + ('\u{1058b}', '\u{1058b}'), + ('\u{10593}', '\u{10593}'), + ('\u{10596}', '\u{10596}'), + ('\u{105a2}', '\u{105a2}'), + ('\u{105b2}', '\u{105b2}'), + ('\u{105ba}', '\u{105ba}'), + ('\u{105bd}', '\u{105ff}'), ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'), - ('\u{10768}', '\u{107ff}'), + ('\u{10768}', '\u{1077f}'), + ('\u{10786}', '\u{10786}'), + ('\u{107b1}', '\u{107b1}'), + ('\u{107bb}', '\u{107ff}'), ('\u{10806}', '\u{10807}'), ('\u{10809}', '\u{10809}'), ('\u{10836}', '\u{10836}'), @@ -5402,14 +5614,15 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{10e7f}', '\u{10e7f}'), ('\u{10eaa}', '\u{10eaa}'), ('\u{10eae}', '\u{10eaf}'), - ('\u{10eb2}', '\u{10eff}'), + ('\u{10eb2}', '\u{10efc}'), ('\u{10f28}', '\u{10f2f}'), - ('\u{10f5a}', '\u{10faf}'), + ('\u{10f5a}', '\u{10f6f}'), + ('\u{10f8a}', '\u{10faf}'), ('\u{10fcc}', '\u{10fdf}'), ('\u{10ff7}', '\u{10fff}'), ('\u{1104e}', '\u{11051}'), - ('\u{11070}', '\u{1107e}'), - ('\u{110c2}', '\u{110cc}'), + ('\u{11076}', '\u{1107e}'), + ('\u{110c3}', '\u{110cc}'), ('\u{110ce}', '\u{110cf}'), ('\u{110e9}', '\u{110ef}'), ('\u{110fa}', '\u{110ff}'), @@ -5419,7 +5632,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{111e0}', '\u{111e0}'), ('\u{111f5}', '\u{111ff}'), ('\u{11212}', '\u{11212}'), - ('\u{1123f}', '\u{1127f}'), + ('\u{11242}', '\u{1127f}'), ('\u{11287}', '\u{11287}'), ('\u{11289}', '\u{11289}'), ('\u{1128e}', '\u{1128e}'), @@ -5451,11 +5664,11 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{11645}', '\u{1164f}'), ('\u{1165a}', '\u{1165f}'), ('\u{1166d}', '\u{1167f}'), - ('\u{116b9}', '\u{116bf}'), + ('\u{116ba}', '\u{116bf}'), ('\u{116ca}', '\u{116ff}'), ('\u{1171b}', '\u{1171c}'), ('\u{1172c}', '\u{1172f}'), - ('\u{11740}', '\u{117ff}'), + ('\u{11747}', '\u{117ff}'), ('\u{1183c}', '\u{1189f}'), ('\u{118f3}', '\u{118fe}'), ('\u{11907}', '\u{11908}'), @@ -5470,8 +5683,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{119d8}', '\u{119d9}'), ('\u{119e5}', '\u{119ff}'), ('\u{11a48}', '\u{11a4f}'), - ('\u{11aa3}', '\u{11abf}'), - ('\u{11af9}', '\u{11bff}'), + ('\u{11aa3}', '\u{11aaf}'), + ('\u{11af9}', '\u{11aff}'), + ('\u{11b0a}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'), ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'), @@ -5492,20 +5706,24 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{11d92}', '\u{11d92}'), ('\u{11d99}', '\u{11d9f}'), ('\u{11daa}', '\u{11edf}'), - ('\u{11ef9}', '\u{11faf}'), + ('\u{11ef9}', '\u{11eff}'), + ('\u{11f11}', '\u{11f11}'), + ('\u{11f3b}', '\u{11f3d}'), + ('\u{11f5a}', '\u{11faf}'), ('\u{11fb1}', '\u{11fbf}'), ('\u{11ff2}', '\u{11ffe}'), ('\u{1239a}', '\u{123ff}'), ('\u{1246f}', '\u{1246f}'), ('\u{12475}', '\u{1247f}'), - ('\u{12544}', '\u{12fff}'), - ('\u{1342f}', '\u{1342f}'), - ('\u{13439}', '\u{143ff}'), + ('\u{12544}', '\u{12f8f}'), + ('\u{12ff3}', '\u{12fff}'), + ('\u{13456}', '\u{143ff}'), ('\u{14647}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'), ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'), - ('\u{16a70}', '\u{16acf}'), + ('\u{16abf}', '\u{16abf}'), + ('\u{16aca}', '\u{16acf}'), ('\u{16aee}', '\u{16aef}'), ('\u{16af6}', '\u{16aff}'), ('\u{16b46}', '\u{16b4f}'), @@ -5521,20 +5739,29 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{16ff2}', '\u{16fff}'), ('\u{187f8}', '\u{187ff}'), ('\u{18cd6}', '\u{18cff}'), - ('\u{18d09}', '\u{1afff}'), - ('\u{1b11f}', '\u{1b14f}'), - ('\u{1b153}', '\u{1b163}'), + ('\u{18d09}', '\u{1afef}'), + ('\u{1aff4}', '\u{1aff4}'), + ('\u{1affc}', '\u{1affc}'), + ('\u{1afff}', '\u{1afff}'), + ('\u{1b123}', '\u{1b131}'), + ('\u{1b133}', '\u{1b14f}'), + ('\u{1b153}', '\u{1b154}'), + ('\u{1b156}', '\u{1b163}'), ('\u{1b168}', '\u{1b16f}'), ('\u{1b2fc}', '\u{1bbff}'), ('\u{1bc6b}', '\u{1bc6f}'), ('\u{1bc7d}', '\u{1bc7f}'), ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'), - ('\u{1bca4}', '\u{1cfff}'), + ('\u{1bca4}', '\u{1ceff}'), + ('\u{1cf2e}', '\u{1cf2f}'), + ('\u{1cf47}', '\u{1cf4f}'), + ('\u{1cfc4}', '\u{1cfff}'), ('\u{1d0f6}', '\u{1d0ff}'), ('\u{1d127}', '\u{1d128}'), - ('\u{1d1e9}', '\u{1d1ff}'), - ('\u{1d246}', '\u{1d2df}'), + ('\u{1d1eb}', '\u{1d1ff}'), + ('\u{1d246}', '\u{1d2bf}'), + ('\u{1d2d4}', '\u{1d2df}'), ('\u{1d2f4}', '\u{1d2ff}'), ('\u{1d357}', '\u{1d35f}'), ('\u{1d379}', '\u{1d3ff}'), @@ -5560,18 +5787,28 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1d7cc}', '\u{1d7cd}'), ('\u{1da8c}', '\u{1da9a}'), ('\u{1daa0}', '\u{1daa0}'), - ('\u{1dab0}', '\u{1dfff}'), + ('\u{1dab0}', '\u{1deff}'), + ('\u{1df1f}', '\u{1df24}'), + ('\u{1df2b}', '\u{1dfff}'), ('\u{1e007}', '\u{1e007}'), ('\u{1e019}', '\u{1e01a}'), ('\u{1e022}', '\u{1e022}'), ('\u{1e025}', '\u{1e025}'), - ('\u{1e02b}', '\u{1e0ff}'), + ('\u{1e02b}', '\u{1e02f}'), + ('\u{1e06e}', '\u{1e08e}'), + ('\u{1e090}', '\u{1e0ff}'), ('\u{1e12d}', '\u{1e12f}'), ('\u{1e13e}', '\u{1e13f}'), ('\u{1e14a}', '\u{1e14d}'), - ('\u{1e150}', '\u{1e2bf}'), + ('\u{1e150}', '\u{1e28f}'), + ('\u{1e2af}', '\u{1e2bf}'), ('\u{1e2fa}', '\u{1e2fe}'), - ('\u{1e300}', '\u{1e7ff}'), + ('\u{1e300}', '\u{1e4cf}'), + ('\u{1e4fa}', '\u{1e7df}'), + ('\u{1e7e7}', '\u{1e7e7}'), + ('\u{1e7ec}', '\u{1e7ec}'), + ('\u{1e7ef}', '\u{1e7ef}'), + ('\u{1e7ff}', '\u{1e7ff}'), ('\u{1e8c5}', '\u{1e8c6}'), ('\u{1e8d7}', '\u{1e8ff}'), ('\u{1e94c}', '\u{1e94f}'), @@ -5625,39 +5862,39 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1f249}', '\u{1f24f}'), ('\u{1f252}', '\u{1f25f}'), ('\u{1f266}', '\u{1f2ff}'), - ('\u{1f6d8}', '\u{1f6df}'), + ('\u{1f6d8}', '\u{1f6db}'), ('\u{1f6ed}', '\u{1f6ef}'), ('\u{1f6fd}', '\u{1f6ff}'), - ('\u{1f774}', '\u{1f77f}'), - ('\u{1f7d9}', '\u{1f7df}'), - ('\u{1f7ec}', '\u{1f7ff}'), + ('\u{1f777}', '\u{1f77a}'), + ('\u{1f7da}', '\u{1f7df}'), + ('\u{1f7ec}', '\u{1f7ef}'), + ('\u{1f7f1}', '\u{1f7ff}'), ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}', '\u{1f84f}'), ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8af}'), ('\u{1f8b2}', '\u{1f8ff}'), - ('\u{1f979}', '\u{1f979}'), - ('\u{1f9cc}', '\u{1f9cc}'), ('\u{1fa54}', '\u{1fa5f}'), ('\u{1fa6e}', '\u{1fa6f}'), - ('\u{1fa75}', '\u{1fa77}'), - ('\u{1fa7b}', '\u{1fa7f}'), - ('\u{1fa87}', '\u{1fa8f}'), - ('\u{1faa9}', '\u{1faaf}'), - ('\u{1fab7}', '\u{1fabf}'), - ('\u{1fac3}', '\u{1facf}'), - ('\u{1fad7}', '\u{1faff}'), + ('\u{1fa7d}', '\u{1fa7f}'), + ('\u{1fa89}', '\u{1fa8f}'), + ('\u{1fabe}', '\u{1fabe}'), + ('\u{1fac6}', '\u{1facd}'), + ('\u{1fadc}', '\u{1fadf}'), + ('\u{1fae9}', '\u{1faef}'), + ('\u{1faf9}', '\u{1faff}'), ('\u{1fb93}', '\u{1fb93}'), ('\u{1fbcb}', '\u{1fbef}'), ('\u{1fbfa}', '\u{1ffff}'), - ('\u{2a6de}', '\u{2a6ff}'), - ('\u{2b735}', '\u{2b73f}'), + ('\u{2a6e0}', '\u{2a6ff}'), + ('\u{2b73a}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}', '\u{2ceaf}'), ('\u{2ebe1}', '\u{2f7ff}'), ('\u{2fa1e}', '\u{2ffff}'), - ('\u{3134b}', '\u{e0000}'), + ('\u{3134b}', '\u{3134f}'), + ('\u{323b0}', '\u{e0000}'), ('\u{e0002}', '\u{e001f}'), ('\u{e0080}', '\u{e00ff}'), ('\u{e01f0}', '\u{effff}'), @@ -6097,7 +6334,7 @@ pub const UPPERCASE_LETTER: &'static [(char, char)] = &[ ('âŸ', 'âż'), ('â
', 'â
'), ('â', 'â'), - ('â°', 'â°ź'), + ('â°', 'â°Ż'), ('â± ', 'â± '), ('ⱹ', 'â±€'), ('Ⱨ', 'Ⱨ'), @@ -6262,13 +6499,21 @@ pub const UPPERCASE_LETTER: &'static [(char, char)] = &[ ('êș', 'êș'), ('êŒ', 'êŒ'), ('êŸ', 'êŸ'), + ('ê', 'ê'), ('ê', 'ê'), - ('ê', '\u{a7c7}'), - ('\u{a7c9}', '\u{a7c9}'), - ('\u{a7f5}', '\u{a7f5}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê”', 'ê”'), ('ïŒĄ', 'ïŒș'), ('đ', 'đ§'), ('đ°', 'đ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), ('đČ', 'đČČ'), ('đą ', 'đąż'), ('đč', 'đč'), diff --git a/src/unicode_tables/grapheme_cluster_break.rs b/src/unicode_tables/grapheme_cluster_break.rs index 7df9d2b..294dfbd 100644 --- a/src/unicode_tables/grapheme_cluster_break.rs +++ b/src/unicode_tables/grapheme_cluster_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate grapheme-cluster-break ucd-13.0.0 --chars +// ucd-generate grapheme-cluster-break ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("CR", CR), @@ -25,7 +25,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ pub const CR: &'static [(char, char)] = &[('\r', '\r')]; pub const CONTROL: &'static [(char, char)] = &[ - ('\u{0}', '\t'), + ('\0', '\t'), ('\u{b}', '\u{c}'), ('\u{e}', '\u{1f}'), ('\u{7f}', '\u{9f}'), @@ -38,7 +38,7 @@ pub const CONTROL: &'static [(char, char)] = &[ ('\u{2060}', '\u{206f}'), ('\u{feff}', '\u{feff}'), ('\u{fff0}', '\u{fffb}'), - ('\u{13430}', '\u{13438}'), + ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), ('\u{e0000}', '\u{e001f}'), @@ -71,7 +71,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), @@ -116,6 +117,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c00}'), ('\u{c04}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -147,7 +149,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -171,7 +173,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1732}', '\u{1733}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -180,6 +182,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{17c9}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', '\u{1922}'), @@ -195,7 +198,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1a65}', '\u{1a6c}'), ('\u{1a73}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), ('\u{1b34}', '\u{1b3a}'), ('\u{1b3c}', '\u{1b3c}'), @@ -217,8 +220,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{200c}', '\u{200c}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), @@ -276,12 +278,17 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', '\u{11081}'), ('\u{110b3}', '\u{110b6}'), ('\u{110b9}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{1112b}'), ('\u{1112d}', '\u{11134}'), @@ -294,6 +301,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11234}', '\u{11234}'), ('\u{11236}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), ('\u{112e3}', '\u{112ea}'), ('\u{11300}', '\u{11301}'), @@ -361,12 +369,20 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11d95}', '\u{11d95}'), ('\u{11d97}', '\u{11d97}'), ('\u{11ef3}', '\u{11ef4}'), + ('\u{11f00}', '\u{11f01}'), + ('\u{11f36}', '\u{11f3a}'), + ('\u{11f40}', '\u{11f40}'), + ('\u{11f42}', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d165}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d16e}', '\u{1d172}'), @@ -385,8 +401,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('đ»', 'đż'), @@ -1206,16 +1225,18 @@ pub const PREPEND: &'static [(char, char)] = &[ ('\u{600}', '\u{605}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), + ('\u{890}', '\u{891}'), ('\u{8e2}', '\u{8e2}'), ('à”', 'à”'), ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), ('đ', 'đ'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('đ€ż', 'đ€ż'), + ('đ„', 'đ„'), ('đšș', 'đšș'), ('đȘ', 'đȘ'), ('đ”', 'đ”'), + ('đŒ', 'đŒ'), ]; pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('đŠ', 'đż')]; @@ -1252,6 +1273,7 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('àł', 'àł'), ('àł', 'àł'), ('àł', 'àł'), + ('àłł', 'àłł'), ('àŽ', 'àŽ'), ('àŽż', 'à”'), ('à”', 'à”'), @@ -1268,6 +1290,8 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('á»', 'áŒ'), ('á', 'á'), ('á', 'á'), + ('á', 'á'), + ('áŽ', 'áŽ'), ('á¶', 'á¶'), ('áŸ', 'á
'), ('á', 'á'), @@ -1324,7 +1348,7 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('đ', 'đ'), ('đł', 'đ”'), ('đż', 'đ'), - ('\u{111ce}', '\u{111ce}'), + ('đ', 'đ'), ('đŹ', 'đź'), ('đČ', 'đł'), ('đ”', 'đ”'), @@ -1352,15 +1376,14 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('đŹ', 'đŹ'), ('đź', 'đŻ'), ('đ¶', 'đ¶'), - ('đ ', 'đĄ'), ('đŠ', 'đŠ'), ('đ Ź', 'đ ź'), ('đ ž', 'đ ž'), - ('\u{11931}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193d}', '\u{1193d}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11942}'), + ('đ€±', 'đ€”'), + ('đ€·', 'đ€ž'), + ('đ€œ', 'đ€œ'), + ('đ„', 'đ„'), + ('đ„', 'đ„'), ('đ§', 'đ§'), ('đ§', 'đ§'), ('đ§€', 'đ§€'), @@ -1376,8 +1399,12 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('đ¶', 'đ¶'), ('đ¶', 'đ¶'), ('đ»”', 'đ»¶'), + ('đŒ', 'đŒ'), + ('đŒŽ', 'đŒ”'), + ('đŒŸ', 'đŒż'), + ('đœ', 'đœ'), ('đœ', 'đŸ'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), ('đ
Š', 'đ
Š'), ('đ
', 'đ
'), ]; diff --git a/src/unicode_tables/perl_decimal.rs b/src/unicode_tables/perl_decimal.rs index 2a09259..4f4c08a 100644 --- a/src/unicode_tables/perl_decimal.rs +++ b/src/unicode_tables/perl_decimal.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate general-category ucd-13.0.0 --chars --include decimalnumber +// ucd-generate general-category ucd-15.0.0 --chars --include decimalnumber // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[("Decimal_Number", DECIMAL_NUMBER)]; @@ -60,15 +60,18 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ°', 'đč'), ('đŁ ', 'đŁ©'), - ('\u{11950}', '\u{11959}'), + ('đ„', 'đ„'), ('đ±', 'đ±'), ('đ”', 'đ”'), ('đ¶ ', 'đ¶©'), + ('đœ', 'đœ'), ('đ© ', 'đ©©'), + ('đ«', 'đ«'), ('đ', 'đ'), ('đ', 'đż'), ('đ
', 'đ
'), ('đ°', 'đč'), + ('đ°', 'đč'), ('đ„', 'đ„'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('đŻ°', 'đŻč'), ]; diff --git a/src/unicode_tables/perl_space.rs b/src/unicode_tables/perl_space.rs index c112dd1..1741695 100644 --- a/src/unicode_tables/perl_space.rs +++ b/src/unicode_tables/perl_space.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-bool ucd-13.0.0 --chars --include whitespace +// ucd-generate property-bool ucd-15.0.0 --chars --include whitespace // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[("White_Space", WHITE_SPACE)]; diff --git a/src/unicode_tables/perl_word.rs b/src/unicode_tables/perl_word.rs index df9eac7..c1b66bd 100644 --- a/src/unicode_tables/perl_word.rs +++ b/src/unicode_tables/perl_word.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate perl-word ucd-13.0.0 --chars +// ucd-generate perl-word ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const PERL_WORD: &'static [(char, char)] = &[ ('0', '9'), @@ -57,9 +57,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('à ', '\u{82d}'), ('àĄ', '\u{85b}'), ('àĄ ', 'àĄȘ'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), - ('\u{8d3}', '\u{8e1}'), + ('àĄ°', 'àą'), + ('àą', 'àą'), + ('\u{898}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('à„Š', 'à„Ż'), ('à„±', 'àŠ'), @@ -143,11 +143,12 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('à°', 'à°'), ('à°', 'à°š'), ('à°Ș', 'à°č'), - ('à°œ', 'à±'), + ('\u{c3c}', 'à±'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', '\u{c63}'), ('ొ', 'à±Ż'), ('àČ', 'àČ'), @@ -160,10 +161,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{cc6}', 'àł'), ('àł', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', '\u{ce3}'), ('àłŠ', 'àłŻ'), - ('àł±', 'àłČ'), + ('àł±', 'àłł'), ('\u{d00}', 'àŽ'), ('àŽ', 'àŽ'), ('àŽ', '\u{d44}'), @@ -196,7 +197,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('àș§', 'àșœ'), ('à»', 'à»'), ('à»', 'à»'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('à»', 'à»'), ('à»', 'à»'), ('àŒ', 'àŒ'), @@ -242,9 +243,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('á', 'á'), ('á ', 'áȘ'), ('áź', 'áž'), - ('á', 'á'), - ('á', '\u{1714}'), - ('á ', '\u{1734}'), + ('á', 'á'), + ('á', 'áŽ'), ('á', '\u{1753}'), ('á ', 'áŹ'), ('áź', 'á°'), @@ -254,7 +254,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('á', '\u{17dd}'), ('á ', 'á©'), ('\u{180b}', '\u{180d}'), - ('á ', 'á '), + ('\u{180f}', 'á '), ('á ', 'ᥞ'), ('áą', 'áąȘ'), ('áą°', 'ᣔ'), @@ -272,8 +272,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{1a7f}', 'áȘ'), ('áȘ', 'áȘ'), ('áȘ§', 'áȘ§'), - ('\u{1ab0}', '\u{1ac0}'), - ('\u{1b00}', 'á'), + ('\u{1ab0}', '\u{1ace}'), + ('\u{1b00}', 'á'), ('á', 'á'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '᯳'), @@ -285,8 +285,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('áČœ', 'áČż'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', 'áłș'), - ('áŽ', '\u{1df9}'), - ('\u{1dfb}', 'áŒ'), + ('áŽ', 'áŒ'), ('áŒ', 'áŒ'), ('ጠ', 'áœ
'), ('áœ', 'áœ'), @@ -327,9 +326,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('â
', 'â
'), ('â
', 'â'), ('â¶', 'â©'), - ('â°', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'Ⳁ'), + ('â°', 'Ⳁ'), ('âł«', 'âłł'), ('âŽ', '⎄'), ('⎧', '⎧'), @@ -358,11 +355,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ăŒ', 'ăż'), ('ă
', 'ăŻ'), ('ă±', 'ă'), - ('ă ', '\u{31bf}'), + ('ă ', 'ăż'), ('ă°', 'ăż'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), - ('ê', 'ê'), + ('ă', '䶿'), + ('äž', 'ê'), ('ê', 'êœ'), ('ê', 'ê'), ('ê', 'ê«'), @@ -371,9 +367,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('êż', '\u{a6f1}'), ('ê', 'ê'), ('êą', 'ê'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', 'ê §'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'ê §'), ('\u{a82c}', '\u{a82c}'), ('êĄ', 'êĄł'), ('êą', '\u{a8c5}'), @@ -400,7 +398,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), ('êŹ°', 'ê'), - ('ê', '\u{ab69}'), + ('ê', 'ê©'), ('ê°', 'êŻȘ'), ('êŻŹ', '\u{abed}'), ('êŻ°', 'êŻč'), @@ -462,9 +460,20 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('đ', 'đ»'), ('đ', 'đ§'), ('đ°', 'đŁ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đ', 'đ¶'), ('đ', 'đ'), ('đ ', 'đ§'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đ ', 'đ
'), ('đ ', 'đ '), ('đ ', 'đ ”'), @@ -499,31 +508,33 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('đł', 'đłČ'), ('đŽ', '\u{10d27}'), ('đŽ°', 'đŽč'), - ('\u{10e80}', '\u{10ea9}'), + ('đș', 'đș©'), ('\u{10eab}', '\u{10eac}'), - ('\u{10eb0}', '\u{10eb1}'), - ('đŒ', 'đŒ'), + ('đș°', 'đș±'), + ('\u{10efd}', 'đŒ'), ('đŒ§', 'đŒ§'), ('đŒ°', '\u{10f50}'), - ('\u{10fb0}', '\u{10fc4}'), + ('đœ°', '\u{10f85}'), + ('đŸ°', 'đż'), ('đż ', 'đż¶'), ('đ', '\u{11046}'), - ('đŠ', 'đŻ'), + ('đŠ', 'đ”'), ('\u{1107f}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('đ', 'đš'), ('đ°', 'đč'), ('\u{11100}', '\u{11134}'), ('đ¶', 'đż'), - ('đ
', '\u{11147}'), + ('đ
', 'đ
'), ('đ
', '\u{11173}'), ('đ
¶', 'đ
¶'), ('\u{11180}', 'đ'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', 'đ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), + ('\u{1123e}', '\u{11241}'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), @@ -548,7 +559,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{11370}', '\u{11374}'), ('đ', 'đ'), ('đ', 'đ'), - ('\u{1145e}', '\u{11461}'), + ('\u{1145e}', 'đĄ'), ('đ', 'đ
'), ('đ', 'đ'), ('đ', 'đ'), @@ -563,16 +574,17 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('đ', 'đ'), ('\u{1171d}', '\u{1172b}'), ('đ°', 'đč'), + ('đ', 'đ'), ('đ ', '\u{1183a}'), ('đą ', 'đŁ©'), - ('đŁż', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('đŁż', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€”'), + ('đ€·', 'đ€ž'), ('\u{1193b}', '\u{11943}'), - ('\u{11950}', '\u{11959}'), + ('đ„', 'đ„'), ('đŠ ', 'đŠ§'), ('đŠȘ', '\u{119d7}'), ('\u{119da}', 'đ§Ą'), @@ -581,7 +593,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{11a47}', '\u{11a47}'), ('đ©', '\u{11a99}'), ('đȘ', 'đȘ'), - ('đ«', 'đ«ž'), + ('đȘ°', 'đ«ž'), ('đ°', 'đ°'), ('đ°', '\u{11c36}'), ('\u{11c38}', 'đ±'), @@ -603,15 +615,23 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('đ¶', 'đ¶'), ('đ¶ ', 'đ¶©'), ('đ» ', 'đ»¶'), - ('\u{11fb0}', '\u{11fb0}'), + ('\u{11f00}', 'đŒ'), + ('đŒ', '\u{11f3a}'), + ('đŒŸ', '\u{11f42}'), + ('đœ', 'đœ'), + ('đŸ°', 'đŸ°'), ('đ', 'đ'), ('đ', 'đź'), ('đ', 'đ'), - ('đ', 'đź'), + ('đŸ', 'đż°'), + ('đ', 'đŻ'), + ('\u{13440}', '\u{13455}'), ('đ', 'đ'), ('đ ', 'đšž'), ('đ©', 'đ©'), ('đ© ', 'đ©©'), + ('đ©°', 'đȘŸ'), + ('đ«', 'đ«'), ('đ«', 'đ«'), ('\u{16af0}', '\u{16af4}'), ('đŹ', '\u{16b36}'), @@ -625,12 +645,17 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{16f8f}', 'đŸ'), ('đż ', 'đżĄ'), ('đżŁ', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), ('đ', 'đ·'), - ('đ ', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('đ', 'đ'), + ('đ ', 'đł'), + ('đŽ', 'đŽ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('đ', 'đą'), + ('đČ', 'đČ'), ('đ
', 'đ
'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ('đ
°', 'đ»'), ('đ°', 'đ±Ș'), @@ -638,6 +663,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('đČ', 'đČ'), ('đČ', 'đČ'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('đ
', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -681,16 +708,26 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('đ°', 'đ'), + ('\u{1e08f}', '\u{1e08f}'), ('đ', 'đŹ'), ('\u{1e130}', 'đœ'), ('đ
', 'đ
'), ('đ
', 'đ
'), + ('đ', '\u{1e2ae}'), ('đ', 'đč'), + ('đ', 'đč'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ('đ ', 'đŁ'), ('\u{1e8d0}', '\u{1e8d6}'), ('đ€', 'đ„'), @@ -731,13 +768,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('đ°', 'đ
'), ('đ
', 'đ
©'), ('đ
°', 'đ'), - ('\u{1fbf0}', '\u{1fbf9}'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đŻ°', 'đŻč'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ('\u{e0100}', '\u{e01ef}'), ]; diff --git a/src/unicode_tables/property_bool.rs b/src/unicode_tables/property_bool.rs index 21cbaf9..a3e84b5 100644 --- a/src/unicode_tables/property_bool.rs +++ b/src/unicode_tables/property_bool.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-bool ucd-13.0.0 --chars +// ucd-generate property-bool ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ASCII_Hex_Digit", ASCII_HEX_DIGIT), @@ -125,8 +125,9 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('à ', '\u{82c}'), ('àĄ', 'àĄ'), ('àĄ ', 'àĄȘ'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), + ('àĄ°', 'àą'), + ('àą', 'àą'), + ('àą ', 'àŁ'), ('\u{8d4}', '\u{8df}'), ('\u{8e3}', '\u{8e9}'), ('\u{8f0}', 'à€»'), @@ -206,8 +207,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('àŻ', 'àŻ'), ('àŻ', 'àŻ'), ('\u{bd7}', '\u{bd7}'), - ('\u{c00}', 'à°'), - ('à°
', 'à°'), + ('\u{c00}', 'à°'), ('à°', 'à°'), ('à°', 'à°š'), ('à°Ș', 'à°č'), @@ -216,6 +216,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('\u{c4a}', '\u{c4c}'), ('\u{c55}', '\u{c56}'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', '\u{c63}'), ('àČ', 'àČ'), ('àČ
', 'àČ'), @@ -227,9 +228,9 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('\u{cc6}', 'àł'), ('àł', '\u{ccc}'), ('\u{cd5}', '\u{cd6}'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', '\u{ce3}'), - ('àł±', 'àłČ'), + ('àł±', 'àłł'), ('\u{d00}', 'àŽ'), ('àŽ', 'àŽ'), ('àŽ', 'àŽș'), @@ -267,7 +268,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('àŒ', 'àŒ'), ('àœ', 'àœ'), ('àœ', 'àœŹ'), - ('\u{f71}', '\u{f81}'), + ('\u{f71}', '\u{f83}'), ('àŸ', '\u{f97}'), ('\u{f99}', '\u{fbc}'), ('á', '\u{1036}'), @@ -303,9 +304,8 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('á', 'á'), ('á ', 'áȘ'), ('áź', 'áž'), - ('á', 'á'), - ('á', '\u{1713}'), - ('á ', '\u{1733}'), + ('á', '\u{1713}'), + ('á', '\u{1733}'), ('á', '\u{1753}'), ('á ', 'áŹ'), ('áź', 'á°'), @@ -329,9 +329,10 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('á©Ą', '\u{1a74}'), ('áȘ§', 'áȘ§'), ('\u{1abf}', '\u{1ac0}'), + ('\u{1acc}', '\u{1ace}'), ('\u{1b00}', 'Ᏻ'), ('\u{1b35}', 'á'), - ('á
', 'á'), + ('á
', 'á'), ('\u{1b80}', '\u{1ba9}'), ('\u{1bac}', '៯'), ('áźș', 'ᯄ'), @@ -385,9 +386,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('â
', 'â
'), ('â
', 'â'), ('â¶', 'â©'), - ('â°', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'Ⳁ'), + ('â°', 'Ⳁ'), ('âł«', 'âłź'), ('âłČ', 'âłł'), ('âŽ', '⎄'), @@ -416,11 +415,10 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ăŒ', 'ăż'), ('ă
', 'ăŻ'), ('ă±', 'ă'), - ('ă ', '\u{31bf}'), + ('ă ', 'ăż'), ('ă°', 'ăż'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), - ('ê', 'ê'), + ('ă', '䶿'), + ('äž', 'ê'), ('ê', 'êœ'), ('ê', 'ê'), ('ê', 'ê'), @@ -430,9 +428,11 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('êż', 'êŻ'), ('ê', 'ê'), ('êą', 'ê'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', 'ê
'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'ê
'), ('ê ', 'ê §'), ('êĄ', 'êĄł'), ('êą', 'êŁ'), @@ -463,7 +463,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), ('êŹ°', 'ê'), - ('ê', '\u{ab69}'), + ('ê', 'ê©'), ('ê°', 'êŻȘ'), ('ê°', 'íŁ'), ('í°', 'í'), @@ -514,9 +514,20 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('đ', 'đ»'), ('đ', 'đ§'), ('đ°', 'đŁ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đ', 'đ¶'), ('đ', 'đ'), ('đ ', 'đ§'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đ ', 'đ
'), ('đ ', 'đ '), ('đ ', 'đ ”'), @@ -548,30 +559,33 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('đČ', 'đČČ'), ('đł', 'đłČ'), ('đŽ', '\u{10d27}'), - ('\u{10e80}', '\u{10ea9}'), + ('đș', 'đș©'), ('\u{10eab}', '\u{10eac}'), - ('\u{10eb0}', '\u{10eb1}'), + ('đș°', 'đș±'), ('đŒ', 'đŒ'), ('đŒ§', 'đŒ§'), ('đŒ°', 'đœ
'), - ('\u{10fb0}', '\u{10fc4}'), + ('đœ°', 'đŸ'), + ('đŸ°', 'đż'), ('đż ', 'đż¶'), ('đ', '\u{11045}'), - ('đ', 'đž'), + ('đ±', 'đ”'), + ('\u{11080}', 'đž'), + ('\u{110c2}', '\u{110c2}'), ('đ', 'đš'), ('\u{11100}', '\u{11132}'), - ('đ
', '\u{11147}'), + ('đ
', 'đ
'), ('đ
', 'đ
Č'), ('đ
¶', 'đ
¶'), ('\u{11180}', 'đż'), ('đ', 'đ'), - ('\u{111ce}', '\u{111cf}'), + ('đ', '\u{111cf}'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', '\u{11234}'), ('\u{11237}', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), + ('\u{1123e}', '\u{11241}'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), @@ -594,7 +608,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('đ', 'đ'), ('\u{11443}', 'đ
'), ('đ', 'đ'), - ('đ', '\u{11461}'), + ('đ', 'đĄ'), ('đ', 'đ'), ('đ', 'đ
'), ('đ', 'đ'), @@ -608,16 +622,17 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('đž', 'đž'), ('đ', 'đ'), ('\u{1171d}', '\u{1172a}'), + ('đ', 'đ'), ('đ ', 'đ ž'), ('đą ', 'đŁ'), - ('đŁż', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('đŁż', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€”'), + ('đ€·', 'đ€ž'), ('\u{1193b}', '\u{1193c}'), - ('\u{1193f}', '\u{11942}'), + ('đ€ż', 'đ„'), ('đŠ ', 'đŠ§'), ('đŠȘ', '\u{119d7}'), ('\u{119da}', 'đ§'), @@ -627,7 +642,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('\u{11a35}', '\u{11a3e}'), ('đ©', 'đȘ'), ('đȘ', 'đȘ'), - ('đ«', 'đ«ž'), + ('đȘ°', 'đ«ž'), ('đ°', 'đ°'), ('đ°', '\u{11c36}'), ('\u{11c38}', 'đ°Ÿ'), @@ -650,14 +665,20 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('đ¶', 'đ¶'), ('đ¶', 'đ¶'), ('đ» ', 'đ»¶'), - ('\u{11fb0}', '\u{11fb0}'), + ('\u{11f00}', 'đŒ'), + ('đŒ', '\u{11f3a}'), + ('đŒŸ', '\u{11f40}'), + ('đŸ°', 'đŸ°'), ('đ', 'đ'), ('đ', 'đź'), ('đ', 'đ'), - ('đ', 'đź'), + ('đŸ', 'đż°'), + ('đ', 'đŻ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đšž'), ('đ©', 'đ©'), + ('đ©°', 'đȘŸ'), ('đ«', 'đ«'), ('đŹ', 'đŹŻ'), ('đ', 'đ'), @@ -669,12 +690,17 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('\u{16f8f}', 'đŸ'), ('đż ', 'đżĄ'), ('đżŁ', 'đżŁ'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), ('đ', 'đ·'), - ('đ ', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('đ', 'đ'), + ('đ ', 'đł'), + ('đŽ', 'đŽ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('đ', 'đą'), + ('đČ', 'đČ'), ('đ
', 'đ
'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ('đ
°', 'đ»'), ('đ°', 'đ±Ș'), @@ -712,15 +738,25 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('đ', 'đš'), ('đȘ', 'đ'), ('đ', 'đ'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('đ°', 'đ'), + ('\u{1e08f}', '\u{1e08f}'), ('đ', 'đŹ'), ('đ·', 'đœ'), ('đ
', 'đ
'), + ('đ', 'đ'), ('đ', 'đ«'), + ('đ', 'đ«'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ('đ ', 'đŁ'), ('đ€', 'đ„'), ('\u{1e947}', '\u{1e947}'), @@ -761,13 +797,14 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('đ°', 'đ
'), ('đ
', 'đ
©'), ('đ
°', 'đ'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ]; pub const BIDI_CONTROL: &'static [(char, char)] = &[ @@ -873,6 +910,7 @@ pub const BIDI_MIRRORED: &'static [(char, char)] = &[ ('âž', 'âž'), ('âž', 'âž'), ('âž ', 'âž©'), + ('âč', 'âč'), ('ă', 'ă'), ('ă', 'ă'), ('ïč', 'ïč'), @@ -936,7 +974,10 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{7fd}', '\u{7fd}'), ('\u{816}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{902}'), + ('àą', 'àą'), + ('\u{890}', '\u{891}'), + ('\u{898}', '\u{89f}'), + ('àŁ', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), ('\u{941}', '\u{948}'), @@ -977,6 +1018,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{bcd}', '\u{bcd}'), ('\u{c00}', '\u{c00}'), ('\u{c04}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -1003,7 +1045,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), ('à»', 'à»'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -1028,7 +1070,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('áŒ', 'áŒ'), ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1732}', '\u{1733}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -1037,7 +1079,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{17c9}', '\u{17d3}'), ('á', 'á'), ('\u{17dd}', '\u{17dd}'), - ('\u{180b}', '\u{180e}'), + ('\u{180b}', '\u{180f}'), ('áĄ', 'áĄ'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), @@ -1055,7 +1097,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{1a73}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), ('áȘ§', 'áȘ§'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), ('\u{1b34}', '\u{1b34}'), ('\u{1b36}', '\u{1b3a}'), @@ -1081,8 +1123,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{1cf8}', '\u{1cf9}'), ('ᎏ', 'á”Ș'), ('ᔞ', 'ᔞ'), - ('á¶', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('á¶', '\u{1dff}'), ('ៜ', 'ៜ'), ('áŸż', 'áż'), ('áż', 'áż'), @@ -1123,6 +1164,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('ê', 'êĄ'), ('ê°', 'ê°'), ('ê', 'ê'), + ('êČ', 'êŽ'), ('êž', 'êč'), ('\u{a802}', '\u{a802}'), ('\u{a806}', '\u{a806}'), @@ -1157,12 +1199,12 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('ê«ł', 'ê«Ž'), ('\u{aaf6}', '\u{aaf6}'), ('ê', 'ê'), - ('\u{ab69}', '\u{ab6b}'), + ('ê©', 'ê«'), ('\u{abe5}', '\u{abe5}'), ('\u{abe8}', '\u{abe8}'), ('\u{abed}', '\u{abed}'), ('\u{fb1e}', '\u{fb1e}'), - ('ïźČ', 'ïŻ'), + ('ïźČ', 'ïŻ'), ('\u{fe00}', '\u{fe0f}'), ('ïž', 'ïž'), ('\u{fe20}', '\u{fe2f}'), @@ -1181,6 +1223,9 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{101fd}', '\u{101fd}'), ('\u{102e0}', '\u{102e0}'), ('\u{10376}', '\u{1037a}'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('\u{10a01}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}', '\u{10a0f}'), @@ -1189,13 +1234,18 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', '\u{11081}'), ('\u{110b3}', '\u{110b6}'), ('\u{110b9}', '\u{110ba}'), ('\u{110bd}', '\u{110bd}'), + ('\u{110c2}', '\u{110c2}'), ('\u{110cd}', '\u{110cd}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{1112b}'), @@ -1209,6 +1259,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{11234}', '\u{11234}'), ('\u{11236}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), ('\u{112e3}', '\u{112ea}'), ('\u{11300}', '\u{11301}'), @@ -1270,7 +1321,12 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{11d95}', '\u{11d95}'), ('\u{11d97}', '\u{11d97}'), ('\u{11ef3}', '\u{11ef4}'), - ('\u{13430}', '\u{13438}'), + ('\u{11f00}', '\u{11f01}'), + ('\u{11f36}', '\u{11f3a}'), + ('\u{11f40}', '\u{11f40}'), + ('\u{11f42}', '\u{11f42}'), + ('\u{13430}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('đ', 'đ'), @@ -1278,8 +1334,13 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{16f8f}', 'đŸ'), ('đż ', 'đżĄ'), ('đżŁ', '\u{16fe4}'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1bca0}', '\u{1bca3}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d173}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), @@ -1296,8 +1357,12 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('đ°', 'đ'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', 'đœ'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('đ«', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', 'đ„'), ('đ»', 'đż'), @@ -1338,7 +1403,7 @@ pub const CASED: &'static [(char, char)] = &[ ('á', 'á'), ('á', 'á'), ('á', 'áș'), - ('áœ', 'áż'), + ('áŒ', 'áż'), ('á ', 'á”'), ('áž', 'áœ'), ('áČ', 'áČ'), @@ -1384,9 +1449,7 @@ pub const CASED: &'static [(char, char)] = &[ ('â
', 'â
ż'), ('â', 'â'), ('â¶', 'â©'), - ('â°', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'Ⳁ'), + ('â°', 'Ⳁ'), ('âł«', 'âłź'), ('âłČ', 'âłł'), ('âŽ', '⎄'), @@ -1396,12 +1459,14 @@ pub const CASED: &'static [(char, char)] = &[ ('ê', 'ê'), ('êą', 'ê'), ('ê', 'ê'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', '\u{a7f6}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'ê¶'), ('êž', 'êș'), ('êŹ°', 'ê'), - ('ê', '\u{ab68}'), + ('ê', 'ê©'), ('ê°', 'êźż'), ('ïŹ', 'ïŹ'), ('ïŹ', 'ïŹ'), @@ -1410,6 +1475,18 @@ pub const CASED: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ°', 'đ'), ('đ', 'đ»'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), + ('đ', 'đ'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đČ', 'đČČ'), ('đł', 'đłČ'), ('đą ', 'đŁ'), @@ -1444,6 +1521,10 @@ pub const CASED: &'static [(char, char)] = &[ ('đ', 'đš'), ('đȘ', 'đ'), ('đ', 'đ'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), + ('đ°', 'đ'), ('đ€', 'đ„'), ('đ°', 'đ
'), ('đ
', 'đ
©'), @@ -1886,7 +1967,7 @@ pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[ ('â
', 'â
Ż'), ('â', 'â'), ('â¶', 'â'), - ('â°', 'â°ź'), + ('â°', 'â°Ż'), ('â± ', 'â± '), ('ⱹ', 'â±€'), ('Ⱨ', 'Ⱨ'), @@ -2051,16 +2132,24 @@ pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[ ('êș', 'êș'), ('êŒ', 'êŒ'), ('êŸ', 'êŸ'), + ('ê', 'ê'), ('ê', 'ê'), - ('ê', '\u{a7c7}'), - ('\u{a7c9}', '\u{a7c9}'), - ('\u{a7f5}', '\u{a7f5}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê”', 'ê”'), ('ê°', 'êźż'), ('ïŹ', 'ïŹ'), ('ïŹ', 'ïŹ'), ('ïŒĄ', 'ïŒș'), ('đ', 'đ§'), ('đ°', 'đ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), ('đČ', 'đČČ'), ('đą ', 'đąż'), ('đč', 'đč'), @@ -2156,9 +2245,7 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('â
', 'â
ż'), ('â', 'â'), ('â¶', 'â©'), - ('â°', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'â±°'), + ('â°', 'â±°'), ('â±Č', 'ⱳ'), ('â±”', 'ⱶ'), ('ⱟ', 'ⳣ'), @@ -2175,9 +2262,10 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('ê', 'ê'), ('ê', 'ê'), ('ê', 'êź'), - ('ê°', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', '\u{a7f6}'), + ('ê°', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê”', 'ê¶'), ('ê', 'ê'), ('ê°', 'êźż'), ('ïŹ', 'ïŹ'), @@ -2187,6 +2275,14 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ°', 'đ'), ('đ', 'đ»'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đČ', 'đČČ'), ('đł', 'đłČ'), ('đą ', 'đŁ'), @@ -2620,7 +2716,7 @@ pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[ ('â
', 'â
Ż'), ('â', 'â'), ('â¶', 'â'), - ('â°', 'â°ź'), + ('â°', 'â°Ż'), ('â± ', 'â± '), ('ⱹ', 'â±€'), ('Ⱨ', 'Ⱨ'), @@ -2785,13 +2881,21 @@ pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[ ('êș', 'êș'), ('êŒ', 'êŒ'), ('êŸ', 'êŸ'), + ('ê', 'ê'), ('ê', 'ê'), - ('ê', '\u{a7c7}'), - ('\u{a7c9}', '\u{a7c9}'), - ('\u{a7f5}', '\u{a7f5}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê”', 'ê”'), ('ïŒĄ', 'ïŒș'), ('đ', 'đ§'), ('đ°', 'đ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), ('đČ', 'đČČ'), ('đą ', 'đąż'), ('đč', 'đč'), @@ -3237,7 +3341,7 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('â
°', 'â
ż'), ('â', 'â'), ('â', 'â©'), - ('â°°', 'â±'), + ('â°°', 'â±'), ('ⱥ', 'ⱥ'), ('ⱄ', 'ⱊ'), ('ⱚ', 'ⱚ'), @@ -3402,10 +3506,14 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ê»', 'ê»'), ('êœ', 'êœ'), ('êż', 'êż'), + ('ê', 'ê'), ('ê', 'ê'), - ('\u{a7c8}', '\u{a7c8}'), - ('\u{a7ca}', '\u{a7ca}'), - ('\u{a7f6}', '\u{a7f6}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê¶', 'ê¶'), ('ê', 'ê'), ('ê°', 'êźż'), ('ïŹ', 'ïŹ'), @@ -3413,6 +3521,10 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ïœ', 'ïœ'), ('đš', 'đ'), ('đ', 'đ»'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đł', 'đłČ'), ('đŁ', 'đŁ'), ('đč ', 'đčż'), @@ -3859,7 +3971,7 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('â
°', 'â
ż'), ('â', 'â'), ('â', 'â©'), - ('â°°', 'â±'), + ('â°°', 'â±'), ('ⱥ', 'ⱥ'), ('ⱄ', 'ⱊ'), ('ⱚ', 'ⱚ'), @@ -4024,10 +4136,14 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ê»', 'ê»'), ('êœ', 'êœ'), ('êż', 'êż'), + ('ê', 'ê'), ('ê', 'ê'), - ('\u{a7c8}', '\u{a7c8}'), - ('\u{a7ca}', '\u{a7ca}'), - ('\u{a7f6}', '\u{a7f6}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê¶', 'ê¶'), ('ê', 'ê'), ('ê°', 'êźż'), ('ïŹ', 'ïŹ'), @@ -4035,6 +4151,10 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ïœ', 'ïœ'), ('đš', 'đ'), ('đ', 'đ»'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đł', 'đłČ'), ('đŁ', 'đŁ'), ('đč ', 'đčż'), @@ -4056,6 +4176,7 @@ pub const DASH: &'static [(char, char)] = &[ ('âž', 'âž'), ('âžș', 'âž»'), ('âč', 'âč'), + ('âč', 'âč'), ('ă', 'ă'), ('ă°', 'ă°'), ('ă ', 'ă '), @@ -4063,7 +4184,7 @@ pub const DASH: &'static [(char, char)] = &[ ('ïč', 'ïč'), ('ïčŁ', 'ïčŁ'), ('ïŒ', 'ïŒ'), - ('\u{10ead}', '\u{10ead}'), + ('đș', 'đș'), ]; pub const DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[ @@ -4072,7 +4193,7 @@ pub const DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[ ('\u{61c}', '\u{61c}'), ('á
', 'á
'), ('\u{17b4}', '\u{17b5}'), - ('\u{180b}', '\u{180e}'), + ('\u{180b}', '\u{180f}'), ('\u{200b}', '\u{200f}'), ('\u{202a}', '\u{202e}'), ('\u{2060}', '\u{206f}'), @@ -4126,6 +4247,8 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{7a6}', '\u{7b0}'), ('\u{7eb}', 'ß”'), ('\u{818}', '\u{819}'), + ('\u{898}', '\u{89f}'), + ('àŁ', '\u{8d2}'), ('\u{8e3}', '\u{8fe}'), ('\u{93c}', '\u{93c}'), ('\u{94d}', '\u{94d}'), @@ -4142,6 +4265,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{b4d}', '\u{b4d}'), ('\u{b55}', '\u{b55}'), ('\u{bcd}', '\u{bcd}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c4d}', '\u{c4d}'), ('\u{cbc}', '\u{cbc}'), ('\u{ccd}', '\u{ccd}'), @@ -4168,12 +4292,14 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('á', 'á'), ('á', 'á'), ('\u{135d}', '\u{135f}'), + ('\u{1714}', 'á'), ('\u{17c9}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{1939}', '\u{193b}'), ('\u{1a75}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1abd}'), + ('\u{1ab0}', '\u{1abe}'), + ('\u{1ac1}', '\u{1acb}'), ('\u{1b34}', '\u{1b34}'), ('á', 'á'), ('\u{1b6b}', '\u{1b73}'), @@ -4186,8 +4312,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('áł·', '\u{1cf9}'), ('ᎏ', 'á”Ș'), ('\u{1dc4}', '\u{1dcf}'), - ('\u{1df5}', '\u{1df9}'), - ('\u{1dfd}', '\u{1dff}'), + ('\u{1df5}', '\u{1dff}'), ('ៜ', 'ៜ'), ('áŸż', 'áż'), ('áż', 'áż'), @@ -4218,7 +4343,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{aabf}', 'ê«'), ('\u{aaf6}', '\u{aaf6}'), ('ê', 'ê'), - ('\u{ab69}', '\u{ab6b}'), + ('ê©', 'ê«'), ('êŻŹ', '\u{abed}'), ('\u{fb1e}', '\u{fb1e}'), ('\u{fe20}', '\u{fe2f}'), @@ -4228,9 +4353,16 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{ff9e}', '\u{ff9f}'), ('ïżŁ', 'ïżŁ'), ('\u{102e0}', '\u{102e0}'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('\u{10ae5}', '\u{10ae6}'), ('đŽą', '\u{10d27}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), + ('\u{11046}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), ('\u{110b9}', '\u{110ba}'), ('\u{11133}', '\u{11134}'), ('\u{11173}', '\u{11173}'), @@ -4250,7 +4382,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('đ¶', '\u{116b7}'), ('\u{1172b}', '\u{1172b}'), ('\u{11839}', '\u{1183a}'), - ('\u{1193d}', '\u{1193e}'), + ('đ€œ', '\u{1193e}'), ('\u{11943}', '\u{11943}'), ('\u{119e0}', '\u{119e0}'), ('\u{11a34}', '\u{11a34}'), @@ -4260,16 +4392,24 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{11d42}', '\u{11d42}'), ('\u{11d44}', '\u{11d45}'), ('\u{11d97}', '\u{11d97}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f8f}', 'đŸ'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), ('đ
', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), + ('đ°', 'đ'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e946}'), @@ -4410,25 +4550,24 @@ pub const EMOJI: &'static [(char, char)] = &[ ('đș', 'đ'), ('đ', 'đ
'), ('đ', 'đ'), - ('đ', '\u{1f6d7}'), - ('đ ', 'đ„'), + ('đ', 'đ'), + ('đ', 'đ„'), ('đ©', 'đ©'), ('đ«', 'đŹ'), ('đ°', 'đ°'), - ('đł', '\u{1f6fc}'), + ('đł', 'đŒ'), ('đ ', 'đ«'), - ('\u{1f90c}', 'đ€ș'), + ('đ°', 'đ°'), + ('đ€', 'đ€ș'), ('đ€Œ', 'đ„
'), - ('đ„', '\u{1f978}'), - ('đ„ș', '\u{1f9cb}'), - ('đ§', 'đ§ż'), - ('đ©°', '\u{1fa74}'), - ('đ©ž', 'đ©ș'), - ('đȘ', '\u{1fa86}'), - ('đȘ', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), + ('đ„', 'đ§ż'), + ('đ©°', 'đ©Œ'), + ('đȘ', 'đȘ'), + ('đȘ', 'đȘœ'), + ('đȘż', 'đ«
'), + ('đ«', 'đ«'), + ('đ« ', 'đ«š'), + ('đ«°', 'đ«ž'), ]; pub const EMOJI_COMPONENT: &'static [(char, char)] = &[ @@ -4473,18 +4612,20 @@ pub const EMOJI_MODIFIER_BASE: &'static [(char, char)] = &[ ('đŽ', 'đ¶'), ('đ', 'đ'), ('đ', 'đ'), - ('\u{1f90c}', '\u{1f90c}'), + ('đ€', 'đ€'), ('đ€', 'đ€'), ('đ€', 'đ€'), ('đ€Š', 'đ€Š'), ('đ€°', 'đ€č'), ('đ€Œ', 'đ€Ÿ'), - ('\u{1f977}', '\u{1f977}'), + ('đ„·', 'đ„·'), ('đŠ”', 'đŠ¶'), ('đŠž', 'đŠč'), ('đŠ»', 'đŠ»'), ('đ§', 'đ§'), ('đ§', 'đ§'), + ('đ«', 'đ«
'), + ('đ«°', 'đ«ž'), ]; pub const EMOJI_PRESENTATION: &'static [(char, char)] = &[ @@ -4553,22 +4694,22 @@ pub const EMOJI_PRESENTATION: &'static [(char, char)] = &[ ('đ', 'đ
'), ('đ', 'đ'), ('đ', 'đ'), - ('đ', '\u{1f6d7}'), + ('đ', 'đ'), + ('đ', 'đ'), ('đ«', 'đŹ'), - ('đŽ', '\u{1f6fc}'), + ('đŽ', 'đŒ'), ('đ ', 'đ«'), - ('\u{1f90c}', 'đ€ș'), + ('đ°', 'đ°'), + ('đ€', 'đ€ș'), ('đ€Œ', 'đ„
'), - ('đ„', '\u{1f978}'), - ('đ„ș', '\u{1f9cb}'), - ('đ§', 'đ§ż'), - ('đ©°', '\u{1fa74}'), - ('đ©ž', 'đ©ș'), - ('đȘ', '\u{1fa86}'), - ('đȘ', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), + ('đ„', 'đ§ż'), + ('đ©°', 'đ©Œ'), + ('đȘ', 'đȘ'), + ('đȘ', 'đȘœ'), + ('đȘż', 'đ«
'), + ('đ«', 'đ«'), + ('đ« ', 'đ«š'), + ('đ«°', 'đ«ž'), ]; pub const EXTENDED_PICTOGRAPHIC: &'static [(char, char)] = &[ @@ -4623,13 +4764,13 @@ pub const EXTENDED_PICTOGRAPHIC: &'static [(char, char)] = &[ ('ă', 'ă'), ('ă', 'ă'), ('đ', '\u{1f0ff}'), - ('\u{1f10d}', '\u{1f10f}'), + ('đ', 'đ'), ('đŻ', 'đŻ'), ('đ
Ź', 'đ
±'), ('đ
Ÿ', 'đ
ż'), ('đ', 'đ'), ('đ', 'đ'), - ('\u{1f1ad}', '\u{1f1e5}'), + ('đ', '\u{1f1e5}'), ('đ', '\u{1f20f}'), ('đ', 'đ'), ('đŻ', 'đŻ'), @@ -4639,14 +4780,14 @@ pub const EXTENDED_PICTOGRAPHIC: &'static [(char, char)] = &[ ('đ', 'đœ'), ('đ', 'đ'), ('đ', '\u{1f6ff}'), - ('\u{1f774}', '\u{1f77f}'), + ('đŽ', 'đż'), ('đ', '\u{1f7ff}'), ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}', '\u{1f84f}'), ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8ff}'), - ('\u{1f90c}', 'đ€ș'), + ('đ€', 'đ€ș'), ('đ€Œ', 'đ„
'), ('đ„', '\u{1faff}'), ('\u{1fc00}', '\u{1fffd}'), @@ -4677,6 +4818,7 @@ pub const EXTENDER: &'static [(char, char)] = &[ ('ê«', 'ê«'), ('ê«ł', 'ê«Ž'), ('', ''), + ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('\u{11a98}', '\u{11a98}'), @@ -4709,7 +4851,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ŚŻ', 'ŚŽ'), ('Ű', 'Ű'), ('Ű', 'Ű'), - ('Ű', 'Ù'), + ('Ű', 'Ù'), ('Ù ', 'ÙŻ'), ('Ù±', 'Û'), ('Û', 'Û'), @@ -4730,8 +4872,8 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('àĄ', 'àĄ'), ('àĄ', 'àĄ'), ('àĄ ', 'àĄȘ'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), + ('àĄ°', 'àą'), + ('àą ', 'àŁ'), ('à€', 'à€č'), ('à€»', 'à€»'), ('à€œ', 'à„'), @@ -4820,6 +4962,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('à°œ', 'à°œ'), ('à±', 'à±'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', 'à±Ą'), ('ొ', 'à±Ż'), ('à±·', 'àČ'), @@ -4833,10 +4976,10 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('àł', 'àł'), ('àł', 'àł'), ('àł', 'àł'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', 'àłĄ'), ('àłŠ', 'àłŻ'), - ('àł±', 'àłČ'), + ('àł±', 'àłł'), ('àŽ', 'àŽ'), ('àŽ', 'àŽ'), ('àŽ', 'àŽș'), @@ -4922,10 +5065,10 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('áž', 'áœ'), ('á', 'á'), ('á ', 'áž'), - ('á', 'á'), - ('á', 'á'), - ('á ', 'á±'), - ('á”', 'á¶'), + ('á', 'á'), + ('á', 'á'), + ('á', 'á±'), + ('áŽ', 'á¶'), ('á', 'á'), ('á ', 'áŹ'), ('áź', 'á°'), @@ -4967,9 +5110,9 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('áŹ', 'Ᏻ'), ('ᏻ', 'ᏻ'), ('áŹœ', 'á'), - ('á', 'á'), + ('á', 'á'), ('á', 'áȘ'), - ('áŽ', 'áŒ'), + ('áŽ', 'áŸ'), ('áź', '៥'), ('៊', '៧'), ('áźȘ', 'áźȘ'), @@ -5013,15 +5156,13 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('â°', 'â±'), ('âŽ', 'â'), ('â', 'â'), - ('â ', 'âż'), + ('â ', 'â'), ('â', 'â'), ('â', 'âŠ'), ('â', 'â'), ('â ', 'âł'), ('â¶', 'âź'), - ('\u{2b97}', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'âłź'), + ('âź', 'âłź'), ('âłČ', 'âłł'), ('âłč', '⎄'), ('⎧', '⎧'), @@ -5037,7 +5178,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('â·', 'â·'), ('â·', 'â·'), ('â·', 'â·'), - ('âž', '\u{2e52}'), + ('âž', 'âč'), ('âș', 'âș'), ('âș', '⻳'), ('âŒ', 'âż'), @@ -5050,8 +5191,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ă±', 'ă'), ('ă', 'ăŁ'), ('ă°', 'ă'), - ('ă ', '\u{9ffc}'), - ('ê', 'ê'), + ('ă ', 'ê'), ('ê', 'ê'), ('ê', 'ê«'), ('ê', 'êź'), @@ -5059,9 +5199,11 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('êŸ', 'ê'), ('ê ', 'êŻ'), ('êČ', 'ê·'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', 'ê '), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'ê '), ('ê ', 'ê
'), ('ê ', 'ê '), ('ê ', 'ê €'), @@ -5103,7 +5245,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('êŹ', 'êŹ'), ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), - ('êŹ°', '\u{ab6b}'), + ('êŹ°', 'ê«'), ('ê°', 'êŻ€'), ('êŻŠ', 'êŻ§'), ('êŻ©', 'êŻŹ'), @@ -5121,11 +5263,11 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ïŹŸ', 'ïŹŸ'), ('ï', 'ï'), ('ï', 'ï'), - ('ï', 'ïŻ'), - ('ïŻ', 'ïŽż'), - ('ï”', 'ï¶'), + ('ï', 'ïŻ'), + ('ïŻ', 'ï¶'), ('ï¶', 'ï·'), - ('ï·°', 'ï·œ'), + ('ï·', 'ï·'), + ('ï·°', 'ï·ż'), ('ïž', 'ïž'), ('ïž°', 'ïč'), ('ïč', 'ïčŠ'), @@ -5151,7 +5293,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ', 'đł'), ('đ·', 'đ'), - ('đ', '\u{1019c}'), + ('đ', 'đ'), ('đ ', 'đ '), ('đ', 'đŒ'), ('đ', 'đ'), @@ -5169,10 +5311,20 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đ', 'đ»'), ('đ', 'đ§'), ('đ°', 'đŁ'), - ('đŻ', 'đŻ'), + ('đŻ', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đ', 'đ¶'), ('đ', 'đ'), ('đ ', 'đ§'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đ ', 'đ
'), ('đ ', 'đ '), ('đ ', 'đ ”'), @@ -5209,18 +5361,22 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đłș', 'đŽŁ'), ('đŽ°', 'đŽč'), ('đč ', 'đčŸ'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10ead}', '\u{10ead}'), - ('\u{10eb0}', '\u{10eb1}'), + ('đș', 'đș©'), + ('đș', 'đș'), + ('đș°', 'đș±'), ('đŒ', 'đŒ§'), ('đŒ°', 'đœ
'), ('đœ', 'đœ'), - ('\u{10fb0}', '\u{10fcb}'), + ('đœ°', 'đŸ'), + ('đŸ', 'đŸ'), + ('đŸ°', 'đż'), ('đż ', 'đż¶'), ('đ', 'đ'), ('đ', 'đ·'), ('đ', 'đ'), ('đ', 'đŻ'), + ('đ±', 'đČ'), + ('đ”', 'đ”'), ('đ', 'đČ'), ('đ·', 'đž'), ('đ»', 'đŒ'), @@ -5229,12 +5385,12 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đ°', 'đč'), ('đ', 'đŠ'), ('đŹ', 'đŹ'), - ('đ¶', '\u{11147}'), + ('đ¶', 'đ
'), ('đ
', 'đ
Č'), ('đ
Ž', 'đ
¶'), ('đ', 'đ”'), ('đż', 'đ'), - ('đ', '\u{111ce}'), + ('đ', 'đ'), ('đ', 'đ'), ('đĄ', 'đŽ'), ('đ', 'đ'), @@ -5242,6 +5398,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đČ', 'đł'), ('đ”', 'đ”'), ('đž', 'đœ'), + ('đż', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), @@ -5269,7 +5426,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đ
', 'đ
'), ('đ', 'đ'), ('đ', 'đ'), - ('đ', '\u{11461}'), + ('đ', 'đĄ'), ('đ', 'đŻ'), ('đ±', 'đČ'), ('đč', 'đč'), @@ -5293,27 +5450,27 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đŹ', 'đŹ'), ('đź', 'đŻ'), ('đ¶', 'đ¶'), - ('đž', 'đž'), + ('đž', 'đč'), ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đĄ'), ('đŠ', 'đŠ'), - ('đ°', 'đż'), + ('đ°', 'đ'), ('đ ', 'đ ź'), ('đ ž', 'đ ž'), ('đ »', 'đ »'), ('đą ', 'đŁČ'), - ('đŁż', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{11931}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193d}', '\u{1193d}'), - ('\u{1193f}', '\u{11942}'), - ('\u{11944}', '\u{11946}'), - ('\u{11950}', '\u{11959}'), + ('đŁż', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€Ż'), + ('đ€±', 'đ€”'), + ('đ€·', 'đ€ž'), + ('đ€œ', 'đ€œ'), + ('đ€ż', 'đ„'), + ('đ„', 'đ„'), + ('đ„', 'đ„'), ('đŠ ', 'đŠ§'), ('đŠȘ', 'đ§'), ('đ§', 'đ§'), @@ -5327,7 +5484,8 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đ©', 'đȘ'), ('đȘ', 'đȘ'), ('đȘ', 'đȘą'), - ('đ«', 'đ«ž'), + ('đȘ°', 'đ«ž'), + ('đŹ', 'đŹ'), ('đ°', 'đ°'), ('đ°', 'đ°Ż'), ('đ°Ÿ', 'đ°Ÿ'), @@ -5351,18 +5509,26 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đ¶ ', 'đ¶©'), ('đ» ', 'đ»Č'), ('đ»”', 'đ»ž'), - ('\u{11fb0}', '\u{11fb0}'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ”'), + ('đŒŸ', 'đŒż'), + ('đœ', 'đœ'), + ('đœ', 'đœ'), + ('đŸ°', 'đŸ°'), ('đż', 'đż±'), ('đżż', 'đ'), ('đ', 'đź'), ('đ°', 'đŽ'), ('đ', 'đ'), - ('đ', 'đź'), + ('đŸ', 'đżČ'), + ('đ', 'đŻ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đšž'), ('đ©', 'đ©'), ('đ© ', 'đ©©'), - ('đ©ź', 'đ©Ż'), + ('đ©ź', 'đȘŸ'), + ('đ«', 'đ«'), ('đ«', 'đ«'), ('đ«”', 'đ«”'), ('đŹ', 'đŹŻ'), @@ -5376,12 +5542,17 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đœ', 'đŸ'), ('đŸ', 'đŸ'), ('đż ', 'đżŁ'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), ('đ', 'đ·'), - ('đ ', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('đ', 'đ'), + ('đ ', 'đł'), + ('đŽ', 'đŽ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('đ', 'đą'), + ('đČ', 'đČ'), ('đ
', 'đ
'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ('đ
°', 'đ»'), ('đ°', 'đ±Ș'), @@ -5390,6 +5561,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đČ', 'đČ'), ('đČ', 'đČ'), ('đČ', 'đČ'), + ('đœ', 'đż'), ('đ', 'đ”'), ('đ', 'đŠ'), ('đ©', 'đ
€'), @@ -5397,9 +5569,10 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đ
Ș', 'đ
'), ('đ', 'đ'), ('đ', 'đ©'), - ('đź', 'đš'), + ('đź', 'đȘ'), ('đ', 'đ'), ('đ
', 'đ
'), + ('đ', 'đ'), ('đ ', 'đł'), ('đ', 'đ'), ('đ ', 'đž'), @@ -5428,13 +5601,23 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đ©', 'đ©Ž'), ('đ©¶', 'đȘ'), ('đȘ
', 'đȘ'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), + ('đ°', 'đ'), ('đ', 'đŹ'), ('đ·', 'đœ'), ('đ
', 'đ
'), ('đ
', 'đ
'), + ('đ', 'đ'), ('đ', 'đ«'), ('đ°', 'đč'), ('đż', 'đż'), + ('đ', 'đ«'), + ('đ°', 'đč'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ('đ ', 'đŁ'), ('đŁ', 'đŁ'), ('đ€', 'đ„'), @@ -5483,45 +5666,45 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('đ±', 'đż'), ('đ', 'đ'), ('đ', 'đ”'), - ('đ', '\u{1f1ad}'), + ('đ', 'đ'), ('đŠ', 'đ'), ('đ', 'đ»'), ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đ„'), - ('đ', '\u{1f6d7}'), - ('đ ', 'đŹ'), - ('đ°', '\u{1f6fc}'), - ('đ', 'đł'), - ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đŹ'), + ('đ°', 'đŒ'), + ('đ', 'đ¶'), + ('đ»', 'đ'), ('đ ', 'đ«'), + ('đ°', 'đ°'), ('đ ', 'đ '), ('đ ', 'đĄ'), ('đĄ', 'đĄ'), ('đĄ ', 'đą'), ('đą', 'đą'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('đ€', '\u{1f978}'), - ('đ„ș', '\u{1f9cb}'), - ('đ§', 'đ©'), + ('đą°', 'đą±'), + ('đ€', 'đ©'), ('đ© ', 'đ©'), - ('đ©°', '\u{1fa74}'), - ('đ©ž', 'đ©ș'), - ('đȘ', '\u{1fa86}'), - ('đȘ', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), - ('\u{1fbf0}', '\u{1fbf9}'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đ©°', 'đ©Œ'), + ('đȘ', 'đȘ'), + ('đȘ', 'đȘœ'), + ('đȘż', 'đ«
'), + ('đ«', 'đ«'), + ('đ« ', 'đ«š'), + ('đ«°', 'đ«ž'), + ('đŹ', 'đź'), + ('đź', 'đŻ'), + ('đŻ°', 'đŻč'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ]; pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ @@ -5549,7 +5732,8 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), @@ -5594,6 +5778,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c00}'), ('\u{c04}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -5625,7 +5810,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -5649,7 +5834,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1732}', '\u{1733}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -5658,6 +5843,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{17c9}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', '\u{1922}'), @@ -5673,7 +5859,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1a65}', '\u{1a6c}'), ('\u{1a73}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), ('\u{1b34}', '\u{1b3a}'), ('\u{1b3c}', '\u{1b3c}'), @@ -5695,8 +5881,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{200c}', '\u{200c}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), @@ -5754,12 +5939,17 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', '\u{11081}'), ('\u{110b3}', '\u{110b6}'), ('\u{110b9}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{1112b}'), ('\u{1112d}', '\u{11134}'), @@ -5772,6 +5962,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{11234}', '\u{11234}'), ('\u{11236}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), ('\u{112e3}', '\u{112ea}'), ('\u{11300}', '\u{11301}'), @@ -5839,12 +6030,20 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{11d95}', '\u{11d95}'), ('\u{11d97}', '\u{11d97}'), ('\u{11ef3}', '\u{11ef4}'), + ('\u{11f00}', '\u{11f01}'), + ('\u{11f36}', '\u{11f3a}'), + ('\u{11f40}', '\u{11f40}'), + ('\u{11f42}', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d165}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d16e}', '\u{1d172}'), @@ -5863,8 +6062,11 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0020}', '\u{e007f}'), @@ -5887,8 +6089,8 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{eba}', '\u{eba}'), ('\u{f84}', '\u{f84}'), ('\u{1039}', '\u{103a}'), - ('\u{1714}', '\u{1714}'), - ('\u{1734}', '\u{1734}'), + ('\u{1714}', 'á'), + ('áŽ', 'áŽ'), ('\u{17d2}', '\u{17d2}'), ('\u{1a60}', '\u{1a60}'), ('á', 'á'), @@ -5904,6 +6106,7 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{abed}', '\u{abed}'), ('\u{10a3f}', '\u{10a3f}'), ('\u{11046}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), ('\u{1107f}', '\u{1107f}'), ('\u{110b9}', '\u{110b9}'), ('\u{11133}', '\u{11134}'), @@ -5918,7 +6121,7 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('đ¶', 'đ¶'), ('\u{1172b}', '\u{1172b}'), ('\u{11839}', '\u{11839}'), - ('\u{1193d}', '\u{1193e}'), + ('đ€œ', '\u{1193e}'), ('\u{119e0}', '\u{119e0}'), ('\u{11a34}', '\u{11a34}'), ('\u{11a47}', '\u{11a47}'), @@ -5926,6 +6129,7 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{11c3f}', '\u{11c3f}'), ('\u{11d44}', '\u{11d45}'), ('\u{11d97}', '\u{11d97}'), + ('đœ', '\u{11f42}'), ]; pub const HEX_DIGIT: &'static [(char, char)] = &[ @@ -6007,9 +6211,9 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('à ', '\u{82d}'), ('àĄ', '\u{85b}'), ('àĄ ', 'àĄȘ'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), - ('\u{8d3}', '\u{8e1}'), + ('àĄ°', 'àą'), + ('àą', 'àą'), + ('\u{898}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('à„Š', 'à„Ż'), ('à„±', 'àŠ'), @@ -6093,11 +6297,12 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('à°', 'à°'), ('à°', 'à°š'), ('à°Ș', 'à°č'), - ('à°œ', 'à±'), + ('\u{c3c}', 'à±'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', '\u{c63}'), ('ొ', 'à±Ż'), ('àČ', 'àČ'), @@ -6110,10 +6315,10 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{cc6}', 'àł'), ('àł', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', '\u{ce3}'), ('àłŠ', 'àłŻ'), - ('àł±', 'àłČ'), + ('àł±', 'àłł'), ('\u{d00}', 'àŽ'), ('àŽ', 'àŽ'), ('àŽ', '\u{d44}'), @@ -6146,7 +6351,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('àș§', 'àșœ'), ('à»', 'à»'), ('à»', 'à»'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('à»', 'à»'), ('à»', 'à»'), ('àŒ', 'àŒ'), @@ -6193,9 +6398,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('á', 'á'), ('á ', 'áȘ'), ('áź', 'áž'), - ('á', 'á'), - ('á', '\u{1714}'), - ('á ', '\u{1734}'), + ('á', 'á'), + ('á', 'áŽ'), ('á', '\u{1753}'), ('á ', 'áŹ'), ('áź', 'á°'), @@ -6205,7 +6409,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('á', '\u{17dd}'), ('á ', 'á©'), ('\u{180b}', '\u{180d}'), - ('á ', 'á '), + ('\u{180f}', 'á '), ('á ', 'ᥞ'), ('áą', 'áąȘ'), ('áą°', 'ᣔ'), @@ -6224,8 +6428,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('áȘ', 'áȘ'), ('áȘ§', 'áȘ§'), ('\u{1ab0}', '\u{1abd}'), - ('\u{1abf}', '\u{1ac0}'), - ('\u{1b00}', 'á'), + ('\u{1abf}', '\u{1ace}'), + ('\u{1b00}', 'á'), ('á', 'á'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '᯳'), @@ -6237,8 +6441,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('áČœ', 'áČż'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', 'áłș'), - ('áŽ', '\u{1df9}'), - ('\u{1dfb}', 'áŒ'), + ('áŽ', 'áŒ'), ('áŒ', 'áŒ'), ('ጠ', 'áœ
'), ('áœ', 'áœ'), @@ -6278,9 +6481,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('â
', 'â
'), ('â
', 'â
'), ('â
', 'â'), - ('â°', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'Ⳁ'), + ('â°', 'Ⳁ'), ('âł«', 'âłł'), ('âŽ', '⎄'), ('⎧', '⎧'), @@ -6307,11 +6508,10 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ăŒ', 'ăż'), ('ă
', 'ăŻ'), ('ă±', 'ă'), - ('ă ', '\u{31bf}'), + ('ă ', 'ăż'), ('ă°', 'ăż'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), - ('ê', 'ê'), + ('ă', '䶿'), + ('äž', 'ê'), ('ê', 'êœ'), ('ê', 'ê'), ('ê', 'ê«'), @@ -6320,9 +6520,11 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('êż', '\u{a6f1}'), ('ê', 'ê'), ('êą', 'ê'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', 'ê §'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'ê §'), ('\u{a82c}', '\u{a82c}'), ('êĄ', 'êĄł'), ('êą', '\u{a8c5}'), @@ -6349,7 +6551,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), ('êŹ°', 'ê'), - ('ê', '\u{ab69}'), + ('ê', 'ê©'), ('ê°', 'êŻȘ'), ('êŻŹ', '\u{abed}'), ('êŻ°', 'êŻč'), @@ -6411,9 +6613,20 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('đ', 'đ»'), ('đ', 'đ§'), ('đ°', 'đŁ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đ', 'đ¶'), ('đ', 'đ'), ('đ ', 'đ§'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đ ', 'đ
'), ('đ ', 'đ '), ('đ ', 'đ ”'), @@ -6448,31 +6661,33 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('đł', 'đłČ'), ('đŽ', '\u{10d27}'), ('đŽ°', 'đŽč'), - ('\u{10e80}', '\u{10ea9}'), + ('đș', 'đș©'), ('\u{10eab}', '\u{10eac}'), - ('\u{10eb0}', '\u{10eb1}'), - ('đŒ', 'đŒ'), + ('đș°', 'đș±'), + ('\u{10efd}', 'đŒ'), ('đŒ§', 'đŒ§'), ('đŒ°', '\u{10f50}'), - ('\u{10fb0}', '\u{10fc4}'), + ('đœ°', '\u{10f85}'), + ('đŸ°', 'đż'), ('đż ', 'đż¶'), ('đ', '\u{11046}'), - ('đŠ', 'đŻ'), + ('đŠ', 'đ”'), ('\u{1107f}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('đ', 'đš'), ('đ°', 'đč'), ('\u{11100}', '\u{11134}'), ('đ¶', 'đż'), - ('đ
', '\u{11147}'), + ('đ
', 'đ
'), ('đ
', '\u{11173}'), ('đ
¶', 'đ
¶'), ('\u{11180}', 'đ'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', 'đ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), + ('\u{1123e}', '\u{11241}'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), @@ -6497,7 +6712,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{11370}', '\u{11374}'), ('đ', 'đ'), ('đ', 'đ'), - ('\u{1145e}', '\u{11461}'), + ('\u{1145e}', 'đĄ'), ('đ', 'đ
'), ('đ', 'đ'), ('đ', 'đ'), @@ -6512,16 +6727,17 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('đ', 'đ'), ('\u{1171d}', '\u{1172b}'), ('đ°', 'đč'), + ('đ', 'đ'), ('đ ', '\u{1183a}'), ('đą ', 'đŁ©'), - ('đŁż', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('đŁż', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€”'), + ('đ€·', 'đ€ž'), ('\u{1193b}', '\u{11943}'), - ('\u{11950}', '\u{11959}'), + ('đ„', 'đ„'), ('đŠ ', 'đŠ§'), ('đŠȘ', '\u{119d7}'), ('\u{119da}', 'đ§Ą'), @@ -6530,7 +6746,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{11a47}', '\u{11a47}'), ('đ©', '\u{11a99}'), ('đȘ', 'đȘ'), - ('đ«', 'đ«ž'), + ('đȘ°', 'đ«ž'), ('đ°', 'đ°'), ('đ°', '\u{11c36}'), ('\u{11c38}', 'đ±'), @@ -6552,15 +6768,23 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('đ¶', 'đ¶'), ('đ¶ ', 'đ¶©'), ('đ» ', 'đ»¶'), - ('\u{11fb0}', '\u{11fb0}'), + ('\u{11f00}', 'đŒ'), + ('đŒ', '\u{11f3a}'), + ('đŒŸ', '\u{11f42}'), + ('đœ', 'đœ'), + ('đŸ°', 'đŸ°'), ('đ', 'đ'), ('đ', 'đź'), ('đ', 'đ'), - ('đ', 'đź'), + ('đŸ', 'đż°'), + ('đ', 'đŻ'), + ('\u{13440}', '\u{13455}'), ('đ', 'đ'), ('đ ', 'đšž'), ('đ©', 'đ©'), ('đ© ', 'đ©©'), + ('đ©°', 'đȘŸ'), + ('đ«', 'đ«'), ('đ«', 'đ«'), ('\u{16af0}', '\u{16af4}'), ('đŹ', '\u{16b36}'), @@ -6574,12 +6798,17 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{16f8f}', 'đŸ'), ('đż ', 'đżĄ'), ('đżŁ', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), ('đ', 'đ·'), - ('đ ', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('đ', 'đ'), + ('đ ', 'đł'), + ('đŽ', 'đŽ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('đ', 'đą'), + ('đČ', 'đČ'), ('đ
', 'đ
'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ('đ
°', 'đ»'), ('đ°', 'đ±Ș'), @@ -6587,6 +6816,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('đČ', 'đČ'), ('đČ', 'đČ'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('đ
', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -6630,16 +6861,26 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('đ°', 'đ'), + ('\u{1e08f}', '\u{1e08f}'), ('đ', 'đŹ'), ('\u{1e130}', 'đœ'), ('đ
', 'đ
'), ('đ
', 'đ
'), + ('đ', '\u{1e2ae}'), ('đ', 'đč'), + ('đ', 'đč'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ('đ ', 'đŁ'), ('\u{1e8d0}', '\u{1e8d6}'), ('đ€', 'đ„'), @@ -6677,14 +6918,15 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('đșĄ', 'đșŁ'), ('đș„', 'đș©'), ('đș«', 'đș»'), - ('\u{1fbf0}', '\u{1fbf9}'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đŻ°', 'đŻč'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ('\u{e0100}', '\u{e01ef}'), ]; @@ -6738,8 +6980,9 @@ pub const ID_START: &'static [(char, char)] = &[ ('à š', 'à š'), ('àĄ', 'àĄ'), ('àĄ ', 'àĄȘ'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), + ('àĄ°', 'àą'), + ('àą', 'àą'), + ('àą ', 'àŁ'), ('à€', 'à€č'), ('à€œ', 'à€œ'), ('à„', 'à„'), @@ -6804,6 +7047,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('à°Ș', 'à°č'), ('à°œ', 'à°œ'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', 'à±Ą'), ('àČ', 'àČ'), ('àČ
', 'àČ'), @@ -6812,10 +7056,10 @@ pub const ID_START: &'static [(char, char)] = &[ ('àČȘ', 'àČł'), ('àČ”', 'àČč'), ('àČœ', 'àČœ'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', 'àłĄ'), ('àł±', 'àłČ'), - ('\u{d04}', 'àŽ'), + ('àŽ', 'àŽ'), ('àŽ', 'àŽ'), ('àŽ', 'àŽș'), ('àŽœ', 'àŽœ'), @@ -6883,9 +7127,8 @@ pub const ID_START: &'static [(char, char)] = &[ ('á', 'á'), ('á ', 'áȘ'), ('áź', 'áž'), - ('á', 'á'), - ('á', 'á'), - ('á ', 'á±'), + ('á', 'á'), + ('á', 'á±'), ('á', 'á'), ('á ', 'áŹ'), ('áź', 'á°'), @@ -6905,7 +7148,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('áš ', 'á©'), ('áȘ§', 'áȘ§'), ('áŹ
', 'Ᏻ'), - ('á
', 'á'), + ('á
', 'á'), ('áź', 'áź '), ('áźź', '៯'), ('áźș', 'ᯄ'), @@ -6955,9 +7198,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('â
', 'â
'), ('â
', 'â
'), ('â
', 'â'), - ('â°', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'Ⳁ'), + ('â°', 'Ⳁ'), ('âł«', 'âłź'), ('âłČ', 'âłł'), ('âŽ', '⎄'), @@ -6984,11 +7225,10 @@ pub const ID_START: &'static [(char, char)] = &[ ('ăŒ', 'ăż'), ('ă
', 'ăŻ'), ('ă±', 'ă'), - ('ă ', '\u{31bf}'), + ('ă ', 'ăż'), ('ă°', 'ăż'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), - ('ê', 'ê'), + ('ă', '䶿'), + ('äž', 'ê'), ('ê', 'êœ'), ('ê', 'ê'), ('ê', 'ê'), @@ -6998,9 +7238,11 @@ pub const ID_START: &'static [(char, char)] = &[ ('ê ', 'êŻ'), ('ê', 'ê'), ('êą', 'ê'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', 'ê '), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'ê '), ('ê ', 'ê
'), ('ê ', 'ê '), ('ê ', 'ê ą'), @@ -7037,7 +7279,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), ('êŹ°', 'ê'), - ('ê', '\u{ab69}'), + ('ê', 'ê©'), ('ê°', 'êŻą'), ('ê°', 'íŁ'), ('í°', 'í'), @@ -7089,9 +7331,20 @@ pub const ID_START: &'static [(char, char)] = &[ ('đ', 'đ»'), ('đ', 'đ§'), ('đ°', 'đŁ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đ', 'đ¶'), ('đ', 'đ'), ('đ ', 'đ§'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đ ', 'đ
'), ('đ ', 'đ '), ('đ ', 'đ ”'), @@ -7122,19 +7375,22 @@ pub const ID_START: &'static [(char, char)] = &[ ('đČ', 'đČČ'), ('đł', 'đłČ'), ('đŽ', 'đŽŁ'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('đș', 'đș©'), + ('đș°', 'đș±'), ('đŒ', 'đŒ'), ('đŒ§', 'đŒ§'), ('đŒ°', 'đœ
'), - ('\u{10fb0}', '\u{10fc4}'), + ('đœ°', 'đŸ'), + ('đŸ°', 'đż'), ('đż ', 'đż¶'), ('đ', 'đ·'), + ('đ±', 'đČ'), + ('đ”', 'đ”'), ('đ', 'đŻ'), ('đ', 'đš'), ('đ', 'đŠ'), ('đ
', 'đ
'), - ('\u{11147}', '\u{11147}'), + ('đ
', 'đ
'), ('đ
', 'đ
Č'), ('đ
¶', 'đ
¶'), ('đ', 'đČ'), @@ -7143,6 +7399,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ«'), + ('đż', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), @@ -7160,7 +7417,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('đ', 'đĄ'), ('đ', 'đŽ'), ('đ', 'đ'), - ('đ', '\u{11461}'), + ('đ', 'đĄ'), ('đ', 'đŻ'), ('đ', 'đ
'), ('đ', 'đ'), @@ -7171,15 +7428,16 @@ pub const ID_START: &'static [(char, char)] = &[ ('đ', 'đȘ'), ('đž', 'đž'), ('đ', 'đ'), + ('đ', 'đ'), ('đ ', 'đ «'), ('đą ', 'đŁ'), - ('đŁż', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('đŁż', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€Ż'), + ('đ€ż', 'đ€ż'), + ('đ„', 'đ„'), ('đŠ ', 'đŠ§'), ('đŠȘ', 'đ§'), ('đ§Ą', 'đ§Ą'), @@ -7190,7 +7448,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('đ©', 'đ©'), ('đ©', 'đȘ'), ('đȘ', 'đȘ'), - ('đ«', 'đ«ž'), + ('đȘ°', 'đ«ž'), ('đ°', 'đ°'), ('đ°', 'đ°ź'), ('đ±', 'đ±'), @@ -7204,14 +7462,20 @@ pub const ID_START: &'static [(char, char)] = &[ ('đ”Ș', 'đ¶'), ('đ¶', 'đ¶'), ('đ» ', 'đ»Č'), - ('\u{11fb0}', '\u{11fb0}'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒł'), + ('đŸ°', 'đŸ°'), ('đ', 'đ'), ('đ', 'đź'), ('đ', 'đ'), - ('đ', 'đź'), + ('đŸ', 'đż°'), + ('đ', 'đŻ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đšž'), ('đ©', 'đ©'), + ('đ©°', 'đȘŸ'), ('đ«', 'đ«'), ('đŹ', 'đŹŻ'), ('đ', 'đ'), @@ -7224,10 +7488,15 @@ pub const ID_START: &'static [(char, char)] = &[ ('đż ', 'đżĄ'), ('đżŁ', 'đżŁ'), ('đ', 'đ·'), - ('đ ', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('đ', 'đ'), + ('đ ', 'đł'), + ('đŽ', 'đŽ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('đ', 'đą'), + ('đČ', 'đČ'), ('đ
', 'đ
'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ('đ
°', 'đ»'), ('đ°', 'đ±Ș'), @@ -7264,10 +7533,19 @@ pub const ID_START: &'static [(char, char)] = &[ ('đ', 'đš'), ('đȘ', 'đ'), ('đ', 'đ'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), + ('đ°', 'đ'), ('đ', 'đŹ'), ('đ·', 'đœ'), ('đ
', 'đ
'), + ('đ', 'đ'), ('đ', 'đ«'), + ('đ', 'đ«'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ('đ ', 'đŁ'), ('đ€', 'đ„'), ('đ„', 'đ„'), @@ -7304,35 +7582,37 @@ pub const ID_START: &'static [(char, char)] = &[ ('đșĄ', 'đșŁ'), ('đș„', 'đș©'), ('đș«', 'đș»'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ]; pub const IDEOGRAPHIC: &'static [(char, char)] = &[ ('ă', 'ă'), ('ăĄ', 'ă©'), ('ăž', 'ăș'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), + ('ă', '䶿'), + ('äž', 'éżż'), ('ï€', 'ï©'), ('ï©°', 'ï«'), ('\u{16fe4}', '\u{16fe4}'), ('đ', 'đ·'), - ('đ ', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), + ('đ ', 'đł'), + ('đŽ', 'đŽ'), ('đ
°', 'đ»'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ]; pub const JOIN_CONTROL: &'static [(char, char)] = &[('\u{200c}', '\u{200d}')]; @@ -7624,7 +7904,7 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('ÔŻ', 'ÔŻ'), ('Ő ', 'Ö'), ('á', 'áș'), - ('áœ', 'áż'), + ('áŒ', 'áż'), ('áž', 'áœ'), ('áČ', 'áČ'), ('áŽ', 'ᶿ'), @@ -7787,7 +8067,7 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('â
°', 'â
ż'), ('â', 'â'), ('â', 'â©'), - ('â°°', 'â±'), + ('â°°', 'â±'), ('ⱥ', 'ⱥ'), ('ⱄ', 'ⱊ'), ('ⱚ', 'ⱚ'), @@ -7955,19 +8235,34 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('ê»', 'ê»'), ('êœ', 'êœ'), ('êż', 'êż'), + ('ê', 'ê'), ('ê', 'ê'), - ('\u{a7c8}', '\u{a7c8}'), - ('\u{a7ca}', '\u{a7ca}'), - ('\u{a7f6}', '\u{a7f6}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'êŽ'), + ('ê¶', 'ê¶'), ('êž', 'êș'), ('êŹ°', 'ê'), - ('ê', '\u{ab68}'), + ('ê', 'ê©'), ('ê°', 'êźż'), ('ïŹ', 'ïŹ'), ('ïŹ', 'ïŹ'), ('ïœ', 'ïœ'), ('đš', 'đ'), ('đ', 'đ»'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), + ('đ', 'đ'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đł', 'đłČ'), ('đŁ', 'đŁ'), ('đč ', 'đčż'), @@ -7999,6 +8294,10 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('đȘ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), + ('đ°', 'đ'), ('đ€ą', 'đ„'), ]; @@ -8224,7 +8523,7 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('àŻ', 'àŻ'), ('àŻ', 'àŻ'), ('\u{bd7}', '\u{bd7}'), - ('\u{c00}', 'à°'), + ('\u{c00}', '\u{c04}'), ('\u{c3e}', 'à±'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4c}'), @@ -8236,6 +8535,7 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('àł', '\u{ccc}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), + ('àłł', 'àłł'), ('\u{d00}', 'àŽ'), ('\u{d3e}', '\u{d44}'), ('à”', 'à”'), @@ -8254,7 +8554,7 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{eb4}', '\u{eb9}'), ('\u{ebb}', '\u{ebc}'), ('\u{ecd}', '\u{ecd}'), - ('\u{f71}', '\u{f81}'), + ('\u{f71}', '\u{f83}'), ('\u{f8d}', '\u{f97}'), ('\u{f99}', '\u{fbc}'), ('á«', '\u{1036}'), @@ -8281,6 +8581,7 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('á©', '\u{1a5e}'), ('á©Ą', '\u{1a74}'), ('\u{1abf}', '\u{1ac0}'), + ('\u{1acc}', '\u{1ace}'), ('\u{1b00}', 'áŹ'), ('\u{1b35}', 'á'), ('\u{1b80}', 'áź'), @@ -8325,17 +8626,20 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{10eab}', '\u{10eac}'), ('đ', 'đ'), ('\u{11038}', '\u{11045}'), - ('đ', 'đ'), + ('\u{11073}', '\u{11074}'), + ('\u{11080}', 'đ'), ('đ°', 'đž'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{11132}'), ('đ
', 'đ
'), ('\u{11180}', 'đ'), ('đł', 'đż'), - ('\u{111ce}', '\u{111cf}'), + ('đ', '\u{111cf}'), ('đŹ', '\u{11234}'), ('\u{11237}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112e8}'), ('\u{11300}', 'đ'), ('\u{1133e}', 'đ'), @@ -8354,11 +8658,11 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{116ab}', '\u{116b5}'), ('\u{1171d}', '\u{1172a}'), ('đ Ź', 'đ ž'), - ('\u{11930}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('\u{11930}', 'đ€”'), + ('đ€·', 'đ€ž'), ('\u{1193b}', '\u{1193c}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11942}'), + ('đ„', 'đ„'), + ('đ„', 'đ„'), ('đ§', '\u{119d7}'), ('\u{119da}', 'đ§'), ('đ§€', 'đ§€'), @@ -8381,16 +8685,21 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{11d90}', '\u{11d91}'), ('đ¶', 'đ¶'), ('\u{11ef3}', 'đ»¶'), + ('\u{11f00}', '\u{11f01}'), + ('đŒ', 'đŒ'), + ('đŒŽ', '\u{11f3a}'), + ('đŒŸ', '\u{11f40}'), ('\u{16f4f}', '\u{16f4f}'), ('đœ', 'đŸ'), ('\u{16f8f}', '\u{16f92}'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), ('\u{1bc9e}', '\u{1bc9e}'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e947}', '\u{1e947}'), ('đ°', 'đ
'), ('đ
', 'đ
©'), @@ -8453,6 +8762,7 @@ pub const OTHER_LOWERCASE: &'static [(char, char)] = &[ ('Ë ', 'Ë€'), ('\u{345}', '\u{345}'), ('Íș', 'Íș'), + ('áŒ', 'áŒ'), ('ᎏ', 'á”Ș'), ('ᔞ', 'ᔞ'), ('á¶', 'ᶿ'), @@ -8464,8 +8774,15 @@ pub const OTHER_LOWERCASE: &'static [(char, char)] = &[ ('ⱌ', 'ⱜ'), ('ê', 'ê'), ('ê°', 'ê°'), + ('êČ', 'êŽ'), ('êž', 'êč'), ('ê', 'ê'), + ('ê©', 'ê©'), + ('đ', 'đ'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), + ('đ°', 'đ'), ]; pub const OTHER_MATH: &'static [(char, char)] = &[ @@ -8651,13 +8968,14 @@ pub const PREPENDED_CONCATENATION_MARK: &'static [(char, char)] = &[ ('\u{600}', '\u{605}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), + ('\u{890}', '\u{891}'), ('\u{8e2}', '\u{8e2}'), ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), ]; pub const QUOTATION_MARK: &'static [(char, char)] = &[ - ('\"', '\"'), + ('"', '"'), ('\'', '\''), ('«', '«'), ('»', '»'), @@ -8682,7 +9000,7 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('.', '.'), ('?', '?'), ('Ö', 'Ö'), - ('Ű', 'Ű'), + ('Ű', 'Ű'), ('Û', 'Û'), ('Ü', 'Ü'), ('ßč', 'ßč'), @@ -8701,12 +9019,14 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('áȘš', 'áȘ«'), ('á', 'á'), ('á', 'á'), + ('áœ', 'áŸ'), ('á°»', 'á°Œ'), ('ᱟ', '᱿'), ('âŒ', 'âœ'), ('â', 'â'), ('âžź', 'âžź'), ('➌', '➌'), + ('âč', 'âč'), ('ă', 'ă'), ('êż', 'êż'), ('ê', 'ê'), @@ -8727,6 +9047,7 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('ïœĄ', 'ïœĄ'), ('đ©', 'đ©'), ('đœ', 'đœ'), + ('đŸ', 'đŸ'), ('đ', 'đ'), ('đŸ', 'đ'), ('đ
', 'đ
'), @@ -8741,12 +9062,13 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ', 'đ'), ('đŒ', 'đŸ'), - ('\u{11944}', '\u{11944}'), - ('\u{11946}', '\u{11946}'), + ('đ„', 'đ„'), + ('đ„', 'đ„'), ('đ©', 'đ©'), ('đȘ', 'đȘ'), ('đ±', 'đ±'), ('đ»·', 'đ»ž'), + ('đœ', 'đœ'), ('đ©ź', 'đ©Ż'), ('đ«”', 'đ«”'), ('đŹ·', 'đŹž'), @@ -8788,6 +9110,9 @@ pub const SOFT_DOTTED: &'static [(char, char)] = &[ ('đȘ', 'đ«'), ('đ', 'đ'), ('đ', 'đ'), + ('đŒ', 'đŒ'), + ('đ', 'đ'), + ('đš', 'đš'), ]; pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ @@ -8802,7 +9127,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('Ś', 'Ś'), ('Ű', 'Ű'), ('Ű', 'Ű'), - ('Ű', 'Ű'), + ('Ű', 'Ű'), ('Û', 'Û'), ('Ü', 'Ü'), ('Ü', 'Ü'), @@ -8826,6 +9151,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('áȘš', 'áȘ«'), ('á', 'á'), ('á', 'á'), + ('áœ', 'áŸ'), ('á°»', 'á°ż'), ('ᱟ', '᱿'), ('âŒ', 'âœ'), @@ -8835,6 +9161,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('âč', 'âč'), ('âč', 'âč'), ('âč', 'âč'), + ('âč', 'âč'), ('ă', 'ă'), ('êŸ', 'êż'), ('ê', 'ê'), @@ -8865,6 +9192,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('đŹș', 'đŹż'), ('đź', 'đź'), ('đœ', 'đœ'), + ('đŸ', 'đŸ'), ('đ', 'đ'), ('đŸ', 'đ'), ('đ
', 'đ
'), @@ -8874,19 +9202,20 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('đž', 'đŒ'), ('đ©', 'đ©'), ('đ', 'đ'), - ('\u{1145a}', 'đ'), + ('đ', 'đ'), ('đ', 'đ
'), ('đ', 'đ'), ('đ', 'đ'), ('đŒ', 'đŸ'), - ('\u{11944}', '\u{11944}'), - ('\u{11946}', '\u{11946}'), + ('đ„', 'đ„'), + ('đ„', 'đ„'), ('đ©', 'đ©'), ('đȘ', 'đȘ'), ('đȘĄ', 'đȘą'), ('đ±', 'đ±'), ('đ±±', 'đ±±'), ('đ»·', 'đ»ž'), + ('đœ', 'đœ'), ('đ°', 'đŽ'), ('đ©ź', 'đ©Ż'), ('đ«”', 'đ«”'), @@ -8898,8 +9227,8 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ]; pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[ - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), + ('ă', '䶿'), + ('äž', 'éżż'), ('ïš', 'ïš'), ('ïš', 'ïš'), ('ïš', 'ïš'), @@ -8907,12 +9236,13 @@ pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[ ('ïšĄ', 'ïšĄ'), ('ïšŁ', ''), ('', 'ïš©'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ]; pub const UPPERCASE: &'static [(char, char)] = &[ @@ -9349,7 +9679,7 @@ pub const UPPERCASE: &'static [(char, char)] = &[ ('â
', 'â
Ż'), ('â', 'â'), ('â¶', 'â'), - ('â°', 'â°ź'), + ('â°', 'â°Ż'), ('â± ', 'â± '), ('ⱹ', 'â±€'), ('Ⱨ', 'Ⱨ'), @@ -9514,13 +9844,21 @@ pub const UPPERCASE: &'static [(char, char)] = &[ ('êș', 'êș'), ('êŒ', 'êŒ'), ('êŸ', 'êŸ'), + ('ê', 'ê'), ('ê', 'ê'), - ('ê', '\u{a7c7}'), - ('\u{a7c9}', '\u{a7c9}'), - ('\u{a7f5}', '\u{a7f5}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê”', 'ê”'), ('ïŒĄ', 'ïŒș'), ('đ', 'đ§'), ('đ°', 'đ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), ('đČ', 'đČČ'), ('đą ', 'đąż'), ('đč', 'đč'), @@ -9563,6 +9901,7 @@ pub const UPPERCASE: &'static [(char, char)] = &[ pub const VARIATION_SELECTOR: &'static [(char, char)] = &[ ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{fe00}', '\u{fe0f}'), ('\u{e0100}', '\u{e01ef}'), ]; @@ -9632,9 +9971,9 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('à ', '\u{82d}'), ('àĄ', '\u{85b}'), ('àĄ ', 'àĄȘ'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), - ('\u{8d3}', '\u{8e1}'), + ('àĄ°', 'àą'), + ('àą', 'àą'), + ('\u{898}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('à„Š', 'à„Ż'), ('à„±', 'àŠ'), @@ -9718,11 +10057,12 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('à°', 'à°'), ('à°', 'à°š'), ('à°Ș', 'à°č'), - ('à°œ', 'à±'), + ('\u{c3c}', 'à±'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', '\u{c63}'), ('ొ', 'à±Ż'), ('àČ', 'àČ'), @@ -9735,10 +10075,10 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{cc6}', 'àł'), ('àł', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', '\u{ce3}'), ('àłŠ', 'àłŻ'), - ('àł±', 'àłČ'), + ('àł±', 'àłł'), ('\u{d00}', 'àŽ'), ('àŽ', 'àŽ'), ('àŽ', '\u{d44}'), @@ -9771,7 +10111,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('àș§', 'àșœ'), ('à»', 'à»'), ('à»', 'à»'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('à»', 'à»'), ('à»', 'à»'), ('àŒ', 'àŒ'), @@ -9818,9 +10158,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('á', 'á'), ('á ', 'áȘ'), ('áź', 'áž'), - ('á', 'á'), - ('á', '\u{1714}'), - ('á ', '\u{1734}'), + ('á', 'á'), + ('á', 'áŽ'), ('á', '\u{1753}'), ('á ', 'áŹ'), ('áź', 'á°'), @@ -9830,7 +10169,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('á', '\u{17dd}'), ('á ', 'á©'), ('\u{180b}', '\u{180d}'), - ('á ', 'á '), + ('\u{180f}', 'á '), ('á ', 'ᥞ'), ('áą', 'áąȘ'), ('áą°', 'ᣔ'), @@ -9849,8 +10188,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('áȘ', 'áȘ'), ('áȘ§', 'áȘ§'), ('\u{1ab0}', '\u{1abd}'), - ('\u{1abf}', '\u{1ac0}'), - ('\u{1b00}', 'á'), + ('\u{1abf}', '\u{1ace}'), + ('\u{1b00}', 'á'), ('á', 'á'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '᯳'), @@ -9862,8 +10201,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('áČœ', 'áČż'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', 'áłș'), - ('áŽ', '\u{1df9}'), - ('\u{1dfb}', 'áŒ'), + ('áŽ', 'áŒ'), ('áŒ', 'áŒ'), ('ጠ', 'áœ
'), ('áœ', 'áœ'), @@ -9903,9 +10241,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('â
', 'â
'), ('â
', 'â
'), ('â
', 'â'), - ('â°', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'Ⳁ'), + ('â°', 'Ⳁ'), ('âł«', 'âłł'), ('âŽ', '⎄'), ('⎧', '⎧'), @@ -9933,11 +10269,10 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ăŒ', 'ăż'), ('ă
', 'ăŻ'), ('ă±', 'ă'), - ('ă ', '\u{31bf}'), + ('ă ', 'ăż'), ('ă°', 'ăż'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), - ('ê', 'ê'), + ('ă', '䶿'), + ('äž', 'ê'), ('ê', 'êœ'), ('ê', 'ê'), ('ê', 'ê«'), @@ -9946,9 +10281,11 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('êż', '\u{a6f1}'), ('ê', 'ê'), ('êą', 'ê'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', 'ê §'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'ê §'), ('\u{a82c}', '\u{a82c}'), ('êĄ', 'êĄł'), ('êą', '\u{a8c5}'), @@ -9975,7 +10312,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), ('êŹ°', 'ê'), - ('ê', '\u{ab69}'), + ('ê', 'ê©'), ('ê°', 'êŻȘ'), ('êŻŹ', '\u{abed}'), ('êŻ°', 'êŻč'), @@ -10043,9 +10380,20 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('đ', 'đ»'), ('đ', 'đ§'), ('đ°', 'đŁ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đ', 'đ¶'), ('đ', 'đ'), ('đ ', 'đ§'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đ ', 'đ
'), ('đ ', 'đ '), ('đ ', 'đ ”'), @@ -10080,31 +10428,33 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('đł', 'đłČ'), ('đŽ', '\u{10d27}'), ('đŽ°', 'đŽč'), - ('\u{10e80}', '\u{10ea9}'), + ('đș', 'đș©'), ('\u{10eab}', '\u{10eac}'), - ('\u{10eb0}', '\u{10eb1}'), - ('đŒ', 'đŒ'), + ('đș°', 'đș±'), + ('\u{10efd}', 'đŒ'), ('đŒ§', 'đŒ§'), ('đŒ°', '\u{10f50}'), - ('\u{10fb0}', '\u{10fc4}'), + ('đœ°', '\u{10f85}'), + ('đŸ°', 'đż'), ('đż ', 'đż¶'), ('đ', '\u{11046}'), - ('đŠ', 'đŻ'), + ('đŠ', 'đ”'), ('\u{1107f}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('đ', 'đš'), ('đ°', 'đč'), ('\u{11100}', '\u{11134}'), ('đ¶', 'đż'), - ('đ
', '\u{11147}'), + ('đ
', 'đ
'), ('đ
', '\u{11173}'), ('đ
¶', 'đ
¶'), ('\u{11180}', 'đ'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', 'đ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), + ('\u{1123e}', '\u{11241}'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), @@ -10129,7 +10479,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{11370}', '\u{11374}'), ('đ', 'đ'), ('đ', 'đ'), - ('\u{1145e}', '\u{11461}'), + ('\u{1145e}', 'đĄ'), ('đ', 'đ
'), ('đ', 'đ'), ('đ', 'đ'), @@ -10144,16 +10494,17 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('đ', 'đ'), ('\u{1171d}', '\u{1172b}'), ('đ°', 'đč'), + ('đ', 'đ'), ('đ ', '\u{1183a}'), ('đą ', 'đŁ©'), - ('đŁż', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('đŁż', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€”'), + ('đ€·', 'đ€ž'), ('\u{1193b}', '\u{11943}'), - ('\u{11950}', '\u{11959}'), + ('đ„', 'đ„'), ('đŠ ', 'đŠ§'), ('đŠȘ', '\u{119d7}'), ('\u{119da}', 'đ§Ą'), @@ -10162,7 +10513,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{11a47}', '\u{11a47}'), ('đ©', '\u{11a99}'), ('đȘ', 'đȘ'), - ('đ«', 'đ«ž'), + ('đȘ°', 'đ«ž'), ('đ°', 'đ°'), ('đ°', '\u{11c36}'), ('\u{11c38}', 'đ±'), @@ -10184,15 +10535,23 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('đ¶', 'đ¶'), ('đ¶ ', 'đ¶©'), ('đ» ', 'đ»¶'), - ('\u{11fb0}', '\u{11fb0}'), + ('\u{11f00}', 'đŒ'), + ('đŒ', '\u{11f3a}'), + ('đŒŸ', '\u{11f42}'), + ('đœ', 'đœ'), + ('đŸ°', 'đŸ°'), ('đ', 'đ'), ('đ', 'đź'), ('đ', 'đ'), - ('đ', 'đź'), + ('đŸ', 'đż°'), + ('đ', 'đŻ'), + ('\u{13440}', '\u{13455}'), ('đ', 'đ'), ('đ ', 'đšž'), ('đ©', 'đ©'), ('đ© ', 'đ©©'), + ('đ©°', 'đȘŸ'), + ('đ«', 'đ«'), ('đ«', 'đ«'), ('\u{16af0}', '\u{16af4}'), ('đŹ', '\u{16b36}'), @@ -10206,12 +10565,17 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{16f8f}', 'đŸ'), ('đż ', 'đżĄ'), ('đżŁ', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), ('đ', 'đ·'), - ('đ ', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('đ', 'đ'), + ('đ ', 'đł'), + ('đŽ', 'đŽ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('đ', 'đą'), + ('đČ', 'đČ'), ('đ
', 'đ
'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ('đ
°', 'đ»'), ('đ°', 'đ±Ș'), @@ -10219,6 +10583,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('đČ', 'đČ'), ('đČ', 'đČ'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('đ
', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -10262,16 +10628,26 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('đ°', 'đ'), + ('\u{1e08f}', '\u{1e08f}'), ('đ', 'đŹ'), ('\u{1e130}', 'đœ'), ('đ
', 'đ
'), ('đ
', 'đ
'), + ('đ', '\u{1e2ae}'), ('đ', 'đč'), + ('đ', 'đč'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ('đ ', 'đŁ'), ('\u{1e8d0}', '\u{1e8d6}'), ('đ€', 'đ„'), @@ -10309,14 +10685,15 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('đșĄ', 'đșŁ'), ('đș„', 'đș©'), ('đș«', 'đș»'), - ('\u{1fbf0}', '\u{1fbf9}'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đŻ°', 'đŻč'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ('\u{e0100}', '\u{e01ef}'), ]; @@ -10370,8 +10747,9 @@ pub const XID_START: &'static [(char, char)] = &[ ('à š', 'à š'), ('àĄ', 'àĄ'), ('àĄ ', 'àĄȘ'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), + ('àĄ°', 'àą'), + ('àą', 'àą'), + ('àą ', 'àŁ'), ('à€', 'à€č'), ('à€œ', 'à€œ'), ('à„', 'à„'), @@ -10436,6 +10814,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('à°Ș', 'à°č'), ('à°œ', 'à°œ'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', 'à±Ą'), ('àČ', 'àČ'), ('àČ
', 'àČ'), @@ -10444,10 +10823,10 @@ pub const XID_START: &'static [(char, char)] = &[ ('àČȘ', 'àČł'), ('àČ”', 'àČč'), ('àČœ', 'àČœ'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', 'àłĄ'), ('àł±', 'àłČ'), - ('\u{d04}', 'àŽ'), + ('àŽ', 'àŽ'), ('àŽ', 'àŽ'), ('àŽ', 'àŽș'), ('àŽœ', 'àŽœ'), @@ -10515,9 +10894,8 @@ pub const XID_START: &'static [(char, char)] = &[ ('á', 'á'), ('á ', 'áȘ'), ('áź', 'áž'), - ('á', 'á'), - ('á', 'á'), - ('á ', 'á±'), + ('á', 'á'), + ('á', 'á±'), ('á', 'á'), ('á ', 'áŹ'), ('áź', 'á°'), @@ -10537,7 +10915,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('áš ', 'á©'), ('áȘ§', 'áȘ§'), ('áŹ
', 'Ᏻ'), - ('á
', 'á'), + ('á
', 'á'), ('áź', 'áź '), ('áźź', '៯'), ('áźș', 'ᯄ'), @@ -10587,9 +10965,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('â
', 'â
'), ('â
', 'â
'), ('â
', 'â'), - ('â°', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'Ⳁ'), + ('â°', 'Ⳁ'), ('âł«', 'âłź'), ('âłČ', 'âłł'), ('âŽ', '⎄'), @@ -10616,11 +10992,10 @@ pub const XID_START: &'static [(char, char)] = &[ ('ăŒ', 'ăż'), ('ă
', 'ăŻ'), ('ă±', 'ă'), - ('ă ', '\u{31bf}'), + ('ă ', 'ăż'), ('ă°', 'ăż'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), - ('ê', 'ê'), + ('ă', '䶿'), + ('äž', 'ê'), ('ê', 'êœ'), ('ê', 'ê'), ('ê', 'ê'), @@ -10630,9 +11005,11 @@ pub const XID_START: &'static [(char, char)] = &[ ('ê ', 'êŻ'), ('ê', 'ê'), ('êą', 'ê'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', 'ê '), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'ê '), ('ê ', 'ê
'), ('ê ', 'ê '), ('ê ', 'ê ą'), @@ -10669,7 +11046,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), ('êŹ°', 'ê'), - ('ê', '\u{ab69}'), + ('ê', 'ê©'), ('ê°', 'êŻą'), ('ê°', 'íŁ'), ('í°', 'í'), @@ -10728,9 +11105,20 @@ pub const XID_START: &'static [(char, char)] = &[ ('đ', 'đ»'), ('đ', 'đ§'), ('đ°', 'đŁ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đ', 'đ¶'), ('đ', 'đ'), ('đ ', 'đ§'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đ ', 'đ
'), ('đ ', 'đ '), ('đ ', 'đ ”'), @@ -10761,19 +11149,22 @@ pub const XID_START: &'static [(char, char)] = &[ ('đČ', 'đČČ'), ('đł', 'đłČ'), ('đŽ', 'đŽŁ'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('đș', 'đș©'), + ('đș°', 'đș±'), ('đŒ', 'đŒ'), ('đŒ§', 'đŒ§'), ('đŒ°', 'đœ
'), - ('\u{10fb0}', '\u{10fc4}'), + ('đœ°', 'đŸ'), + ('đŸ°', 'đż'), ('đż ', 'đż¶'), ('đ', 'đ·'), + ('đ±', 'đČ'), + ('đ”', 'đ”'), ('đ', 'đŻ'), ('đ', 'đš'), ('đ', 'đŠ'), ('đ
', 'đ
'), - ('\u{11147}', '\u{11147}'), + ('đ
', 'đ
'), ('đ
', 'đ
Č'), ('đ
¶', 'đ
¶'), ('đ', 'đČ'), @@ -10782,6 +11173,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ«'), + ('đż', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), @@ -10799,7 +11191,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('đ', 'đĄ'), ('đ', 'đŽ'), ('đ', 'đ'), - ('đ', '\u{11461}'), + ('đ', 'đĄ'), ('đ', 'đŻ'), ('đ', 'đ
'), ('đ', 'đ'), @@ -10810,15 +11202,16 @@ pub const XID_START: &'static [(char, char)] = &[ ('đ', 'đȘ'), ('đž', 'đž'), ('đ', 'đ'), + ('đ', 'đ'), ('đ ', 'đ «'), ('đą ', 'đŁ'), - ('đŁż', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('đŁż', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€Ż'), + ('đ€ż', 'đ€ż'), + ('đ„', 'đ„'), ('đŠ ', 'đŠ§'), ('đŠȘ', 'đ§'), ('đ§Ą', 'đ§Ą'), @@ -10829,7 +11222,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('đ©', 'đ©'), ('đ©', 'đȘ'), ('đȘ', 'đȘ'), - ('đ«', 'đ«ž'), + ('đȘ°', 'đ«ž'), ('đ°', 'đ°'), ('đ°', 'đ°ź'), ('đ±', 'đ±'), @@ -10843,14 +11236,20 @@ pub const XID_START: &'static [(char, char)] = &[ ('đ”Ș', 'đ¶'), ('đ¶', 'đ¶'), ('đ» ', 'đ»Č'), - ('\u{11fb0}', '\u{11fb0}'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒł'), + ('đŸ°', 'đŸ°'), ('đ', 'đ'), ('đ', 'đź'), ('đ', 'đ'), - ('đ', 'đź'), + ('đŸ', 'đż°'), + ('đ', 'đŻ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đšž'), ('đ©', 'đ©'), + ('đ©°', 'đȘŸ'), ('đ«', 'đ«'), ('đŹ', 'đŹŻ'), ('đ', 'đ'), @@ -10863,10 +11262,15 @@ pub const XID_START: &'static [(char, char)] = &[ ('đż ', 'đżĄ'), ('đżŁ', 'đżŁ'), ('đ', 'đ·'), - ('đ ', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('đ', 'đ'), + ('đ ', 'đł'), + ('đŽ', 'đŽ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('đ', 'đą'), + ('đČ', 'đČ'), ('đ
', 'đ
'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ('đ
°', 'đ»'), ('đ°', 'đ±Ș'), @@ -10903,10 +11307,19 @@ pub const XID_START: &'static [(char, char)] = &[ ('đ', 'đš'), ('đȘ', 'đ'), ('đ', 'đ'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), + ('đ°', 'đ'), ('đ', 'đŹ'), ('đ·', 'đœ'), ('đ
', 'đ
'), + ('đ', 'đ'), ('đ', 'đ«'), + ('đ', 'đ«'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ('đ ', 'đŁ'), ('đ€', 'đ„'), ('đ„', 'đ„'), @@ -10943,11 +11356,12 @@ pub const XID_START: &'static [(char, char)] = &[ ('đșĄ', 'đșŁ'), ('đș„', 'đș©'), ('đș«', 'đș»'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ]; diff --git a/src/unicode_tables/property_names.rs b/src/unicode_tables/property_names.rs index 6393df2..599a123 100644 --- a/src/unicode_tables/property_names.rs +++ b/src/unicode_tables/property_names.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-names ucd-13.0.0 +// ucd-generate property-names ucd-15.0.0 // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("age", "Age"), diff --git a/src/unicode_tables/property_values.rs b/src/unicode_tables/property_values.rs index c46653a..cb2d32f 100644 --- a/src/unicode_tables/property_values.rs +++ b/src/unicode_tables/property_values.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-values ucd-13.0.0 --include gc,script,scx,age,gcb,wb,sb +// ucd-generate property-values ucd-15.0.0 --include gc,script,scx,age,gcb,wb,sb // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const PROPERTY_VALUES: &'static [( &'static str, @@ -19,6 +19,8 @@ pub const PROPERTY_VALUES: &'static [( ("12.0", "V12_0"), ("12.1", "V12_1"), ("13.0", "V13_0"), + ("14.0", "V14_0"), + ("15.0", "V15_0"), ("2.0", "V2_0"), ("2.1", "V2_1"), ("3.0", "V3_0"), @@ -44,6 +46,8 @@ pub const PROPERTY_VALUES: &'static [( ("v120", "V12_0"), ("v121", "V12_1"), ("v130", "V13_0"), + ("v140", "V14_0"), + ("v150", "V15_0"), ("v20", "V2_0"), ("v21", "V2_1"), ("v30", "V3_0"), @@ -233,9 +237,11 @@ pub const PROPERTY_VALUES: &'static [( ("common", "Common"), ("copt", "Coptic"), ("coptic", "Coptic"), + ("cpmn", "Cypro_Minoan"), ("cprt", "Cypriot"), ("cuneiform", "Cuneiform"), ("cypriot", "Cypriot"), + ("cyprominoan", "Cypro_Minoan"), ("cyrillic", "Cyrillic"), ("cyrl", "Cyrillic"), ("deseret", "Deseret"), @@ -304,6 +310,7 @@ pub const PROPERTY_VALUES: &'static [( ("kannada", "Kannada"), ("katakana", "Katakana"), ("katakanaorhiragana", "Katakana_Or_Hiragana"), + ("kawi", "Kawi"), ("kayahli", "Kayah_Li"), ("khar", "Kharoshthi"), ("kharoshthi", "Kharoshthi"), @@ -368,6 +375,8 @@ pub const PROPERTY_VALUES: &'static [( ("myanmar", "Myanmar"), ("mymr", "Myanmar"), ("nabataean", "Nabataean"), + ("nagm", "Nag_Mundari"), + ("nagmundari", "Nag_Mundari"), ("nand", "Nandinagari"), ("nandinagari", "Nandinagari"), ("narb", "Old_North_Arabian"), @@ -391,6 +400,7 @@ pub const PROPERTY_VALUES: &'static [( ("oldsogdian", "Old_Sogdian"), ("oldsoutharabian", "Old_South_Arabian"), ("oldturkic", "Old_Turkic"), + ("olduyghur", "Old_Uyghur"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"), ("orya", "Oriya"), @@ -398,6 +408,7 @@ pub const PROPERTY_VALUES: &'static [( ("osge", "Osage"), ("osma", "Osmanya"), ("osmanya", "Osmanya"), + ("ougr", "Old_Uyghur"), ("pahawhhmong", "Pahawh_Hmong"), ("palm", "Palmyrene"), ("palmyrene", "Palmyrene"), @@ -462,6 +473,7 @@ pub const PROPERTY_VALUES: &'static [( ("tamil", "Tamil"), ("taml", "Tamil"), ("tang", "Tangut"), + ("tangsa", "Tangsa"), ("tangut", "Tangut"), ("tavt", "Tai_Viet"), ("telu", "Telugu"), @@ -476,11 +488,15 @@ pub const PROPERTY_VALUES: &'static [( ("tifinagh", "Tifinagh"), ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), + ("tnsa", "Tangsa"), + ("toto", "Toto"), ("ugar", "Ugaritic"), ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), ("vai", "Vai"), ("vaii", "Vai"), + ("vith", "Vithkuqi"), + ("vithkuqi", "Vithkuqi"), ("wancho", "Wancho"), ("wara", "Warang_Citi"), ("warangciti", "Warang_Citi"), @@ -550,9 +566,11 @@ pub const PROPERTY_VALUES: &'static [( ("common", "Common"), ("copt", "Coptic"), ("coptic", "Coptic"), + ("cpmn", "Cypro_Minoan"), ("cprt", "Cypriot"), ("cuneiform", "Cuneiform"), ("cypriot", "Cypriot"), + ("cyprominoan", "Cypro_Minoan"), ("cyrillic", "Cyrillic"), ("cyrl", "Cyrillic"), ("deseret", "Deseret"), @@ -621,6 +639,7 @@ pub const PROPERTY_VALUES: &'static [( ("kannada", "Kannada"), ("katakana", "Katakana"), ("katakanaorhiragana", "Katakana_Or_Hiragana"), + ("kawi", "Kawi"), ("kayahli", "Kayah_Li"), ("khar", "Kharoshthi"), ("kharoshthi", "Kharoshthi"), @@ -685,6 +704,8 @@ pub const PROPERTY_VALUES: &'static [( ("myanmar", "Myanmar"), ("mymr", "Myanmar"), ("nabataean", "Nabataean"), + ("nagm", "Nag_Mundari"), + ("nagmundari", "Nag_Mundari"), ("nand", "Nandinagari"), ("nandinagari", "Nandinagari"), ("narb", "Old_North_Arabian"), @@ -708,6 +729,7 @@ pub const PROPERTY_VALUES: &'static [( ("oldsogdian", "Old_Sogdian"), ("oldsoutharabian", "Old_South_Arabian"), ("oldturkic", "Old_Turkic"), + ("olduyghur", "Old_Uyghur"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"), ("orya", "Oriya"), @@ -715,6 +737,7 @@ pub const PROPERTY_VALUES: &'static [( ("osge", "Osage"), ("osma", "Osmanya"), ("osmanya", "Osmanya"), + ("ougr", "Old_Uyghur"), ("pahawhhmong", "Pahawh_Hmong"), ("palm", "Palmyrene"), ("palmyrene", "Palmyrene"), @@ -779,6 +802,7 @@ pub const PROPERTY_VALUES: &'static [( ("tamil", "Tamil"), ("taml", "Tamil"), ("tang", "Tangut"), + ("tangsa", "Tangsa"), ("tangut", "Tangut"), ("tavt", "Tai_Viet"), ("telu", "Telugu"), @@ -793,11 +817,15 @@ pub const PROPERTY_VALUES: &'static [( ("tifinagh", "Tifinagh"), ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), + ("tnsa", "Tangsa"), + ("toto", "Toto"), ("ugar", "Ugaritic"), ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), ("vai", "Vai"), ("vaii", "Vai"), + ("vith", "Vithkuqi"), + ("vithkuqi", "Vithkuqi"), ("wancho", "Wancho"), ("wara", "Warang_Citi"), ("warangciti", "Warang_Citi"), diff --git a/src/unicode_tables/script.rs b/src/unicode_tables/script.rs index cd86cba..cc5c400 100644 --- a/src/unicode_tables/script.rs +++ b/src/unicode_tables/script.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate script ucd-13.0.0 --chars +// ucd-generate script ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Adlam", ADLAM), @@ -35,6 +35,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Coptic", COPTIC), ("Cuneiform", CUNEIFORM), ("Cypriot", CYPRIOT), + ("Cypro_Minoan", CYPRO_MINOAN), ("Cyrillic", CYRILLIC), ("Deseret", DESERET), ("Devanagari", DEVANAGARI), @@ -68,6 +69,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Kaithi", KAITHI), ("Kannada", KANNADA), ("Katakana", KATAKANA), + ("Kawi", KAWI), ("Kayah_Li", KAYAH_LI), ("Kharoshthi", KHAROSHTHI), ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT), @@ -102,6 +104,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Multani", MULTANI), ("Myanmar", MYANMAR), ("Nabataean", NABATAEAN), + ("Nag_Mundari", NAG_MUNDARI), ("Nandinagari", NANDINAGARI), ("New_Tai_Lue", NEW_TAI_LUE), ("Newa", NEWA), @@ -118,6 +121,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Old_Sogdian", OLD_SOGDIAN), ("Old_South_Arabian", OLD_SOUTH_ARABIAN), ("Old_Turkic", OLD_TURKIC), + ("Old_Uyghur", OLD_UYGHUR), ("Oriya", ORIYA), ("Osage", OSAGE), ("Osmanya", OSMANYA), @@ -149,6 +153,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Tai_Viet", TAI_VIET), ("Takri", TAKRI), ("Tamil", TAMIL), + ("Tangsa", TANGSA), ("Tangut", TANGUT), ("Telugu", TELUGU), ("Thaana", THAANA), @@ -156,8 +161,10 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Tibetan", TIBETAN), ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), + ("Toto", TOTO), ("Ugaritic", UGARITIC), ("Vai", VAI), + ("Vithkuqi", VITHKUQI), ("Wancho", WANCHO), ("Warang_Citi", WARANG_CITI), ("Yezidi", YEZIDI), @@ -169,7 +176,7 @@ pub const ADLAM: &'static [(char, char)] = &[('đ€', 'đ„'), ('đ„', 'đ„'), ('đ„', 'đ„')]; pub const AHOM: &'static [(char, char)] = - &[('đ', 'đ'), ('\u{1171d}', '\u{1172b}'), ('đ°', 'đż')]; + &[('đ', 'đ'), ('\u{1171d}', '\u{1172b}'), ('đ°', 'đ')]; pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('đ', 'đ')]; @@ -177,26 +184,27 @@ pub const ARABIC: &'static [(char, char)] = &[ ('\u{600}', '\u{604}'), ('Ű', 'Ű'), ('Ű', '\u{61a}'), - ('\u{61c}', '\u{61c}'), - ('Ű', 'Ű'), + ('\u{61c}', 'Ű'), ('Ű ', 'Űż'), ('Ù', 'Ù'), ('\u{656}', 'ÙŻ'), ('Ù±', '\u{6dc}'), ('Û', 'Ûż'), ('Ę', 'Ęż'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), - ('\u{8d3}', '\u{8e1}'), + ('àĄ°', 'àą'), + ('\u{890}', '\u{891}'), + ('\u{898}', '\u{8e1}'), ('\u{8e3}', '\u{8ff}'), - ('ï', 'ïŻ'), + ('ï', 'ïŻ'), ('ïŻ', ''), - ('ï”', 'ï¶'), + ('ï”', 'ï¶'), ('ï¶', 'ï·'), - ('ï·°', 'ï·œ'), + ('ï·', 'ï·'), + ('ï·°', 'ï·ż'), ('ïč°', 'ïčŽ'), ('ïč¶', 'ﻌ'), ('đč ', 'đčŸ'), + ('\u{10efd}', '\u{10eff}'), ('đž', 'đž'), ('đž
', 'đž'), ('đžĄ', 'đžą'), @@ -238,7 +246,7 @@ pub const ARMENIAN: &'static [(char, char)] = pub const AVESTAN: &'static [(char, char)] = &[('đŹ', 'đŹ”'), ('đŹč', 'đŹż')]; -pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'á'), ('á', 'áŒ')]; +pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'á'), ('á', 'áŸ')]; pub const BAMUM: &'static [(char, char)] = &[('ê ', 'ê·'), ('đ ', 'đšž')]; @@ -268,10 +276,10 @@ pub const BHAIKSUKI: &'static [(char, char)] = &[('đ°', 'đ°'), ('đ°', '\u{11c36}'), ('\u{11c38}', 'đ±
'), ('đ±', 'đ±Ź')]; pub const BOPOMOFO: &'static [(char, char)] = - &[('ËȘ', 'Ë«'), ('ă
', 'ăŻ'), ('ă ', '\u{31bf}')]; + &[('ËȘ', 'Ë«'), ('ă
', 'ăŻ'), ('ă ', 'ăż')]; pub const BRAHMI: &'static [(char, char)] = - &[('đ', 'đ'), ('đ', 'đŻ'), ('\u{1107f}', '\u{1107f}')]; + &[('đ', 'đ'), ('đ', 'đ”'), ('\u{1107f}', '\u{1107f}')]; pub const BRAILLE: &'static [(char, char)] = &[('â ', '⣿')]; @@ -280,7 +288,7 @@ pub const BUGINESE: &'static [(char, char)] = &[('áš', '\u{1a1b}'), ('áš', 'áš pub const BUHID: &'static [(char, char)] = &[('á', '\u{1753}')]; pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = - &[('á', 'áż'), ('áą°', 'ᣔ')]; + &[('á', 'áż'), ('áą°', 'ᣔ'), ('đȘ°', 'đȘż')]; pub const CARIAN: &'static [(char, char)] = &[('đ ', 'đ')]; @@ -288,7 +296,7 @@ pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[('đ°', 'đŁ'), ('đŻ', 'đŻ')]; pub const CHAKMA: &'static [(char, char)] = - &[('\u{11100}', '\u{11134}'), ('đ¶', '\u{11147}')]; + &[('\u{11100}', '\u{11134}'), ('đ¶', 'đ
')]; pub const CHAM: &'static [(char, char)] = &[('êš', '\u{aa36}'), ('ê©', 'ê©'), ('ê©', 'ê©'), ('ê©', 'ê©')]; @@ -296,10 +304,10 @@ pub const CHAM: &'static [(char, char)] = pub const CHEROKEE: &'static [(char, char)] = &[('á ', 'á”'), ('áž', 'áœ'), ('ê°', 'êźż')]; -pub const CHORASMIAN: &'static [(char, char)] = &[('\u{10fb0}', '\u{10fcb}')]; +pub const CHORASMIAN: &'static [(char, char)] = &[('đŸ°', 'đż')]; pub const COMMON: &'static [(char, char)] = &[ - ('\u{0}', '@'), + ('\0', '@'), ('[', '`'), ('{', '©'), ('«', 'Âč'), @@ -339,7 +347,7 @@ pub const COMMON: &'static [(char, char)] = &[ ('\u{2066}', 'â°'), ('âŽ', 'âŸ'), ('â', 'â'), - ('â ', 'âż'), + ('â ', 'â'), ('â', 'â„'), ('â§', 'â©'), ('âŹ', 'â±'), @@ -351,8 +359,8 @@ pub const COMMON: &'static [(char, char)] = &[ ('â ', 'âż'), ('â€', 'âł'), ('â¶', 'âź'), - ('\u{2b97}', '⯿'), - ('âž', '\u{2e52}'), + ('âź', '⯿'), + ('âž', 'âč'), ('âż°', 'âż»'), ('\u{3000}', 'ă'), ('ă', 'ă'), @@ -375,7 +383,7 @@ pub const COMMON: &'static [(char, char)] = &[ ('ê€ź', 'ê€ź'), ('ê§', 'ê§'), ('ê', 'ê'), - ('\u{ab6a}', '\u{ab6b}'), + ('êȘ', 'ê«'), ('', 'ïŽż'), ('ïž', 'ïž'), ('ïž°', 'ïč'), @@ -393,18 +401,19 @@ pub const COMMON: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ', 'đł'), ('đ·', 'đż'), - ('đ', '\u{1019c}'), + ('đ', 'đ'), ('đ', 'đŒ'), ('đĄ', 'đ»'), - ('đżą', 'đżŁ'), ('\u{1bca0}', '\u{1bca3}'), + ('đœ', 'đż'), ('đ', 'đ”'), ('đ', 'đŠ'), ('đ©', 'đ
Š'), ('đ
Ș', '\u{1d17a}'), ('đ', 'đ'), ('đ', 'đ©'), - ('đź', 'đš'), + ('đź', 'đȘ'), + ('đ', 'đ'), ('đ ', 'đł'), ('đ', 'đ'), ('đ ', 'đž'), @@ -437,39 +446,38 @@ pub const COMMON: &'static [(char, char)] = &[ ('đ±', 'đż'), ('đ', 'đ'), ('đ', 'đ”'), - ('đ', '\u{1f1ad}'), + ('đ', 'đ'), ('đŠ', 'đż'), ('đ', 'đ'), ('đ', 'đ»'), ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đ„'), - ('đ', '\u{1f6d7}'), - ('đ ', 'đŹ'), - ('đ°', '\u{1f6fc}'), - ('đ', 'đł'), - ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đŹ'), + ('đ°', 'đŒ'), + ('đ', 'đ¶'), + ('đ»', 'đ'), ('đ ', 'đ«'), + ('đ°', 'đ°'), ('đ ', 'đ '), ('đ ', 'đĄ'), ('đĄ', 'đĄ'), ('đĄ ', 'đą'), ('đą', 'đą'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('đ€', '\u{1f978}'), - ('đ„ș', '\u{1f9cb}'), - ('đ§', 'đ©'), + ('đą°', 'đą±'), + ('đ€', 'đ©'), ('đ© ', 'đ©'), - ('đ©°', '\u{1fa74}'), - ('đ©ž', 'đ©ș'), - ('đȘ', '\u{1fa86}'), - ('đȘ', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('đ©°', 'đ©Œ'), + ('đȘ', 'đȘ'), + ('đȘ', 'đȘœ'), + ('đȘż', 'đ«
'), + ('đ«', 'đ«'), + ('đ« ', 'đ«š'), + ('đ«°', 'đ«ž'), + ('đŹ', 'đź'), + ('đź', 'đŻ'), + ('đŻ°', 'đŻč'), ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'), ]; @@ -483,6 +491,8 @@ pub const CUNEIFORM: &'static [(char, char)] = pub const CYPRIOT: &'static [(char, char)] = &[('đ ', 'đ
'), ('đ ', 'đ '), ('đ ', 'đ ”'), ('đ ·', 'đ ž'), ('đ Œ', 'đ Œ'), ('đ ż', 'đ ż')]; +pub const CYPRO_MINOAN: &'static [(char, char)] = &[('đŸ', 'đżČ')]; + pub const CYRILLIC: &'static [(char, char)] = &[ ('Đ', '\u{484}'), ('\u{487}', 'ÔŻ'), @@ -492,6 +502,8 @@ pub const CYRILLIC: &'static [(char, char)] = &[ ('\u{2de0}', '\u{2dff}'), ('ê', '\u{a69f}'), ('\u{fe2e}', '\u{fe2f}'), + ('đ°', 'đ'), + ('\u{1e08f}', '\u{1e08f}'), ]; pub const DESERET: &'static [(char, char)] = &[('đ', 'đ')]; @@ -501,17 +513,18 @@ pub const DEVANAGARI: &'static [(char, char)] = &[ ('\u{955}', '\u{963}'), ('à„Š', 'à„ż'), ('\u{a8e0}', '\u{a8ff}'), + ('đŹ', 'đŹ'), ]; pub const DIVES_AKURU: &'static [(char, char)] = &[ - ('\u{11900}', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193b}', '\u{11946}'), - ('\u{11950}', '\u{11959}'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€”'), + ('đ€·', 'đ€ž'), + ('\u{1193b}', 'đ„'), + ('đ„', 'đ„'), ]; pub const DOGRA: &'static [(char, char)] = &[('đ ', 'đ »')]; @@ -520,7 +533,7 @@ pub const DUPLOYAN: &'static [(char, char)] = &[('đ°', 'đ±Ș'), ('đ±°', 'đ±Œ'), ('đČ', 'đČ'), ('đČ', 'đČ'), ('đČ', 'đČ')]; pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = - &[('đ', 'đź'), ('\u{13430}', '\u{13438}')]; + &[('đ', '\u{13455}')]; pub const ELBASAN: &'static [(char, char)] = &[('đ', 'đ§')]; @@ -559,6 +572,10 @@ pub const ETHIOPIC: &'static [(char, char)] = &[ ('êŹ', 'êŹ'), ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ]; pub const GEORGIAN: &'static [(char, char)] = &[ @@ -575,8 +592,7 @@ pub const GEORGIAN: &'static [(char, char)] = &[ ]; pub const GLAGOLITIC: &'static [(char, char)] = &[ - ('â°', 'â°ź'), - ('â°°', 'â±'), + ('â°', 'â±'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), @@ -696,18 +712,20 @@ pub const HAN: &'static [(char, char)] = &[ ('ă', 'ă'), ('ăĄ', 'ă©'), ('ăž', 'ă»'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), + ('ă', '䶿'), + ('äž', 'éżż'), ('ï€', 'ï©'), ('ï©°', 'ï«'), - ('\u{16ff0}', '\u{16ff1}'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đżą', 'đżŁ'), + ('đż°', 'đż±'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ]; pub const HANGUL: &'static [(char, char)] = &[ @@ -730,7 +748,7 @@ pub const HANGUL: &'static [(char, char)] = &[ pub const HANIFI_ROHINGYA: &'static [(char, char)] = &[('đŽ', '\u{10d27}'), ('đŽ°', 'đŽč')]; -pub const HANUNOO: &'static [(char, char)] = &[('á ', '\u{1734}')]; +pub const HANUNOO: &'static [(char, char)] = &[('á ', 'áŽ')]; pub const HATRAN: &'static [(char, char)] = &[('đŁ ', 'đŁČ'), ('đŁŽ', 'đŁ”'), ('đŁ»', 'đŁż')]; @@ -747,8 +765,14 @@ pub const HEBREW: &'static [(char, char)] = &[ ('ï', 'ï'), ]; -pub const HIRAGANA: &'static [(char, char)] = - &[('ă', 'ă'), ('ă', 'ă'), ('đ', 'đ'), ('đ
', 'đ
'), ('đ', 'đ')]; +pub const HIRAGANA: &'static [(char, char)] = &[ + ('ă', 'ă'), + ('ă', 'ă'), + ('đ', 'đ'), + ('đČ', 'đČ'), + ('đ
', 'đ
'), + ('đ', 'đ'), +]; pub const IMPERIAL_ARAMAIC: &'static [(char, char)] = &[('đĄ', 'đĄ'), ('đĄ', 'đĄ')]; @@ -759,15 +783,14 @@ pub const INHERITED: &'static [(char, char)] = &[ ('\u{64b}', '\u{655}'), ('\u{670}', '\u{670}'), ('\u{951}', '\u{954}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce0}'), ('\u{1ce2}', '\u{1ce8}'), ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{200c}', '\u{200d}'), ('\u{20d0}', '\u{20f0}'), ('\u{302a}', '\u{302d}'), @@ -777,6 +800,8 @@ pub const INHERITED: &'static [(char, char)] = &[ ('\u{101fd}', '\u{101fd}'), ('\u{102e0}', '\u{102e0}'), ('\u{1133b}', '\u{1133b}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), @@ -794,7 +819,7 @@ pub const JAVANESE: &'static [(char, char)] = &[('\u{a980}', 'ê§'), ('ê§', 'ê§'), ('ê§', 'ê§')]; pub const KAITHI: &'static [(char, char)] = - &[('\u{11080}', 'đ'), ('\u{110cd}', '\u{110cd}')]; + &[('\u{11080}', '\u{110c2}'), ('\u{110cd}', '\u{110cd}')]; pub const KANNADA: &'static [(char, char)] = &[ ('àČ', 'àČ'), @@ -806,10 +831,10 @@ pub const KANNADA: &'static [(char, char)] = &[ ('\u{cc6}', 'àł'), ('àł', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', '\u{ce3}'), ('àłŠ', 'àłŻ'), - ('àł±', 'àłČ'), + ('àł±', 'àłł'), ]; pub const KATAKANA: &'static [(char, char)] = &[ @@ -820,10 +845,18 @@ pub const KATAKANA: &'static [(char, char)] = &[ ('ă', 'ă'), ('', 'ïœŻ'), ('', 'ïŸ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), ('đ', 'đ'), + ('đ ', 'đą'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ]; +pub const KAWI: &'static [(char, char)] = + &[('\u{11f00}', 'đŒ'), ('đŒ', '\u{11f3a}'), ('đŒŸ', 'đœ')]; + pub const KAYAH_LI: &'static [(char, char)] = &[('ê€', '\u{a92d}'), ('ê€Ż', 'ê€Ż')]; pub const KHAROSHTHI: &'static [(char, char)] = &[ @@ -838,12 +871,12 @@ pub const KHAROSHTHI: &'static [(char, char)] = &[ ]; pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] = - &[('\u{16fe4}', '\u{16fe4}'), ('\u{18b00}', '\u{18cd5}')]; + &[('\u{16fe4}', '\u{16fe4}'), ('đŹ', 'đł')]; pub const KHMER: &'static [(char, char)] = &[('á', '\u{17dd}'), ('á ', 'á©'), ('á°', 'áč'), ('᧠', '᧿')]; -pub const KHOJKI: &'static [(char, char)] = &[('đ', 'đ'), ('đ', '\u{1123e}')]; +pub const KHOJKI: &'static [(char, char)] = &[('đ', 'đ'), ('đ', '\u{11241}')]; pub const KHUDAWADI: &'static [(char, char)] = &[('đ°', '\u{112ea}'), ('đ°', 'đč')]; @@ -857,7 +890,7 @@ pub const LAO: &'static [(char, char)] = &[ ('àș§', 'àșœ'), ('à»', 'à»'), ('à»', 'à»'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('à»', 'à»'), ('à»', 'à»'), ]; @@ -886,15 +919,22 @@ pub const LATIN: &'static [(char, char)] = &[ ('â
', 'â'), ('â± ', 'Ɀ'), ('êą', 'ê'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', 'êż'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'êż'), ('êŹ°', 'ê'), ('ê', 'ê€'), - ('êŠ', '\u{ab69}'), + ('êŠ', 'ê©'), ('ïŹ', 'ïŹ'), ('ïŒĄ', 'ïŒș'), ('ïœ', 'ïœ'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), ]; pub const LEPCHA: &'static [(char, char)] = @@ -921,8 +961,7 @@ pub const LINEAR_B: &'static [(char, char)] = &[ ('đ', 'đș'), ]; -pub const LISU: &'static [(char, char)] = - &[('ê', 'êż'), ('\u{11fb0}', '\u{11fb0}')]; +pub const LISU: &'static [(char, char)] = &[('ê', 'êż'), ('đŸ°', 'đŸ°')]; pub const LYCIAN: &'static [(char, char)] = &[('đ', 'đ')]; @@ -978,15 +1017,8 @@ pub const MIAO: &'static [(char, char)] = pub const MODI: &'static [(char, char)] = &[('đ', 'đ'), ('đ', 'đ')]; -pub const MONGOLIAN: &'static [(char, char)] = &[ - ('á ', 'á '), - ('á ', 'á '), - ('á ', '\u{180e}'), - ('á ', 'á '), - ('á ', 'ᥞ'), - ('áą', 'áąȘ'), - ('đ ', 'đŹ'), -]; +pub const MONGOLIAN: &'static [(char, char)] = + &[('á ', 'á '), ('á ', 'á '), ('á ', 'á '), ('á ', 'ᥞ'), ('áą', 'áąȘ'), ('đ ', 'đŹ')]; pub const MRO: &'static [(char, char)] = &[('đ©', 'đ©'), ('đ© ', 'đ©©'), ('đ©ź', 'đ©Ż')]; @@ -998,13 +1030,15 @@ pub const MYANMAR: &'static [(char, char)] = pub const NABATAEAN: &'static [(char, char)] = &[('đą', 'đą'), ('đą§', 'đąŻ')]; +pub const NAG_MUNDARI: &'static [(char, char)] = &[('đ', 'đč')]; + pub const NANDINAGARI: &'static [(char, char)] = &[('đŠ ', 'đŠ§'), ('đŠȘ', '\u{119d7}'), ('\u{119da}', 'đ§€')]; pub const NEW_TAI_LUE: &'static [(char, char)] = &[('áŠ', 'ካ'), ('ኰ', 'á§'), ('á§', 'á§'), ('á§', 'á§')]; -pub const NEWA: &'static [(char, char)] = &[('đ', 'đ'), ('đ', '\u{11461}')]; +pub const NEWA: &'static [(char, char)] = &[('đ', 'đ'), ('đ', 'đĄ')]; pub const NKO: &'static [(char, char)] = &[('ß', 'ßș'), ('\u{7fd}', 'ßż')]; @@ -1034,6 +1068,8 @@ pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('đ© ', 'đ©ż')]; pub const OLD_TURKIC: &'static [(char, char)] = &[('đ°', 'đ±')]; +pub const OLD_UYGHUR: &'static [(char, char)] = &[('đœ°', 'đŸ')]; + pub const ORIYA: &'static [(char, char)] = &[ ('\u{b01}', 'àŹ'), ('àŹ
', 'àŹ'), @@ -1118,7 +1154,7 @@ pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('ê ', '\u{a82c}')]; pub const SYRIAC: &'static [(char, char)] = &[('Ü', 'Ü'), ('\u{70f}', '\u{74a}'), ('Ę', 'Ę'), ('àĄ ', 'àĄȘ')]; -pub const TAGALOG: &'static [(char, char)] = &[('á', 'á'), ('á', '\u{1714}')]; +pub const TAGALOG: &'static [(char, char)] = &[('á', 'á'), ('á', 'á')]; pub const TAGBANWA: &'static [(char, char)] = &[('á ', 'áŹ'), ('áź', 'á°'), ('\u{1772}', '\u{1773}')]; @@ -1135,7 +1171,7 @@ pub const TAI_THAM: &'static [(char, char)] = &[ pub const TAI_VIET: &'static [(char, char)] = &[('êȘ', 'ê«'), ('ê«', 'ê«')]; -pub const TAKRI: &'static [(char, char)] = &[('đ', 'đž'), ('đ', 'đ')]; +pub const TAKRI: &'static [(char, char)] = &[('đ', 'đč'), ('đ', 'đ')]; pub const TAMIL: &'static [(char, char)] = &[ ('\u{b82}', 'àź'), @@ -1158,23 +1194,22 @@ pub const TAMIL: &'static [(char, char)] = &[ ('đżż', 'đżż'), ]; -pub const TANGUT: &'static [(char, char)] = &[ - ('đż ', 'đż '), - ('đ', 'đ·'), - ('đ ', '\u{18aff}'), - ('\u{18d00}', '\u{18d08}'), -]; +pub const TANGSA: &'static [(char, char)] = &[('đ©°', 'đȘŸ'), ('đ«', 'đ«')]; + +pub const TANGUT: &'static [(char, char)] = + &[('đż ', 'đż '), ('đ', 'đ·'), ('đ ', 'đ«ż'), ('đŽ', 'đŽ')]; pub const TELUGU: &'static [(char, char)] = &[ ('\u{c00}', 'à°'), ('à°', 'à°'), ('à°', 'à°š'), ('à°Ș', 'à°č'), - ('à°œ', 'à±'), + ('\u{c3c}', 'à±'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', '\u{c63}'), ('ొ', 'à±Ż'), ('à±·', 'à±ż'), @@ -1199,19 +1234,29 @@ pub const TIFINAGH: &'static [(char, char)] = pub const TIRHUTA: &'static [(char, char)] = &[('đ', 'đ'), ('đ', 'đ')]; +pub const TOTO: &'static [(char, char)] = &[('đ', '\u{1e2ae}')]; + pub const UGARITIC: &'static [(char, char)] = &[('đ', 'đ'), ('đ', 'đ')]; pub const VAI: &'static [(char, char)] = &[('ê', 'ê«')]; +pub const VITHKUQI: &'static [(char, char)] = &[ + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), +]; + pub const WANCHO: &'static [(char, char)] = &[('đ', 'đč'), ('đż', 'đż')]; pub const WARANG_CITI: &'static [(char, char)] = &[('đą ', 'đŁČ'), ('đŁż', 'đŁż')]; -pub const YEZIDI: &'static [(char, char)] = &[ - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eab}', '\u{10ead}'), - ('\u{10eb0}', '\u{10eb1}'), -]; +pub const YEZIDI: &'static [(char, char)] = + &[('đș', 'đș©'), ('\u{10eab}', 'đș'), ('đș°', 'đș±')]; pub const YI: &'static [(char, char)] = &[('ê', 'ê'), ('ê', 'ê')]; diff --git a/src/unicode_tables/script_extension.rs b/src/unicode_tables/script_extension.rs index 7fca2af..42625e2 100644 --- a/src/unicode_tables/script_extension.rs +++ b/src/unicode_tables/script_extension.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate script-extension ucd-13.0.0 --chars +// ucd-generate script-extension ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Adlam", ADLAM), @@ -35,6 +35,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Coptic", COPTIC), ("Cuneiform", CUNEIFORM), ("Cypriot", CYPRIOT), + ("Cypro_Minoan", CYPRO_MINOAN), ("Cyrillic", CYRILLIC), ("Deseret", DESERET), ("Devanagari", DEVANAGARI), @@ -68,6 +69,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Kaithi", KAITHI), ("Kannada", KANNADA), ("Katakana", KATAKANA), + ("Kawi", KAWI), ("Kayah_Li", KAYAH_LI), ("Kharoshthi", KHAROSHTHI), ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT), @@ -102,6 +104,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Multani", MULTANI), ("Myanmar", MYANMAR), ("Nabataean", NABATAEAN), + ("Nag_Mundari", NAG_MUNDARI), ("Nandinagari", NANDINAGARI), ("New_Tai_Lue", NEW_TAI_LUE), ("Newa", NEWA), @@ -118,6 +121,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Old_Sogdian", OLD_SOGDIAN), ("Old_South_Arabian", OLD_SOUTH_ARABIAN), ("Old_Turkic", OLD_TURKIC), + ("Old_Uyghur", OLD_UYGHUR), ("Oriya", ORIYA), ("Osage", OSAGE), ("Osmanya", OSMANYA), @@ -149,6 +153,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Tai_Viet", TAI_VIET), ("Takri", TAKRI), ("Tamil", TAMIL), + ("Tangsa", TANGSA), ("Tangut", TANGUT), ("Telugu", TELUGU), ("Thaana", THAANA), @@ -156,8 +161,10 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Tibetan", TIBETAN), ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), + ("Toto", TOTO), ("Ugaritic", UGARITIC), ("Vai", VAI), + ("Vithkuqi", VITHKUQI), ("Wancho", WANCHO), ("Warang_Citi", WARANG_CITI), ("Yezidi", YEZIDI), @@ -166,32 +173,32 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ]; pub const ADLAM: &'static [(char, char)] = - &[('Ù', 'Ù'), ('đ€', 'đ„'), ('đ„', 'đ„'), ('đ„', 'đ„')]; + &[('Ű', 'Ű'), ('Ù', 'Ù'), ('đ€', 'đ„'), ('đ„', 'đ„'), ('đ„', 'đ„')]; pub const AHOM: &'static [(char, char)] = - &[('đ', 'đ'), ('\u{1171d}', '\u{1172b}'), ('đ°', 'đż')]; + &[('đ', 'đ'), ('\u{1171d}', '\u{1172b}'), ('đ°', 'đ')]; pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('đ', 'đ')]; pub const ARABIC: &'static [(char, char)] = &[ ('\u{600}', '\u{604}'), - ('Ű', '\u{61c}'), - ('Ű', '\u{6dc}'), + ('Ű', '\u{6dc}'), ('Û', 'Ûż'), ('Ę', 'Ęż'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), - ('\u{8d3}', '\u{8e1}'), + ('àĄ°', 'àą'), + ('\u{890}', '\u{891}'), + ('\u{898}', '\u{8e1}'), ('\u{8e3}', '\u{8ff}'), - ('ï', 'ïŻ'), - ('ïŻ', ''), - ('ï”', 'ï¶'), + ('ï', 'ïŻ'), + ('ïŻ', 'ï¶'), ('ï¶', 'ï·'), - ('ï·°', 'ï·œ'), + ('ï·', 'ï·'), + ('ï·°', 'ï·ż'), ('ïč°', 'ïčŽ'), ('ïč¶', 'ﻌ'), ('\u{102e0}', 'đ»'), ('đč ', 'đčŸ'), + ('\u{10efd}', '\u{10eff}'), ('đž', 'đž'), ('đž
', 'đž'), ('đžĄ', 'đžą'), @@ -233,7 +240,7 @@ pub const ARMENIAN: &'static [(char, char)] = pub const AVESTAN: &'static [(char, char)] = &[('đŹ', 'đŹ”'), ('đŹč', 'đŹż')]; -pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'á'), ('á', 'áŒ')]; +pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'á'), ('á', 'áŸ')]; pub const BAMUM: &'static [(char, char)] = &[('ê ', 'ê·'), ('đ ', 'đšž')]; @@ -284,13 +291,13 @@ pub const BOPOMOFO: &'static [(char, char)] = &[ ('ă·', 'ă·'), ('ă»', 'ă»'), ('ă
', 'ăŻ'), - ('ă ', '\u{31bf}'), + ('ă ', 'ăż'), ('ïč
', 'ïč'), ('ïœĄ', ''), ]; pub const BRAHMI: &'static [(char, char)] = - &[('đ', 'đ'), ('đ', 'đŻ'), ('\u{1107f}', '\u{1107f}')]; + &[('đ', 'đ'), ('đ', 'đ”'), ('\u{1107f}', '\u{1107f}')]; pub const BRAILLE: &'static [(char, char)] = &[('â ', '⣿')]; @@ -300,7 +307,7 @@ pub const BUGINESE: &'static [(char, char)] = pub const BUHID: &'static [(char, char)] = &[('á”', 'á¶'), ('á', '\u{1753}')]; pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = - &[('á', 'áż'), ('áą°', 'ᣔ')]; + &[('á', 'áż'), ('áą°', 'ᣔ'), ('đȘ°', 'đȘż')]; pub const CARIAN: &'static [(char, char)] = &[('đ ', 'đ')]; @@ -308,7 +315,7 @@ pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[('đ°', 'đŁ'), ('đŻ', 'đŻ')]; pub const CHAKMA: &'static [(char, char)] = - &[('৊', 'à§Ż'), ('á', 'á'), ('\u{11100}', '\u{11134}'), ('đ¶', '\u{11147}')]; + &[('৊', 'à§Ż'), ('á', 'á'), ('\u{11100}', '\u{11134}'), ('đ¶', 'đ
')]; pub const CHAM: &'static [(char, char)] = &[('êš', '\u{aa36}'), ('ê©', 'ê©'), ('ê©', 'ê©'), ('ê©', 'ê©')]; @@ -316,10 +323,10 @@ pub const CHAM: &'static [(char, char)] = pub const CHEROKEE: &'static [(char, char)] = &[('á ', 'á”'), ('áž', 'áœ'), ('ê°', 'êźż')]; -pub const CHORASMIAN: &'static [(char, char)] = &[('\u{10fb0}', '\u{10fcb}')]; +pub const CHORASMIAN: &'static [(char, char)] = &[('đŸ°', 'đż')]; pub const COMMON: &'static [(char, char)] = &[ - ('\u{0}', '@'), + ('\0', '@'), ('[', '`'), ('{', '©'), ('«', 'Âč'), @@ -345,7 +352,7 @@ pub const COMMON: &'static [(char, char)] = &[ ('\u{2066}', 'â°'), ('âŽ', 'âŸ'), ('â', 'â'), - ('â ', 'âż'), + ('â ', 'â'), ('â', 'â„'), ('â§', 'â©'), ('âŹ', 'â±'), @@ -357,9 +364,9 @@ pub const COMMON: &'static [(char, char)] = &[ ('â ', 'âż'), ('â€', 'âł'), ('â¶', 'âź'), - ('\u{2b97}', '⯿'), + ('âź', '⯿'), ('âž', 'âč'), - ('âč', '\u{2e52}'), + ('âč', 'âč'), ('âż°', 'âż»'), ('\u{3000}', '\u{3000}'), ('ă', 'ă'), @@ -377,8 +384,7 @@ pub const COMMON: &'static [(char, char)] = &[ ('ê', 'êĄ'), ('ê', 'ê'), ('ê', 'ê'), - ('\u{ab6a}', '\u{ab6b}'), - ('', 'ïŽż'), + ('êȘ', 'ê«'), ('ïž', 'ïž'), ('ïž°', 'ïč'), ('ïč', 'ïč'), @@ -391,16 +397,17 @@ pub const COMMON: &'static [(char, char)] = &[ ('ïż ', 'ïżŠ'), ('ïżš', 'ïżź'), ('\u{fff9}', 'ïżœ'), - ('đ', '\u{1019c}'), + ('đ', 'đ'), ('đ', 'đŒ'), - ('đżą', 'đżŁ'), + ('đœ', 'đż'), ('đ', 'đ”'), ('đ', 'đŠ'), ('đ©', 'đ
Š'), ('đ
Ș', '\u{1d17a}'), ('đ', 'đ'), ('đ', 'đ©'), - ('đź', 'đš'), + ('đź', 'đȘ'), + ('đ', 'đ'), ('đ ', 'đł'), ('đ', 'đ'), ('đČ', 'đž'), @@ -433,38 +440,37 @@ pub const COMMON: &'static [(char, char)] = &[ ('đ±', 'đż'), ('đ', 'đ'), ('đ', 'đ”'), - ('đ', '\u{1f1ad}'), + ('đ', 'đ'), ('đŠ', 'đż'), ('đ', 'đ'), ('đ', 'đ»'), ('đ', 'đ'), ('đ ', 'đ„'), - ('đ', '\u{1f6d7}'), - ('đ ', 'đŹ'), - ('đ°', '\u{1f6fc}'), - ('đ', 'đł'), - ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đŹ'), + ('đ°', 'đŒ'), + ('đ', 'đ¶'), + ('đ»', 'đ'), ('đ ', 'đ«'), + ('đ°', 'đ°'), ('đ ', 'đ '), ('đ ', 'đĄ'), ('đĄ', 'đĄ'), ('đĄ ', 'đą'), ('đą', 'đą'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('đ€', '\u{1f978}'), - ('đ„ș', '\u{1f9cb}'), - ('đ§', 'đ©'), + ('đą°', 'đą±'), + ('đ€', 'đ©'), ('đ© ', 'đ©'), - ('đ©°', '\u{1fa74}'), - ('đ©ž', 'đ©ș'), - ('đȘ', '\u{1fa86}'), - ('đȘ', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('đ©°', 'đ©Œ'), + ('đȘ', 'đȘ'), + ('đȘ', 'đȘœ'), + ('đȘż', 'đ«
'), + ('đ«', 'đ«'), + ('đ« ', 'đ«š'), + ('đ«°', 'đ«ž'), + ('đŹ', 'đź'), + ('đź', 'đŻ'), + ('đŻ°', 'đŻč'), ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'), ]; @@ -487,6 +493,8 @@ pub const CYPRIOT: &'static [(char, char)] = &[ ('đ ż', 'đ ż'), ]; +pub const CYPRO_MINOAN: &'static [(char, char)] = &[('đ', 'đ'), ('đŸ', 'đżČ')]; + pub const CYRILLIC: &'static [(char, char)] = &[ ('Đ', 'ÔŻ'), ('áČ', 'áČ'), @@ -497,6 +505,8 @@ pub const CYRILLIC: &'static [(char, char)] = &[ ('âč', 'âč'), ('ê', '\u{a69f}'), ('\u{fe2e}', '\u{fe2f}'), + ('đ°', 'đ'), + ('\u{1e08f}', '\u{1e08f}'), ]; pub const DESERET: &'static [(char, char)] = &[('đ', 'đ')]; @@ -509,17 +519,18 @@ pub const DEVANAGARI: &'static [(char, char)] = &[ ('\u{20f0}', '\u{20f0}'), ('ê °', 'ê č'), ('\u{a8e0}', '\u{a8ff}'), + ('đŹ', 'đŹ'), ]; pub const DIVES_AKURU: &'static [(char, char)] = &[ - ('\u{11900}', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193b}', '\u{11946}'), - ('\u{11950}', '\u{11959}'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€”'), + ('đ€·', 'đ€ž'), + ('\u{1193b}', 'đ„'), + ('đ„', 'đ„'), ]; pub const DOGRA: &'static [(char, char)] = @@ -529,7 +540,7 @@ pub const DUPLOYAN: &'static [(char, char)] = &[('đ°', 'đ±Ș'), ('đ±°', 'đ±Œ'), ('đČ', 'đČ'), ('đČ', 'đČ'), ('đČ', '\u{1bca3}')]; pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = - &[('đ', 'đź'), ('\u{13430}', '\u{13438}')]; + &[('đ', '\u{13455}')]; pub const ELBASAN: &'static [(char, char)] = &[('đ', 'đ§')]; @@ -568,6 +579,10 @@ pub const ETHIOPIC: &'static [(char, char)] = &[ ('êŹ', 'êŹ'), ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ]; pub const GEORGIAN: &'static [(char, char)] = &[ @@ -585,8 +600,7 @@ pub const GEORGIAN: &'static [(char, char)] = &[ pub const GLAGOLITIC: &'static [(char, char)] = &[ ('\u{484}', '\u{484}'), ('\u{487}', '\u{487}'), - ('â°', 'â°ź'), - ('â°°', 'â±'), + ('â°', 'â±'), ('âč', 'âč'), ('\u{a66f}', '\u{a66f}'), ('\u{1e000}', '\u{1e006}'), @@ -739,23 +753,25 @@ pub const HAN: &'static [(char, char)] = &[ ('ă', 'ă°'), ('ă»', 'ăż'), ('ă ', 'ăŸ'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), + ('ă', '䶿'), + ('äž', 'éżż'), ('ê', 'ê'), ('ï€', 'ï©'), ('ï©°', 'ï«'), ('ïč
', 'ïč'), ('ïœĄ', ''), - ('\u{16ff0}', '\u{16ff1}'), + ('đżą', 'đżŁ'), + ('đż°', 'đż±'), ('đ ', 'đ±'), ('đ', 'đ'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ]; pub const HANGUL: &'static [(char, char)] = &[ @@ -823,7 +839,8 @@ pub const HIRAGANA: &'static [(char, char)] = &[ ('ïœĄ', ''), ('', ''), ('\u{ff9e}', '\u{ff9f}'), - ('đ', 'đ'), + ('đ', 'đ'), + ('đČ', 'đČ'), ('đ
', 'đ
'), ('đ', 'đ'), ]; @@ -836,7 +853,7 @@ pub const INHERITED: &'static [(char, char)] = &[ ('\u{343}', '\u{344}'), ('\u{346}', '\u{362}'), ('\u{953}', '\u{954}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1dc2}', '\u{1df7}'), ('\u{1df9}', '\u{1df9}'), ('\u{1dfb}', '\u{1dff}'), @@ -845,6 +862,8 @@ pub const INHERITED: &'static [(char, char)] = &[ ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2d}'), ('\u{101fd}', '\u{101fd}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), @@ -861,8 +880,12 @@ pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] = pub const JAVANESE: &'static [(char, char)] = &[('\u{a980}', 'ê§'), ('ê§', 'ê§'), ('ê§', 'ê§')]; -pub const KAITHI: &'static [(char, char)] = - &[('à„Š', 'à„Ż'), ('ê °', 'ê č'), ('\u{11080}', 'đ'), ('\u{110cd}', '\u{110cd}')]; +pub const KAITHI: &'static [(char, char)] = &[ + ('à„Š', 'à„Ż'), + ('ê °', 'ê č'), + ('\u{11080}', '\u{110c2}'), + ('\u{110cd}', '\u{110cd}'), +]; pub const KANNADA: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -876,10 +899,10 @@ pub const KANNADA: &'static [(char, char)] = &[ ('\u{cc6}', 'àł'), ('àł', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', '\u{ce3}'), ('àłŠ', 'àłŻ'), - ('àł±', 'àłČ'), + ('àł±', 'àłł'), ('\u{1cd0}', '\u{1cd0}'), ('\u{1cd2}', '\u{1cd2}'), ('\u{1cda}', '\u{1cda}'), @@ -902,10 +925,18 @@ pub const KATAKANA: &'static [(char, char)] = &[ ('ă', 'ă'), ('ïč
', 'ïč'), ('ïœĄ', '\u{ff9f}'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), ('đ', 'đ'), + ('đ ', 'đą'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ]; +pub const KAWI: &'static [(char, char)] = + &[('\u{11f00}', 'đŒ'), ('đŒ', '\u{11f3a}'), ('đŒŸ', 'đœ')]; + pub const KAYAH_LI: &'static [(char, char)] = &[('ê€', 'ê€Ż')]; pub const KHAROSHTHI: &'static [(char, char)] = &[ @@ -920,13 +951,13 @@ pub const KHAROSHTHI: &'static [(char, char)] = &[ ]; pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] = - &[('\u{16fe4}', '\u{16fe4}'), ('\u{18b00}', '\u{18cd5}')]; + &[('\u{16fe4}', '\u{16fe4}'), ('đŹ', 'đł')]; pub const KHMER: &'static [(char, char)] = &[('á', '\u{17dd}'), ('á ', 'á©'), ('á°', 'áč'), ('᧠', '᧿')]; pub const KHOJKI: &'static [(char, char)] = - &[('à«Š', 'à«Ż'), ('ê °', 'ê č'), ('đ', 'đ'), ('đ', '\u{1123e}')]; + &[('à«Š', 'à«Ż'), ('ê °', 'ê č'), ('đ', 'đ'), ('đ', '\u{11241}')]; pub const KHUDAWADI: &'static [(char, char)] = &[('à„€', 'à„„'), ('ê °', 'ê č'), ('đ°', '\u{112ea}'), ('đ°', 'đč')]; @@ -940,7 +971,7 @@ pub const LAO: &'static [(char, char)] = &[ ('àș§', 'àșœ'), ('à»', 'à»'), ('à»', 'à»'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('à»', 'à»'), ('à»', 'à»'), ]; @@ -976,16 +1007,23 @@ pub const LATIN: &'static [(char, char)] = &[ ('â± ', 'Ɀ'), ('ê', 'ê'), ('êą', 'ê'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', 'êż'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'êż'), ('ê€ź', 'ê€ź'), ('êŹ°', 'ê'), ('ê', 'ê€'), - ('êŠ', '\u{ab69}'), + ('êŠ', 'ê©'), ('ïŹ', 'ïŹ'), ('ïŒĄ', 'ïŒș'), ('ïœ', 'ïœ'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), ]; pub const LEPCHA: &'static [(char, char)] = @@ -1016,8 +1054,7 @@ pub const LINEAR_B: &'static [(char, char)] = &[ ('đ·', 'đż'), ]; -pub const LISU: &'static [(char, char)] = - &[('ê', 'êż'), ('\u{11fb0}', '\u{11fb0}')]; +pub const LISU: &'static [(char, char)] = &[('ê', 'êż'), ('đŸ°', 'đŸ°')]; pub const LYCIAN: &'static [(char, char)] = &[('đ', 'đ')]; @@ -1082,8 +1119,7 @@ pub const MODI: &'static [(char, char)] = &[('ê °', 'ê č'), ('đ', 'đ'), ('đ', 'đ')]; pub const MONGOLIAN: &'static [(char, char)] = &[ - ('á ', '\u{180e}'), - ('á ', 'á '), + ('á ', 'á '), ('á ', 'ᥞ'), ('áą', 'áąȘ'), ('\u{202f}', '\u{202f}'), @@ -1100,6 +1136,8 @@ pub const MYANMAR: &'static [(char, char)] = pub const NABATAEAN: &'static [(char, char)] = &[('đą', 'đą'), ('đą§', 'đąŻ')]; +pub const NAG_MUNDARI: &'static [(char, char)] = &[('đ', 'đč')]; + pub const NANDINAGARI: &'static [(char, char)] = &[ ('à„€', 'à„„'), ('àłŠ', 'àłŻ'), @@ -1115,9 +1153,16 @@ pub const NANDINAGARI: &'static [(char, char)] = &[ pub const NEW_TAI_LUE: &'static [(char, char)] = &[('áŠ', 'ካ'), ('ኰ', 'á§'), ('á§', 'á§'), ('á§', 'á§')]; -pub const NEWA: &'static [(char, char)] = &[('đ', 'đ'), ('đ', '\u{11461}')]; +pub const NEWA: &'static [(char, char)] = &[('đ', 'đ'), ('đ', 'đĄ')]; -pub const NKO: &'static [(char, char)] = &[('ß', 'ßș'), ('\u{7fd}', 'ßż')]; +pub const NKO: &'static [(char, char)] = &[ + ('Ű', 'Ű'), + ('Ű', 'Ű'), + ('Ű', 'Ű'), + ('ß', 'ßș'), + ('\u{7fd}', 'ßż'), + ('', 'ïŽż'), +]; pub const NUSHU: &'static [(char, char)] = &[('đżĄ', 'đżĄ'), ('đ
°', 'đ»')]; @@ -1146,6 +1191,9 @@ pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('đ© ', 'đ©ż')]; pub const OLD_TURKIC: &'static [(char, char)] = &[('đ°', 'đ±')]; +pub const OLD_UYGHUR: &'static [(char, char)] = + &[('Ù', 'Ù'), ('đ«Č', 'đ«Č'), ('đœ°', 'đŸ')]; + pub const ORIYA: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), ('à„€', 'à„„'), @@ -1253,10 +1301,11 @@ pub const SYRIAC: &'static [(char, char)] = &[ ('Ę', 'Ę'), ('àĄ ', 'àĄȘ'), ('\u{1df8}', '\u{1df8}'), + ('\u{1dfa}', '\u{1dfa}'), ]; pub const TAGALOG: &'static [(char, char)] = - &[('á', 'á'), ('á', '\u{1714}'), ('á”', 'á¶')]; + &[('á', 'á'), ('á', 'á'), ('á”', 'á¶')]; pub const TAGBANWA: &'static [(char, char)] = &[('á”', 'á¶'), ('á ', 'áŹ'), ('áź', 'á°'), ('\u{1772}', '\u{1773}')]; @@ -1275,7 +1324,7 @@ pub const TAI_THAM: &'static [(char, char)] = &[ pub const TAI_VIET: &'static [(char, char)] = &[('êȘ', 'ê«'), ('ê«', 'ê«')]; pub const TAKRI: &'static [(char, char)] = - &[('à„€', 'à„„'), ('ê °', 'ê č'), ('đ', 'đž'), ('đ', 'đ')]; + &[('à„€', 'à„„'), ('ê °', 'ê č'), ('đ', 'đč'), ('đ', 'đ')]; pub const TAMIL: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -1305,12 +1354,10 @@ pub const TAMIL: &'static [(char, char)] = &[ ('đżż', 'đżż'), ]; -pub const TANGUT: &'static [(char, char)] = &[ - ('đż ', 'đż '), - ('đ', 'đ·'), - ('đ ', '\u{18aff}'), - ('\u{18d00}', '\u{18d08}'), -]; +pub const TANGSA: &'static [(char, char)] = &[('đ©°', 'đȘŸ'), ('đ«', 'đ«')]; + +pub const TANGUT: &'static [(char, char)] = + &[('đż ', 'đż '), ('đ', 'đ·'), ('đ ', 'đ«ż'), ('đŽ', 'đŽ')]; pub const TELUGU: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -1319,11 +1366,12 @@ pub const TELUGU: &'static [(char, char)] = &[ ('à°', 'à°'), ('à°', 'à°š'), ('à°Ș', 'à°č'), - ('à°œ', 'à±'), + ('\u{c3c}', 'à±'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', '\u{c63}'), ('ొ', 'à±Ż'), ('à±·', 'à±ż'), @@ -1365,10 +1413,23 @@ pub const TIRHUTA: &'static [(char, char)] = &[ ('đ', 'đ'), ]; +pub const TOTO: &'static [(char, char)] = &[('đ', '\u{1e2ae}')]; + pub const UGARITIC: &'static [(char, char)] = &[('đ', 'đ'), ('đ', 'đ')]; pub const VAI: &'static [(char, char)] = &[('ê', 'ê«')]; +pub const VITHKUQI: &'static [(char, char)] = &[ + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), +]; + pub const WANCHO: &'static [(char, char)] = &[('đ', 'đč'), ('đż', 'đż')]; pub const WARANG_CITI: &'static [(char, char)] = &[('đą ', 'đŁČ'), ('đŁż', 'đŁż')]; @@ -1378,9 +1439,9 @@ pub const YEZIDI: &'static [(char, char)] = &[ ('Ű', 'Ű'), ('Ű', 'Ű'), ('Ù ', 'Ù©'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eab}', '\u{10ead}'), - ('\u{10eb0}', '\u{10eb1}'), + ('đș', 'đș©'), + ('\u{10eab}', 'đș'), + ('đș°', 'đș±'), ]; pub const YI: &'static [(char, char)] = &[ diff --git a/src/unicode_tables/sentence_break.rs b/src/unicode_tables/sentence_break.rs index 67d830f..2434873 100644 --- a/src/unicode_tables/sentence_break.rs +++ b/src/unicode_tables/sentence_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate sentence-break ucd-13.0.0 --chars +// ucd-generate sentence-break ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ATerm", ATERM), @@ -29,7 +29,7 @@ pub const ATERM: &'static [(char, char)] = pub const CR: &'static [(char, char)] = &[('\r', '\r')]; pub const CLOSE: &'static [(char, char)] = &[ - ('\"', '\"'), + ('"', '"'), ('\'', ')'), ('[', '['), (']', ']'), @@ -57,6 +57,7 @@ pub const CLOSE: &'static [(char, char)] = &[ ('âž', 'âž'), ('âž ', 'âž©'), ('âč', 'âč'), + ('âč', 'âč'), ('ă', 'ă'), ('ă', 'ă'), ('ă', 'ă'), @@ -100,7 +101,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'à€'), ('\u{93a}', '\u{93c}'), ('à€Ÿ', 'à„'), @@ -142,6 +144,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('àŻ', '\u{bcd}'), ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', 'à±'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -154,6 +157,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('àł', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), + ('àłł', 'àłł'), ('\u{d00}', 'àŽ'), ('\u{d3b}', '\u{d3c}'), ('\u{d3e}', '\u{d44}'), @@ -172,7 +176,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -193,13 +197,14 @@ pub const EXTEND: &'static [(char, char)] = &[ ('á', 'á'), ('á', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1712}', 'á'), + ('\u{1732}', 'áŽ'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', 'ါ'), @@ -208,7 +213,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('á©', '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'áŹ'), ('\u{1b34}', 'á'), ('\u{1b6b}', '\u{1b73}'), @@ -221,8 +226,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('áł·', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{200c}', '\u{200d}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), @@ -276,11 +280,16 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('đ', 'đ'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', 'đ'), ('đ°', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{11134}'), ('đ
', 'đ
'), @@ -288,9 +297,10 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11180}', 'đ'), ('đł', 'đ'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', '\u{111cf}'), + ('đ', '\u{111cf}'), ('đŹ', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112ea}'), ('\u{11300}', 'đ'), ('\u{1133b}', '\u{1133c}'), @@ -311,11 +321,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{116ab}', '\u{116b7}'), ('\u{1171d}', '\u{1172b}'), ('đ Ź', '\u{1183a}'), - ('\u{11930}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('\u{11930}', 'đ€”'), + ('đ€·', 'đ€ž'), ('\u{1193b}', '\u{1193e}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11943}'), + ('đ„', 'đ„'), + ('đ„', '\u{11943}'), ('đ§', '\u{119d7}'), ('\u{119da}', '\u{119e0}'), ('đ§€', 'đ§€'), @@ -338,14 +348,22 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11d90}', '\u{11d91}'), ('đ¶', '\u{11d97}'), ('\u{11ef3}', 'đ»¶'), + ('\u{11f00}', '\u{11f01}'), + ('đŒ', 'đŒ'), + ('đŒŽ', '\u{11f3a}'), + ('đŒŸ', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('đœ', 'đŸ'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('đ
', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -363,8 +381,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0020}', '\u{e007f}'), @@ -377,6 +398,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{61c}', '\u{61c}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), + ('\u{890}', '\u{891}'), ('\u{8e2}', '\u{8e2}'), ('\u{180e}', '\u{180e}'), ('\u{200b}', '\u{200b}'), @@ -388,7 +410,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{fff9}', '\u{fffb}'), ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), - ('\u{13430}', '\u{13438}'), + ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), ('\u{e0001}', '\u{e0001}'), @@ -671,6 +693,7 @@ pub const LOWER: &'static [(char, char)] = &[ ('Ô', 'Ô'), ('ÔŻ', 'ÔŻ'), ('Ő ', 'Ö'), + ('áŒ', 'áŒ'), ('áž', 'áœ'), ('áČ', 'áČ'), ('áŽ', 'ᶿ'), @@ -833,7 +856,7 @@ pub const LOWER: &'static [(char, char)] = &[ ('â
°', 'â
ż'), ('â', 'â'), ('â', 'â©'), - ('â°°', 'â±'), + ('â°°', 'â±'), ('ⱥ', 'ⱥ'), ('ⱄ', 'ⱊ'), ('ⱚ', 'ⱚ'), @@ -1001,19 +1024,34 @@ pub const LOWER: &'static [(char, char)] = &[ ('ê»', 'ê»'), ('êœ', 'êœ'), ('êż', 'êż'), + ('ê', 'ê'), ('ê', 'ê'), - ('\u{a7c8}', '\u{a7c8}'), - ('\u{a7ca}', '\u{a7ca}'), - ('\u{a7f6}', '\u{a7f6}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'êŽ'), + ('ê¶', 'ê¶'), ('êž', 'êș'), ('êŹ°', 'ê'), - ('ê', '\u{ab68}'), + ('ê', 'ê©'), ('ê°', 'êźż'), ('ïŹ', 'ïŹ'), ('ïŹ', 'ïŹ'), ('ïœ', 'ïœ'), ('đš', 'đ'), ('đ', 'đ»'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), + ('đ', 'đ'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đł', 'đłČ'), ('đŁ', 'đŁ'), ('đč ', 'đčż'), @@ -1045,6 +1083,10 @@ pub const LOWER: &'static [(char, char)] = &[ ('đȘ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), + ('đ°', 'đ'), ('đ€ą', 'đ„'), ]; @@ -1100,17 +1142,20 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ°', 'đč'), ('đŁ ', 'đŁ©'), - ('\u{11950}', '\u{11959}'), + ('đ„', 'đ„'), ('đ±', 'đ±'), ('đ”', 'đ”'), ('đ¶ ', 'đ¶©'), + ('đœ', 'đœ'), ('đ© ', 'đ©©'), + ('đ«', 'đ«'), ('đ', 'đ'), ('đ', 'đż'), ('đ
', 'đ
'), ('đ°', 'đč'), + ('đ°', 'đč'), ('đ„', 'đ„'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('đŻ°', 'đŻč'), ]; pub const OLETTER: &'static [(char, char)] = &[ @@ -1146,8 +1191,9 @@ pub const OLETTER: &'static [(char, char)] = &[ ('à š', 'à š'), ('àĄ', 'àĄ'), ('àĄ ', 'àĄȘ'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), + ('àĄ°', 'àą'), + ('àą', 'àą'), + ('àą ', 'àŁ'), ('à€', 'à€č'), ('à€œ', 'à€œ'), ('à„', 'à„'), @@ -1212,6 +1258,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('à°Ș', 'à°č'), ('à°œ', 'à°œ'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', 'à±Ą'), ('àČ', 'àČ'), ('àČ
', 'àČ'), @@ -1220,10 +1267,10 @@ pub const OLETTER: &'static [(char, char)] = &[ ('àČȘ', 'àČł'), ('àČ”', 'àČč'), ('àČœ', 'àČœ'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', 'àłĄ'), ('àł±', 'àłČ'), - ('\u{d04}', 'àŽ'), + ('àŽ', 'àŽ'), ('àŽ', 'àŽ'), ('àŽ', 'àŽș'), ('àŽœ', 'àŽœ'), @@ -1264,7 +1311,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('á”', 'á'), ('á', 'á'), ('á', 'áș'), - ('áŒ', 'á'), + ('áœ', 'á'), ('á', 'á'), ('á', 'á'), ('á', 'á'), @@ -1286,9 +1333,8 @@ pub const OLETTER: &'static [(char, char)] = &[ ('á', 'á'), ('á ', 'áȘ'), ('áź', 'áž'), - ('á', 'á'), - ('á', 'á'), - ('á ', 'á±'), + ('á', 'á'), + ('á', 'á±'), ('á', 'á'), ('á ', 'áŹ'), ('áź', 'á°'), @@ -1309,7 +1355,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('áš ', 'á©'), ('áȘ§', 'áȘ§'), ('áŹ
', 'Ᏻ'), - ('á
', 'á'), + ('á
', 'á'), ('áź', 'áź '), ('áźź', '៯'), ('áźș', 'ᯄ'), @@ -1347,11 +1393,10 @@ pub const OLETTER: &'static [(char, char)] = &[ ('ăŒ', 'ăż'), ('ă
', 'ăŻ'), ('ă±', 'ă'), - ('ă ', '\u{31bf}'), + ('ă ', 'ăż'), ('ă°', 'ăż'), - ('ă', '\u{4dbf}'), - ('äž', '\u{9ffc}'), - ('ê', 'ê'), + ('ă', '䶿'), + ('äž', 'ê'), ('ê', 'êœ'), ('ê', 'ê'), ('ê', 'ê'), @@ -1399,7 +1444,6 @@ pub const OLETTER: &'static [(char, char)] = &[ ('êŹ', 'êŹ'), ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), - ('\u{ab69}', '\u{ab69}'), ('êŻ', 'êŻą'), ('ê°', 'íŁ'), ('í°', 'í'), @@ -1449,6 +1493,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('đ', 'đ¶'), ('đ', 'đ'), ('đ ', 'đ§'), + ('đ', 'đ'), ('đ ', 'đ
'), ('đ ', 'đ '), ('đ ', 'đ ”'), @@ -1477,19 +1522,22 @@ pub const OLETTER: &'static [(char, char)] = &[ ('đź', 'đź'), ('đ°', 'đ±'), ('đŽ', 'đŽŁ'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('đș', 'đș©'), + ('đș°', 'đș±'), ('đŒ', 'đŒ'), ('đŒ§', 'đŒ§'), ('đŒ°', 'đœ
'), - ('\u{10fb0}', '\u{10fc4}'), + ('đœ°', 'đŸ'), + ('đŸ°', 'đż'), ('đż ', 'đż¶'), ('đ', 'đ·'), + ('đ±', 'đČ'), + ('đ”', 'đ”'), ('đ', 'đŻ'), ('đ', 'đš'), ('đ', 'đŠ'), ('đ
', 'đ
'), - ('\u{11147}', '\u{11147}'), + ('đ
', 'đ
'), ('đ
', 'đ
Č'), ('đ
¶', 'đ
¶'), ('đ', 'đČ'), @@ -1498,6 +1546,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ«'), + ('đż', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), @@ -1515,7 +1564,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('đ', 'đĄ'), ('đ', 'đŽ'), ('đ', 'đ'), - ('đ', '\u{11461}'), + ('đ', 'đĄ'), ('đ', 'đŻ'), ('đ', 'đ
'), ('đ', 'đ'), @@ -1526,14 +1575,15 @@ pub const OLETTER: &'static [(char, char)] = &[ ('đ', 'đȘ'), ('đž', 'đž'), ('đ', 'đ'), + ('đ', 'đ'), ('đ ', 'đ «'), - ('đŁż', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('đŁż', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€Ż'), + ('đ€ż', 'đ€ż'), + ('đ„', 'đ„'), ('đŠ ', 'đŠ§'), ('đŠȘ', 'đ§'), ('đ§Ą', 'đ§Ą'), @@ -1544,7 +1594,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('đ©', 'đ©'), ('đ©', 'đȘ'), ('đȘ', 'đȘ'), - ('đ«', 'đ«ž'), + ('đȘ°', 'đ«ž'), ('đ°', 'đ°'), ('đ°', 'đ°ź'), ('đ±', 'đ±'), @@ -1558,14 +1608,20 @@ pub const OLETTER: &'static [(char, char)] = &[ ('đ”Ș', 'đ¶'), ('đ¶', 'đ¶'), ('đ» ', 'đ»Č'), - ('\u{11fb0}', '\u{11fb0}'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒł'), + ('đŸ°', 'đŸ°'), ('đ', 'đ'), ('đ', 'đź'), ('đ', 'đ'), - ('đ', 'đź'), + ('đŸ', 'đż°'), + ('đ', 'đŻ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đšž'), ('đ©', 'đ©'), + ('đ©°', 'đȘŸ'), ('đ«', 'đ«'), ('đŹ', 'đŹŻ'), ('đ', 'đ'), @@ -1577,20 +1633,32 @@ pub const OLETTER: &'static [(char, char)] = &[ ('đż ', 'đżĄ'), ('đżŁ', 'đżŁ'), ('đ', 'đ·'), - ('đ ', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('đ', 'đ'), + ('đ ', 'đł'), + ('đŽ', 'đŽ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), + ('đ', 'đą'), + ('đČ', 'đČ'), ('đ
', 'đ
'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ('đ
°', 'đ»'), ('đ°', 'đ±Ș'), ('đ±°', 'đ±Œ'), ('đČ', 'đČ'), ('đČ', 'đČ'), + ('đŒ', 'đŒ'), ('đ', 'đŹ'), ('đ·', 'đœ'), ('đ
', 'đ
'), + ('đ', 'đ'), ('đ', 'đ«'), + ('đ', 'đ«'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ('đ ', 'đŁ'), ('đ„', 'đ„'), ('đž', 'đž'), @@ -1626,13 +1694,14 @@ pub const OLETTER: &'static [(char, char)] = &[ ('đșĄ', 'đșŁ'), ('đș„', 'đș©'), ('đș«', 'đș»'), - ('đ ', '\u{2a6dd}'), - ('đȘ', 'đ«Ž'), + ('đ ', 'đȘ'), + ('đȘ', 'đ«č'), ('đ«', 'đ« '), ('đ« ', 'đŹșĄ'), ('đŹș°', '🯠'), ('đŻ ', 'đŻš'), - ('\u{30000}', '\u{3134a}'), + ('đ°', 'đ±'), + ('đ±', 'đČŻ'), ]; pub const SCONTINUE: &'static [(char, char)] = &[ @@ -1661,7 +1730,7 @@ pub const STERM: &'static [(char, char)] = &[ ('!', '!'), ('?', '?'), ('Ö', 'Ö'), - ('Ű', 'Ű'), + ('Ű', 'Ű'), ('Û', 'Û'), ('Ü', 'Ü'), ('ßč', 'ßč'), @@ -1680,12 +1749,14 @@ pub const STERM: &'static [(char, char)] = &[ ('áȘš', 'áȘ«'), ('á', 'á'), ('á', 'á'), + ('áœ', 'áŸ'), ('á°»', 'á°Œ'), ('ᱟ', '᱿'), ('âŒ', 'âœ'), ('â', 'â'), ('âžź', 'âžź'), ('➌', '➌'), + ('âč', 'âč'), ('ă', 'ă'), ('êż', 'êż'), ('ê', 'ê'), @@ -1704,6 +1775,7 @@ pub const STERM: &'static [(char, char)] = &[ ('ïœĄ', 'ïœĄ'), ('đ©', 'đ©'), ('đœ', 'đœ'), + ('đŸ', 'đŸ'), ('đ', 'đ'), ('đŸ', 'đ'), ('đ
', 'đ
'), @@ -1718,12 +1790,13 @@ pub const STERM: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ', 'đ'), ('đŒ', 'đŸ'), - ('\u{11944}', '\u{11944}'), - ('\u{11946}', '\u{11946}'), + ('đ„', 'đ„'), + ('đ„', 'đ„'), ('đ©', 'đ©'), ('đȘ', 'đȘ'), ('đ±', 'đ±'), ('đ»·', 'đ»ž'), + ('đœ', 'đœ'), ('đ©ź', 'đ©Ż'), ('đ«”', 'đ«”'), ('đŹ·', 'đŹž'), @@ -2183,7 +2256,7 @@ pub const UPPER: &'static [(char, char)] = &[ ('â
', 'â
Ż'), ('â', 'â'), ('â¶', 'â'), - ('â°', 'â°ź'), + ('â°', 'â°Ż'), ('â± ', 'â± '), ('ⱹ', 'â±€'), ('Ⱨ', 'Ⱨ'), @@ -2348,13 +2421,21 @@ pub const UPPER: &'static [(char, char)] = &[ ('êș', 'êș'), ('êŒ', 'êŒ'), ('êŸ', 'êŸ'), + ('ê', 'ê'), ('ê', 'ê'), - ('ê', '\u{a7c7}'), - ('\u{a7c9}', '\u{a7c9}'), - ('\u{a7f5}', '\u{a7f5}'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê”', 'ê”'), ('ïŒĄ', 'ïŒș'), ('đ', 'đ§'), ('đ°', 'đ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), ('đČ', 'đČČ'), ('đą ', 'đąż'), ('đč', 'đč'), diff --git a/src/unicode_tables/word_break.rs b/src/unicode_tables/word_break.rs index bd23e00..c071495 100644 --- a/src/unicode_tables/word_break.rs +++ b/src/unicode_tables/word_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate word-break ucd-13.0.0 --chars +// ucd-generate word-break ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ALetter", ALETTER), @@ -75,8 +75,9 @@ pub const ALETTER: &'static [(char, char)] = &[ ('à š', 'à š'), ('àĄ', 'àĄ'), ('àĄ ', 'àĄȘ'), - ('àą ', 'àąŽ'), - ('àą¶', '\u{8c7}'), + ('àĄ°', 'àą'), + ('àą', 'àą'), + ('àą ', 'àŁ'), ('à€', 'à€č'), ('à€œ', 'à€œ'), ('à„', 'à„'), @@ -141,6 +142,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('à°Ș', 'à°č'), ('à°œ', 'à°œ'), ('à±', 'à±'), + ('à±', 'à±'), ('à± ', 'à±Ą'), ('àČ', 'àČ'), ('àČ
', 'àČ'), @@ -149,10 +151,10 @@ pub const ALETTER: &'static [(char, char)] = &[ ('àČȘ', 'àČł'), ('àČ”', 'àČč'), ('àČœ', 'àČœ'), - ('àł', 'àł'), + ('àł', 'àł'), ('àł ', 'àłĄ'), ('àł±', 'àłČ'), - ('\u{d04}', 'àŽ'), + ('àŽ', 'àŽ'), ('àŽ', 'àŽ'), ('àŽ', 'àŽș'), ('àŽœ', 'àŽœ'), @@ -197,9 +199,8 @@ pub const ALETTER: &'static [(char, char)] = &[ ('á', 'á'), ('á ', 'áȘ'), ('áź', 'áž'), - ('á', 'á'), - ('á', 'á'), - ('á ', 'á±'), + ('á', 'á'), + ('á', 'á±'), ('á', 'á'), ('á ', 'áŹ'), ('áź', 'á°'), @@ -211,7 +212,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('á€', 'á€'), ('áš', 'áš'), ('áŹ
', 'Ᏻ'), - ('á
', 'á'), + ('á
', 'á'), ('áź', 'áź '), ('áźź', '៯'), ('áźș', 'ᯄ'), @@ -263,9 +264,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('â
', 'â
'), ('â
', 'â'), ('â¶', 'â©'), - ('â°', 'â°ź'), - ('â°°', 'â±'), - ('â± ', 'Ⳁ'), + ('â°', 'Ⳁ'), ('âł«', 'âłź'), ('âłČ', 'âłł'), ('âŽ', '⎄'), @@ -287,7 +286,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ă»', 'ăŒ'), ('ă
', 'ăŻ'), ('ă±', 'ă'), - ('ă ', '\u{31bf}'), + ('ă ', 'ăż'), ('ê', 'ê'), ('ê', 'êœ'), ('ê', 'ê'), @@ -296,9 +295,11 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ê', 'êź'), ('êż', 'ê'), ('ê ', 'êŻ'), - ('ê', 'êż'), - ('ê', '\u{a7ca}'), - ('\u{a7f5}', 'ê '), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('ê', 'ê'), + ('êČ', 'ê '), ('ê ', 'ê
'), ('ê ', 'ê '), ('ê ', 'ê ą'), @@ -322,7 +323,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('êŹ', 'êŹ'), ('êŹ ', 'êŹŠ'), ('êŹš', 'êŹź'), - ('êŹ°', '\u{ab69}'), + ('êŹ°', 'ê©'), ('ê°', 'êŻą'), ('ê°', 'íŁ'), ('í°', 'í'), @@ -365,9 +366,20 @@ pub const ALETTER: &'static [(char, char)] = &[ ('đ', 'đ»'), ('đ', 'đ§'), ('đ°', 'đŁ'), + ('đ°', 'đș'), + ('đŒ', 'đ'), + ('đ', 'đ'), + ('đ', 'đ'), + ('đ', 'đĄ'), + ('đŁ', 'đ±'), + ('đł', 'đč'), + ('đ»', 'đŒ'), ('đ', 'đ¶'), ('đ', 'đ'), ('đ ', 'đ§'), + ('đ', 'đ
'), + ('đ', 'đ°'), + ('đČ', 'đș'), ('đ ', 'đ
'), ('đ ', 'đ '), ('đ ', 'đ ”'), @@ -398,19 +410,22 @@ pub const ALETTER: &'static [(char, char)] = &[ ('đČ', 'đČČ'), ('đł', 'đłČ'), ('đŽ', 'đŽŁ'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('đș', 'đș©'), + ('đș°', 'đș±'), ('đŒ', 'đŒ'), ('đŒ§', 'đŒ§'), ('đŒ°', 'đœ
'), - ('\u{10fb0}', '\u{10fc4}'), + ('đœ°', 'đŸ'), + ('đŸ°', 'đż'), ('đż ', 'đż¶'), ('đ', 'đ·'), + ('đ±', 'đČ'), + ('đ”', 'đ”'), ('đ', 'đŻ'), ('đ', 'đš'), ('đ', 'đŠ'), ('đ
', 'đ
'), - ('\u{11147}', '\u{11147}'), + ('đ
', 'đ
'), ('đ
', 'đ
Č'), ('đ
¶', 'đ
¶'), ('đ', 'đČ'), @@ -419,6 +434,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ«'), + ('đż', 'đ'), ('đ', 'đ'), ('đ', 'đ'), ('đ', 'đ'), @@ -436,7 +452,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('đ', 'đĄ'), ('đ', 'đŽ'), ('đ', 'đ'), - ('đ', '\u{11461}'), + ('đ', 'đĄ'), ('đ', 'đŻ'), ('đ', 'đ
'), ('đ', 'đ'), @@ -448,13 +464,13 @@ pub const ALETTER: &'static [(char, char)] = &[ ('đž', 'đž'), ('đ ', 'đ «'), ('đą ', 'đŁ'), - ('đŁż', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('đŁż', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€'), + ('đ€', 'đ€Ż'), + ('đ€ż', 'đ€ż'), + ('đ„', 'đ„'), ('đŠ ', 'đŠ§'), ('đŠȘ', 'đ§'), ('đ§Ą', 'đ§Ą'), @@ -465,7 +481,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('đ©', 'đ©'), ('đ©', 'đȘ'), ('đȘ', 'đȘ'), - ('đ«', 'đ«ž'), + ('đȘ°', 'đ«ž'), ('đ°', 'đ°'), ('đ°', 'đ°ź'), ('đ±', 'đ±'), @@ -479,14 +495,20 @@ pub const ALETTER: &'static [(char, char)] = &[ ('đ”Ș', 'đ¶'), ('đ¶', 'đ¶'), ('đ» ', 'đ»Č'), - ('\u{11fb0}', '\u{11fb0}'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒ'), + ('đŒ', 'đŒł'), + ('đŸ°', 'đŸ°'), ('đ', 'đ'), ('đ', 'đź'), ('đ', 'đ'), - ('đ', 'đź'), + ('đŸ', 'đż°'), + ('đ', 'đŻ'), + ('đ', 'đ'), ('đ', 'đ'), ('đ ', 'đšž'), ('đ©', 'đ©'), + ('đ©°', 'đȘŸ'), ('đ«', 'đ«'), ('đŹ', 'đŹŻ'), ('đ', 'đ'), @@ -532,10 +554,19 @@ pub const ALETTER: &'static [(char, char)] = &[ ('đ', 'đš'), ('đȘ', 'đ'), ('đ', 'đ'), + ('đŒ', 'đŒ'), + ('đŒ„', 'đŒȘ'), + ('đ°', 'đ'), ('đ', 'đŹ'), ('đ·', 'đœ'), ('đ
', 'đ
'), + ('đ', 'đ'), ('đ', 'đ«'), + ('đ', 'đ«'), + ('đ ', 'đŠ'), + ('đš', 'đ«'), + ('đ', 'đź'), + ('đ°', 'đŸ'), ('đ ', 'đŁ'), ('đ€', 'đ„'), ('đ„', 'đ„'), @@ -579,7 +610,7 @@ pub const ALETTER: &'static [(char, char)] = &[ pub const CR: &'static [(char, char)] = &[('\r', '\r')]; -pub const DOUBLE_QUOTE: &'static [(char, char)] = &[('\"', '\"')]; +pub const DOUBLE_QUOTE: &'static [(char, char)] = &[('"', '"')]; pub const EXTEND: &'static [(char, char)] = &[ ('\u{300}', '\u{36f}'), @@ -606,7 +637,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'à€'), ('\u{93a}', '\u{93c}'), ('à€Ÿ', 'à„'), @@ -648,6 +680,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('àŻ', '\u{bcd}'), ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', 'à±'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -660,6 +693,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('àł', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), + ('àłł', 'àłł'), ('\u{d00}', 'àŽ'), ('\u{d3b}', '\u{d3c}'), ('\u{d3e}', '\u{d44}'), @@ -678,7 +712,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -699,13 +733,14 @@ pub const EXTEND: &'static [(char, char)] = &[ ('á', 'á'), ('á', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1712}', 'á'), + ('\u{1732}', 'áŽ'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', 'ါ'), @@ -714,7 +749,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('á©', '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'áŹ'), ('\u{1b34}', 'á'), ('\u{1b6b}', '\u{1b73}'), @@ -727,8 +762,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('áł·', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{200c}', '\u{200c}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), @@ -782,11 +816,16 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('đ', 'đ'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', 'đ'), ('đ°', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{11134}'), ('đ
', 'đ
'), @@ -794,9 +833,10 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11180}', 'đ'), ('đł', 'đ'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', '\u{111cf}'), + ('đ', '\u{111cf}'), ('đŹ', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112ea}'), ('\u{11300}', 'đ'), ('\u{1133b}', '\u{1133c}'), @@ -817,11 +857,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{116ab}', '\u{116b7}'), ('\u{1171d}', '\u{1172b}'), ('đ Ź', '\u{1183a}'), - ('\u{11930}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('\u{11930}', 'đ€”'), + ('đ€·', 'đ€ž'), ('\u{1193b}', '\u{1193e}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11943}'), + ('đ„', 'đ„'), + ('đ„', '\u{11943}'), ('đ§', '\u{119d7}'), ('\u{119da}', '\u{119e0}'), ('đ§€', 'đ§€'), @@ -844,14 +884,22 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11d90}', '\u{11d91}'), ('đ¶', '\u{11d97}'), ('\u{11ef3}', 'đ»¶'), + ('\u{11f00}', '\u{11f01}'), + ('đŒ', 'đŒ'), + ('đŒŽ', '\u{11f3a}'), + ('đŒŸ', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('đœ', 'đŸ'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('đż°', 'đż±'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('đ
', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -869,8 +917,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('đ»', 'đż'), @@ -894,6 +945,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{61c}', '\u{61c}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), + ('\u{890}', '\u{891}'), ('\u{8e2}', '\u{8e2}'), ('\u{180e}', '\u{180e}'), ('\u{200e}', '\u{200f}'), @@ -904,7 +956,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{fff9}', '\u{fffb}'), ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), - ('\u{13430}', '\u{13438}'), + ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), ('\u{e0001}', '\u{e0001}'), @@ -932,7 +984,12 @@ pub const KATAKANA: &'static [(char, char)] = &[ ('ă', 'ăŸ'), ('ă', 'ă'), ('', 'ïŸ'), + ('đż°', 'đżł'), + ('đż”', 'đż»'), + ('đżœ', 'đżŸ'), ('đ', 'đ'), + ('đ ', 'đą'), + ('đ
', 'đ
'), ('đ
€', 'đ
§'), ]; @@ -1031,17 +1088,20 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('đ', 'đ'), ('đ°', 'đč'), ('đŁ ', 'đŁ©'), - ('\u{11950}', '\u{11959}'), + ('đ„', 'đ„'), ('đ±', 'đ±'), ('đ”', 'đ”'), ('đ¶ ', 'đ¶©'), + ('đœ', 'đœ'), ('đ© ', 'đ©©'), + ('đ«', 'đ«'), ('đ', 'đ'), ('đ', 'đż'), ('đ
', 'đ
'), ('đ°', 'đč'), + ('đ°', 'đč'), ('đ„', 'đ„'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('đŻ°', 'đŻč'), ]; pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('đŠ', 'đż')]; diff --git a/src/utf8.rs b/src/utf8.rs index dc05503..b9c8655 100644 --- a/src/utf8.rs +++ b/src/utf8.rs @@ -198,7 +198,7 @@ impl<'a> IntoIterator for &'a Utf8Sequence { type Item = &'a Utf8Range; fn into_iter(self) -> Self::IntoIter { - self.as_slice().into_iter() + self.as_slice().iter() } } @@ -448,7 +448,7 @@ fn max_scalar_value(nbytes: usize) -> u32 { 1 => 0x007F, 2 => 0x07FF, 3 => 0xFFFF, - 4 => 0x10FFFF, + 4 => 0x0010_FFFF, _ => unreachable!("invalid UTF-8 byte sequence size"), } } @@ -492,7 +492,7 @@ mod tests { fn single_codepoint_one_sequence() { // Tests that every range of scalar values that contains a single // scalar value is recognized by one sequence of byte ranges. - for i in 0x0..(0x10FFFF + 1) { + for i in 0x0..=0x0010_FFFF { let c = match char::from_u32(i) { None => continue, Some(c) => c, |