diff options
-rw-r--r-- | .cargo_vcs_info.json | 2 | ||||
-rw-r--r-- | Android.bp | 7 | ||||
-rw-r--r-- | Cargo.toml | 22 | ||||
-rw-r--r-- | Cargo.toml.orig | 15 | ||||
-rw-r--r-- | METADATA | 25 | ||||
-rw-r--r-- | src/lib.rs | 12 | ||||
-rw-r--r-- | src/punycode.rs | 31 | ||||
-rw-r--r-- | src/uts46.rs | 22 | ||||
-rw-r--r-- | tests/bad_punycode_tests.json | 6 | ||||
-rw-r--r-- | tests/punycode.rs | 39 |
10 files changed, 140 insertions, 41 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index b891109..e1e1c34 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,6 +1,6 @@ { "git": { - "sha1": "359bc90a4f07224f79cc79c45dc873d44bcd6f14" + "sha1": "00e9e18ce6504e0e9157816c593afec8e69cb9e3" }, "path_in_vcs": "idna" }
\ No newline at end of file @@ -42,9 +42,14 @@ rust_library { host_supported: true, crate_name: "idna", cargo_env_compat: true, - cargo_pkg_version: "0.3.0", + cargo_pkg_version: "0.5.0", srcs: ["src/lib.rs"], edition: "2018", + features: [ + "alloc", + "default", + "std", + ], rustlibs: [ "libunicode_bidi", "libunicode_normalization", @@ -13,13 +13,17 @@ edition = "2018" rust-version = "1.51" name = "idna" -version = "0.3.0" +version = "0.5.0" authors = ["The rust-url developers"] autotests = false description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." +categories = ["no_std"] license = "MIT OR Apache-2.0" repository = "https://github.com/servo/rust-url/" +[package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] + [lib] doctest = false @@ -35,10 +39,13 @@ name = "all" harness = false [dependencies.unicode-bidi] -version = "0.3" +version = "0.3.10" +features = ["hardcoded-data"] +default-features = false [dependencies.unicode-normalization] -version = "0.1.17" +version = "0.1.22" +default-features = false [dev-dependencies.assert_matches] version = "1.3" @@ -51,3 +58,12 @@ version = "1.0" [dev-dependencies.tester] version = "0.9" + +[features] +alloc = [] +default = ["std"] +std = [ + "alloc", + "unicode-bidi/std", + "unicode-normalization/std", +] diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 942f122..9ca92d6 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,8 +1,9 @@ [package] name = "idna" -version = "0.3.0" +version = "0.5.0" authors = ["The rust-url developers"] description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." +categories = ["no_std"] repository = "https://github.com/servo/rust-url/" license = "MIT OR Apache-2.0" autotests = false @@ -12,6 +13,11 @@ rust-version = "1.51" [lib] doctest = false +[features] +default = ["std"] +std = ["alloc", "unicode-bidi/std", "unicode-normalization/std"] +alloc = [] + [[test]] name = "tests" harness = false @@ -26,9 +32,12 @@ tester = "0.9" serde_json = "1.0" [dependencies] -unicode-bidi = "0.3" -unicode-normalization = "0.1.17" +unicode-bidi = { version = "0.3.10", default-features = false, features = ["hardcoded-data"] } +unicode-normalization = { version = "0.1.22", default-features = false } [[bench]] name = "all" harness = false + +[package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] @@ -1,23 +1,20 @@ # This project was upgraded with external_updater. -# Usage: tools/external_updater/updater.sh update rust/crates/idna -# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md +# Usage: tools/external_updater/updater.sh update external/rust/crates/idna +# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md name: "idna" description: "IDNA (Internationalizing Domain Names in Applications) and Punycode." third_party { - url { - type: HOMEPAGE - value: "https://crates.io/crates/idna" - } - url { - type: ARCHIVE - value: "https://static.crates.io/crates/idna/idna-0.3.0.crate" - } - version: "0.3.0" license_type: NOTICE last_upgrade_date { - year: 2022 - month: 12 - day: 12 + year: 2024 + month: 2 + day: 2 + } + homepage: "https://crates.io/crates/idna" + identifier { + type: "Archive" + value: "https://static.crates.io/crates/idna/idna-0.5.0.crate" + version: "0.5.0" } } @@ -31,11 +31,23 @@ //! > This document specifies a mechanism //! > that minimizes the impact of this transition for client software, //! > allowing client software to access domains that are valid under either system. +#![no_std] + +// For forwards compatibility +#[cfg(feature = "std")] +extern crate std; + +extern crate alloc; + +#[cfg(not(feature = "alloc"))] +compile_error!("the `alloc` feature must be enabled"); #[cfg(test)] #[macro_use] extern crate assert_matches; +use alloc::string::String; + pub mod punycode; mod uts46; diff --git a/src/punycode.rs b/src/punycode.rs index 21955f3..b1afc96 100644 --- a/src/punycode.rs +++ b/src/punycode.rs @@ -13,8 +13,9 @@ //! `encode_str` and `decode_to_string` provide convenience wrappers //! that convert from and to Rust’s UTF-8 based `str` and `String` types. -use std::char; -use std::u32; +use alloc::{string::String, vec::Vec}; +use core::char; +use core::u32; // Bootstring parameters for Punycode static BASE: u32 = 36; @@ -168,7 +169,7 @@ impl Decoder { } pub(crate) struct Decode<'a> { - base: std::str::Chars<'a>, + base: core::str::Chars<'a>, pub(crate) insertions: &'a [(usize, char)], inserted: usize, position: usize, @@ -214,6 +215,9 @@ impl<'a> ExactSizeIterator for Decode<'a> { /// This is a convenience wrapper around `encode`. #[inline] pub fn encode_str(input: &str) -> Option<String> { + if input.len() > u32::MAX as usize { + return None; + } let mut buf = String::with_capacity(input.len()); encode_into(input.chars(), &mut buf).ok().map(|()| buf) } @@ -223,6 +227,9 @@ pub fn encode_str(input: &str) -> Option<String> { /// Return None on overflow, which can only happen on inputs that would take more than /// 63 encoded bytes, the DNS limit on domain name labels. pub fn encode(input: &[char]) -> Option<String> { + if input.len() > u32::MAX as usize { + return None; + } let mut buf = String::with_capacity(input.len()); encode_into(input.iter().copied(), &mut buf) .ok() @@ -234,9 +241,9 @@ where I: Iterator<Item = char> + Clone, { // Handle "basic" (ASCII) code points. They are encoded as-is. - let (mut input_length, mut basic_length) = (0, 0); + let (mut input_length, mut basic_length) = (0u32, 0); for c in input.clone() { - input_length += 1; + input_length = input_length.checked_add(1).ok_or(())?; if c.is_ascii() { output.push(c); basic_length += 1; @@ -268,10 +275,7 @@ where for c in input.clone() { let c = c as u32; if c < code_point { - delta += 1; - if delta == 0 { - return Err(()); // Overflow - } + delta = delta.checked_add(1).ok_or(())?; } if c == code_point { // Represent delta as a generalized variable-length integer: @@ -313,3 +317,12 @@ fn value_to_digit(value: u32) -> char { _ => panic!(), } } + +#[test] +#[ignore = "slow"] +#[cfg(target_pointer_width = "64")] +fn huge_encode() { + let mut buf = String::new(); + assert!(encode_into(std::iter::repeat('ß').take(u32::MAX as usize + 1), &mut buf).is_err()); + assert_eq!(buf.len(), 0); +} diff --git a/src/uts46.rs b/src/uts46.rs index ec2fd0b..b082416 100644 --- a/src/uts46.rs +++ b/src/uts46.rs @@ -11,7 +11,9 @@ use self::Mapping::*; use crate::punycode; -use std::{error::Error as StdError, fmt}; + +use alloc::string::String; +use core::fmt; use unicode_bidi::{bidi_class, BidiClass}; use unicode_normalization::char::is_combining_mark; use unicode_normalization::{is_nfc, UnicodeNormalization}; @@ -70,10 +72,10 @@ fn find_char(codepoint: char) -> &'static Mapping { } struct Mapper<'a> { - chars: std::str::Chars<'a>, + chars: core::str::Chars<'a>, config: Config, errors: &'a mut Errors, - slice: Option<std::str::Chars<'static>>, + slice: Option<core::str::Chars<'static>>, } impl<'a> Iterator for Mapper<'a> { @@ -274,7 +276,7 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool { /// http://www.unicode.org/reports/tr46/#Validity_Criteria fn check_validity(label: &str, config: Config, errors: &mut Errors) { let first_char = label.chars().next(); - if first_char == None { + if first_char.is_none() { // Empty string, pass return; } @@ -451,7 +453,7 @@ impl Idna { return Errors::default(); } let mut errors = processing(domain, self.config, &mut self.normalized, out); - self.output = std::mem::replace(out, String::with_capacity(out.len())); + self.output = core::mem::replace(out, String::with_capacity(out.len())); let mut first = true; for label in self.output.split('.') { if !first { @@ -475,7 +477,7 @@ impl Idna { /// http://www.unicode.org/reports/tr46/#ToASCII #[allow(clippy::wrong_self_convention)] - pub fn to_ascii<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> { + pub fn to_ascii(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> { let mut errors = self.to_ascii_inner(domain, out); if self.config.verify_dns_length { @@ -497,7 +499,7 @@ impl Idna { /// http://www.unicode.org/reports/tr46/#ToUnicode #[allow(clippy::wrong_self_convention)] - pub fn to_unicode<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> { + pub fn to_unicode(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> { if is_simple(domain) { out.push_str(domain); return Errors::default().into(); @@ -507,6 +509,7 @@ impl Idna { } #[derive(Clone, Copy)] +#[must_use] pub struct Config { use_std3_ascii_rules: bool, transitional_processing: bool, @@ -685,7 +688,7 @@ impl fmt::Debug for Errors { if !empty { f.write_str(", ")?; } - f.write_str(*name)?; + f.write_str(name)?; empty = false; } } @@ -708,7 +711,8 @@ impl From<Errors> for Result<(), Errors> { } } -impl StdError for Errors {} +#[cfg(feature = "std")] +impl std::error::Error for Errors {} impl fmt::Display for Errors { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { diff --git a/tests/bad_punycode_tests.json b/tests/bad_punycode_tests.json new file mode 100644 index 0000000..c011529 --- /dev/null +++ b/tests/bad_punycode_tests.json @@ -0,0 +1,6 @@ +[ + { + "description": "issue 870", + "decoded": " 21日 ᄢ 21日 㩴 ᄢ 21日 ᄢ " + } +] diff --git a/tests/punycode.rs b/tests/punycode.rs index 1a51cbc..2867e49 100644 --- a/tests/punycode.rs +++ b/tests/punycode.rs @@ -10,6 +10,7 @@ use crate::test::TestFn; use idna::punycode::{decode, encode_str}; use serde_json::map::Map; use serde_json::Value; +use std::panic::catch_unwind; use std::str::FromStr; fn one_test(decoded: &str, encoded: &str) { @@ -39,9 +40,18 @@ fn one_test(decoded: &str, encoded: &str) { } } +fn one_bad_test(encode: &str) { + let result = catch_unwind(|| encode_str(encode)); + assert!( + matches!(&result, Ok(None)), + "Should neither panic nor return Some result, but got {:?}", + result + ) +} + fn get_string<'a>(map: &'a Map<String, Value>, key: &str) -> &'a str { match map.get(&key.to_string()) { - Some(&Value::String(ref s)) => s, + Some(Value::String(s)) => s, None => "", _ => panic!(), } @@ -74,4 +84,31 @@ pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) { } other => panic!("{:?}", other), } + + match Value::from_str(include_str!("bad_punycode_tests.json")) { + Ok(Value::Array(tests)) => { + for (i, test) in tests.into_iter().enumerate() { + match test { + Value::Object(o) => { + let test_name = { + let desc = get_string(&o, "description"); + if desc.is_empty() { + format!("Bad Punycode {}", i + 1) + } else { + format!("Bad Punycode {}: {}", i + 1, desc) + } + }; + add_test( + test_name, + TestFn::DynTestFn(Box::new(move || { + one_bad_test(get_string(&o, "decoded")) + })), + ) + } + _ => panic!(), + } + } + } + other => panic!("{:?}", other), + } } |