diff options
author | Jeff Vander Stoep <jeffv@google.com> | 2023-04-03 14:25:16 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2023-04-03 14:25:16 +0000 |
commit | 9cbe0d3895c2d1ddcf19c32f7c0ca0ebc72b5e80 (patch) | |
tree | 8bf42c7258746fbadbb826b47ee121a0abd57234 | |
parent | 16e3ef9141800688259dadf29403a386f04b1ae5 (diff) | |
parent | b80e5b6b42d2c2d932a8ecbe6ec0821a3a07be09 (diff) | |
download | unicode-bidi-9cbe0d3895c2d1ddcf19c32f7c0ca0ebc72b5e80.tar.gz |
Upgrade unicode-bidi to 0.3.13 am: b967d0855e am: 6415a5cc2f am: f623f382d1 am: b80e5b6b42
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/unicode-bidi/+/2514757
Change-Id: I90e6654ccf1da04f5698c9a4a3ab91d6c0f4cf66
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r-- | .cargo_vcs_info.json | 2 | ||||
-rw-r--r-- | .rustfmt.toml | 1 | ||||
-rw-r--r-- | Android.bp | 26 | ||||
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | Cargo.toml.orig | 2 | ||||
-rw-r--r-- | METADATA | 8 | ||||
-rw-r--r-- | cargo2android.json | 5 | ||||
-rw-r--r-- | src/deprecated.rs | 4 | ||||
-rw-r--r-- | src/level.rs | 15 | ||||
-rw-r--r-- | src/lib.rs | 87 |
10 files changed, 98 insertions, 54 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 67b5a63..c19fe9b 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,6 +1,6 @@ { "git": { - "sha1": "cd1de5d1ddbba789c29b6d69811ef49c820eefd4" + "sha1": "3b7a02fd6006a0f83c78a3f59405e97dfd75e311" }, "path_in_vcs": "" }
\ No newline at end of file diff --git a/.rustfmt.toml b/.rustfmt.toml index 7587a1d..e416686 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -1,2 +1 @@ array_width = 80 -brace_style = "SameLineWhere" @@ -44,7 +44,7 @@ rust_library { host_supported: true, crate_name: "unicode_bidi", cargo_env_compat: true, - cargo_pkg_version: "0.3.10", + cargo_pkg_version: "0.3.13", srcs: ["src/lib.rs"], edition: "2018", features: [ @@ -60,27 +60,3 @@ rust_library { vendor_available: true, min_sdk_version: "29", } - -rust_test { - name: "unicode-bidi_test_src_lib", - // has rustc warnings - host_supported: true, - crate_name: "unicode_bidi", - cargo_env_compat: true, - cargo_pkg_version: "0.3.10", - srcs: ["src/lib.rs"], - test_suites: ["general-tests"], - auto_gen_config: true, - test_options: { - unit_test: true, - }, - edition: "2018", - features: [ - "default", - "hardcoded-data", - "std", - ], - rustlibs: [ - "libserde_test", - ], -} @@ -12,7 +12,7 @@ [package] edition = "2018" name = "unicode-bidi" -version = "0.3.10" +version = "0.3.13" authors = ["The Servo Project Developers"] exclude = [ "benches/**", diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 02bc85b..1343fa4 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,6 +1,6 @@ [package] name = "unicode-bidi" -version = "0.3.10" +version = "0.3.13" authors = ["The Servo Project Developers"] license = "MIT OR Apache-2.0" description = "Implementation of the Unicode Bidirectional Algorithm" @@ -11,13 +11,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/unicode-bidi/unicode-bidi-0.3.10.crate" + value: "https://static.crates.io/crates/unicode-bidi/unicode-bidi-0.3.13.crate" } - version: "0.3.10" + version: "0.3.13" license_type: NOTICE last_upgrade_date { year: 2023 - month: 2 - day: 6 + month: 3 + day: 30 } } diff --git a/cargo2android.json b/cargo2android.json index 911c36f..da28e2c 100644 --- a/cargo2android.json +++ b/cargo2android.json @@ -6,6 +6,5 @@ "dependencies": true, "device": true, "min-sdk-version": "29", - "run": true, - "tests": true -}
\ No newline at end of file + "run": true +} diff --git a/src/deprecated.rs b/src/deprecated.rs index ec3b84f..74a24f5 100644 --- a/src/deprecated.rs +++ b/src/deprecated.rs @@ -46,8 +46,8 @@ pub fn visual_runs(line: Range<usize>, levels: &[Level]) -> Vec<LevelRun> { start = i; run_level = new_level; - min_level = min(run_level, min_level); - max_level = max(run_level, max_level); + min_level = cmp::min(run_level, min_level); + max_level = cmp::max(run_level, max_level); } } runs.push(start..line.end); diff --git a/src/level.rs b/src/level.rs index f2e0d99..ef4f6d9 100644 --- a/src/level.rs +++ b/src/level.rs @@ -16,6 +16,7 @@ use alloc::string::{String, ToString}; use alloc::vec::Vec; use core::convert::{From, Into}; +use core::slice; use super::char_data::BidiClass; @@ -31,6 +32,7 @@ use super::char_data::BidiClass; /// <http://www.unicode.org/reports/tr9/#BD2> #[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[repr(transparent)] pub struct Level(u8); pub const LTR_LEVEL: Level = Level(0); @@ -194,6 +196,19 @@ impl Level { pub fn vec(v: &[u8]) -> Vec<Level> { v.iter().map(|&x| x.into()).collect() } + + /// Converts a byte slice to a slice of Levels + /// + /// Does _not_ check if each level is within bounds (`<=` [`MAX_IMPLICIT_DEPTH`]), + /// which is not a requirement for safety but is a requirement for correctness of the algorithm. + pub fn from_slice_unchecked(v: &[u8]) -> &[Level] { + debug_assert_eq!(core::mem::size_of::<u8>(), core::mem::size_of::<Level>()); + unsafe { + // Safety: The two arrays are the same size and layout-compatible since + // Level is `repr(transparent)` over `u8` + slice::from_raw_parts(v as *const [u8] as *const u8 as *const Level, v.len()) + } + } } /// If levels has any RTL (odd) level @@ -65,7 +65,6 @@ //! //! [tr9]: <http://www.unicode.org/reports/tr9/> -#![forbid(unsafe_code)] #![no_std] // We need to link to std to make doc tests work on older Rust versions #[cfg(feature = "std")] @@ -94,7 +93,7 @@ pub use crate::char_data::{bidi_class, HardcodedBidiData}; use alloc::borrow::Cow; use alloc::string::String; use alloc::vec::Vec; -use core::cmp::{max, min}; +use core::cmp; use core::iter::repeat; use core::ops::Range; @@ -426,6 +425,14 @@ impl<'text> BidiInfo<'text> { /// /// the index map will result in `indexMap[visualIndex]==logicalIndex`. /// + /// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have + /// information about the actual text. + /// + /// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be + /// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level` + /// is for a single code point. + /// + /// /// # # Example /// ``` /// use unicode_bidi::BidiInfo; @@ -443,18 +450,31 @@ impl<'text> BidiInfo<'text> { /// let levels: Vec<Level> = vec![l0, l0, l0, l1, l1, l1, l2, l2]; /// let index_map = BidiInfo::reorder_visual(&levels); /// assert_eq!(levels.len(), index_map.len()); - /// assert_eq!(index_map, [0, 1, 2, 5, 4, 3, 6, 7]); + /// assert_eq!(index_map, [0, 1, 2, 6, 7, 5, 4, 3]); /// ``` pub fn reorder_visual(levels: &[Level]) -> Vec<usize> { - // Gets the next range - fn next_range(levels: &[level::Level], start_index: usize) -> Range<usize> { + // Gets the next range of characters after start_index with a level greater + // than or equal to `max` + fn next_range(levels: &[level::Level], mut start_index: usize, max: Level) -> Range<usize> { if levels.is_empty() || start_index >= levels.len() { return start_index..start_index; } + while let Some(l) = levels.get(start_index) { + if *l >= max { + break; + } + start_index += 1; + } + + if levels.get(start_index).is_none() { + // If at the end of the array, adding one will + // produce an out-of-range end element + return start_index..start_index; + } let mut end_index = start_index + 1; - while end_index < levels.len() { - if levels[start_index] != levels[end_index] { + while let Some(l) = levels.get(end_index) { + if *l < max { return start_index..end_index; } end_index += 1; @@ -463,21 +483,50 @@ impl<'text> BidiInfo<'text> { start_index..end_index } + // This implementation is similar to the L2 implementation in `visual_runs()` + // but it cannot benefit from a precalculated LevelRun vector so needs to be different. + if levels.is_empty() { return vec![]; } + + // Get the min and max levels + let (mut min, mut max) = levels + .iter() + .fold((levels[0], levels[0]), |(min, max), &l| { + (cmp::min(min, l), cmp::max(max, l)) + }); + + // Initialize an index map let mut result: Vec<usize> = (0..levels.len()).collect(); - let mut range: Range<usize> = 0..0; - loop { - range = next_range(levels, range.end); - if levels[range.start].is_rtl() { + if min == max && min.is_ltr() { + // Everything is LTR and at the same level, do nothing + return result; + } + + // Stop at the lowest *odd* level, since everything below that + // is LTR and does not need further reordering + min = min.new_lowest_ge_rtl().expect("Level error"); + + // For each max level, take all contiguous chunks of + // levels ≥ max and reverse them + // + // We can do this check with the original levels instead of checking reorderings because all + // prior reorderings will have been for contiguous chunks of levels >> max, which will + // be a subset of these chunks anyway. + while min <= max { + let mut range = 0..0; + loop { + range = next_range(levels, range.end, max); result[range.clone()].reverse(); - } - if range.end >= levels.len() { - break; + if range.end >= levels.len() { + break; + } } + + max.lower(1).expect("Level error"); } result @@ -564,8 +613,8 @@ impl<'text> BidiInfo<'text> { runs.push(start..i); start = i; run_level = new_level; - min_level = min(run_level, min_level); - max_level = max(run_level, max_level); + min_level = cmp::min(run_level, min_level); + max_level = cmp::max(run_level, max_level); } } runs.push(start..line.end); @@ -578,6 +627,12 @@ impl<'text> BidiInfo<'text> { // Stop at the lowest *odd* level. min_level = min_level.new_lowest_ge_rtl().expect("Level error"); + // This loop goes through contiguous chunks of level runs that have a level + // ≥ max_level and reverses their contents, reducing max_level by 1 each time. + // + // It can do this check with the original levels instead of checking reorderings because all + // prior reorderings will have been for contiguous chunks of levels >> max, which will + // be a subset of these chunks anyway. while max_level >= min_level { // Look for the start of a sequence of consecutive runs of max_level or higher. let mut seq_start = 0; |