diff options
author | Jeff Vander Stoep <jeffv@google.com> | 2023-02-01 17:04:58 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2023-02-01 17:04:58 +0000 |
commit | ad7bc0041378b7e361c5f9272a6da640f49ea29d (patch) | |
tree | 4a5612f9494d98854601403f69c4beaebb472e11 | |
parent | 8cc600fbcfb556b18ae725de8cd6690ae9cbf3e0 (diff) | |
parent | f49fc6f5429635a2c9fc6e7e104f0ff9dad157d2 (diff) | |
download | bstr-ad7bc0041378b7e361c5f9272a6da640f49ea29d.tar.gz |
Upgrade bstr to 1.1.0 am: f49fc6f542
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/bstr/+/2411634
Change-Id: I171956602c4444c2b8ce8494a1f420f43caafee8
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r-- | .cargo_vcs_info.json | 2 | ||||
-rw-r--r-- | Android.bp | 2 | ||||
-rw-r--r-- | Cargo.lock.saved | 127 | ||||
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | Cargo.toml.orig | 2 | ||||
-rw-r--r-- | METADATA | 10 | ||||
-rw-r--r-- | README.md | 30 | ||||
-rw-r--r-- | src/ext_slice.rs | 27 | ||||
-rw-r--r-- | src/impls.rs | 75 | ||||
-rw-r--r-- | src/io.rs | 2 | ||||
-rw-r--r-- | src/lib.rs | 2 | ||||
-rw-r--r-- | src/utf8.rs | 2 |
12 files changed, 120 insertions, 163 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 81b3348..78c885d 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,6 +1,6 @@ { "git": { - "sha1": "2900d6016b16acb907c70d2d87aa82d0172cd057" + "sha1": "86947727666d7b21c97eb16145b3ad6ac22aacd3" }, "path_in_vcs": "" }
\ No newline at end of file @@ -44,7 +44,7 @@ rust_library { host_supported: true, crate_name: "bstr", cargo_env_compat: true, - cargo_pkg_version: "1.0.1", + cargo_pkg_version: "1.1.0", srcs: ["src/lib.rs"], edition: "2021", features: [ diff --git a/Cargo.lock.saved b/Cargo.lock.saved deleted file mode 100644 index bcc42fb..0000000 --- a/Cargo.lock.saved +++ /dev/null @@ -1,127 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "bstr" -version = "1.0.1" -dependencies = [ - "memchr", - "once_cell", - "quickcheck", - "regex-automata", - "serde", - "ucd-parse", - "unicode-segmentation", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "getrandom" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "libc" -version = "0.2.132" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" - -[[package]] -name = "memchr" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" - -[[package]] -name = "once_cell" -version = "1.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f7254b99e31cad77da24b08ebf628882739a608578bb1bcdfc1f9c21260d7c0" - -[[package]] -name = "quickcheck" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" -dependencies = [ - "rand", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" -dependencies = [ - "getrandom", -] - -[[package]] -name = "regex" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" -dependencies = [ - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - -[[package]] -name = "regex-syntax" -version = "0.6.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" - -[[package]] -name = "serde" -version = "1.0.144" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860" - -[[package]] -name = "ucd-parse" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc2d0556a998f4c55500ce1730901ba32bafbe820068cbdc091421525d61253b" -dependencies = [ - "once_cell", - "regex", -] - -[[package]] -name = "unicode-segmentation" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" @@ -13,7 +13,7 @@ edition = "2021" rust-version = "1.60" name = "bstr" -version = "1.0.1" +version = "1.1.0" authors = ["Andrew Gallant <jamslam@gmail.com>"] exclude = ["/.github"] description = "A string type that is not required to be valid UTF-8." diff --git a/Cargo.toml.orig b/Cargo.toml.orig index c2a17a2..b08cd68 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,6 +1,6 @@ [package] name = "bstr" -version = "1.0.1" #:version +version = "1.1.0" #:version authors = ["Andrew Gallant <jamslam@gmail.com>"] description = "A string type that is not required to be valid UTF-8." documentation = "https://docs.rs/bstr" @@ -11,13 +11,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/bstr/bstr-1.0.1.crate" + value: "https://static.crates.io/crates/bstr/bstr-1.1.0.crate" } - version: "1.0.1" + version: "1.1.0" license_type: NOTICE last_upgrade_date { - year: 2022 - month: 12 - day: 12 + year: 2023 + month: 2 + day: 1 } } @@ -6,7 +6,7 @@ differs from the standard library's `String` and `str` types in that they are not required to be valid UTF-8, but may be fully or partially valid UTF-8. [![Build status](https://github.com/BurntSushi/bstr/workflows/ci/badge.svg)](https://github.com/BurntSushi/bstr/actions) -[![Crates.io](https://img.shields.io/crates/v/bstr.svg)](https://crates.io/crates/bstr) +[![crates.io](https://img.shields.io/crates/v/bstr.svg)](https://crates.io/crates/bstr) ### Documentation @@ -29,7 +29,7 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -bstr = "1.0.0" +bstr = "1" ``` @@ -38,8 +38,8 @@ bstr = "1.0.0" The following two examples exhibit both the API features of byte strings and the I/O convenience functions provided for reading line-by-line quickly. -This first example simply shows how to efficiently iterate over lines in -stdin, and print out lines containing a particular substring: +This first example simply shows how to efficiently iterate over lines in stdin, +and print out lines containing a particular substring: ```rust use std::{error::Error, io::{self, Write}}; @@ -132,8 +132,8 @@ fn main() -> Result<(), Box<dyn Error>> { ### Cargo features -This crates comes with a few features that control standard library, serde -and Unicode support. +This crates comes with a few features that control standard library, serde and +Unicode support. * `std` - **Enabled** by default. This provides APIs that require the standard library, such as `Vec<u8>` and `PathBuf`. Enabling this feature also enables @@ -160,17 +160,17 @@ supported version of Rust. MSRV may be bumped in minor version releases. ### Future work -Since it is plausible that some of the types in this crate might end up in -your public API (e.g., `BStr` and `BString`), we will commit to being very +Since it is plausible that some of the types in this crate might end up in your +public API (e.g., `BStr` and `BString`), we will commit to being very conservative with respect to new major version releases. It's difficult to say precisely how conservative, but unless there is a major issue with the `1.0` release, I wouldn't expect a `2.0` release to come out any sooner than some period of years. A large part of the API surface area was taken from the standard library, so -from an API design perspective, a good portion of this crate should be on -solid ground. The main differences from the standard library are in how the -various substring search routines work. The standard library provides generic +from an API design perspective, a good portion of this crate should be on solid +ground. The main differences from the standard library are in how the various +substring search routines work. The standard library provides generic infrastructure for supporting different types of searches with a single method, where as this library prefers to define new methods for each type of search and drop the generic infrastructure. @@ -202,16 +202,16 @@ achieved with the standard library `Vec<u8>`/`&[u8]` APIs and the ecosystem of library crates. For example: * The standard library's - [`Utf8Error`](https://doc.rust-lang.org/std/str/struct.Utf8Error.html) - can be used for incremental lossy decoding of `&[u8]`. + [`Utf8Error`](https://doc.rust-lang.org/std/str/struct.Utf8Error.html) can be + used for incremental lossy decoding of `&[u8]`. * The [`unicode-segmentation`](https://unicode-rs.github.io/unicode-segmentation/unicode_segmentation/index.html) crate can be used for iterating over graphemes (or words), but is only implemented for `&str` types. One could use `Utf8Error` above to implement grapheme iteration with the same semantics as what `bstr` provides (automatic Unicode replacement codepoint substitution). -* The [`twoway`](https://docs.rs/twoway) crate can be used for - fast substring searching on `&[u8]`. +* The [`twoway`](https://docs.rs/twoway) crate can be used for fast substring + searching on `&[u8]`. So why create `bstr`? Part of the point of the `bstr` crate is to provide a uniform API of coupled components instead of relying on users to piece together diff --git a/src/ext_slice.rs b/src/ext_slice.rs index ec52a61..70f94e2 100644 --- a/src/ext_slice.rs +++ b/src/ext_slice.rs @@ -87,10 +87,23 @@ impl ByteSlice for [u8] { } } +impl<const N: usize> ByteSlice for [u8; N] { + #[inline] + fn as_bytes(&self) -> &[u8] { + self + } + + #[inline] + fn as_bytes_mut(&mut self) -> &mut [u8] { + self + } +} + /// Ensure that callers cannot implement `ByteSlice` by making an /// umplementable trait its super trait. pub trait Sealed {} impl Sealed for [u8] {} +impl<const N: usize> Sealed for [u8; N] {} /// A trait that extends `&[u8]` with string oriented methods. pub trait ByteSlice: Sealed { @@ -1311,11 +1324,11 @@ pub trait ByteSlice: Sealed { SplitReverse::new(self.as_bytes(), splitter.as_ref()) } - /// Split this byte string at the first occurance of `splitter`. + /// Split this byte string at the first occurrence of `splitter`. /// /// If the `splitter` is found in the byte string, returns a tuple - /// containing the parts of the string before and after the first occurance - /// of `splitter` respectively. Otherwise, if there are no occurances of + /// containing the parts of the string before and after the first occurrence + /// of `splitter` respectively. Otherwise, if there are no occurrences of /// `splitter` in the byte string, returns `None`. /// /// The splitter may be any type that can be cheaply converted into a @@ -1355,11 +1368,11 @@ pub trait ByteSlice: Sealed { Some((&bytes[..start], &bytes[end..])) } - /// Split this byte string at the last occurance of `splitter`. + /// Split this byte string at the last occurrence of `splitter`. /// /// If the `splitter` is found in the byte string, returns a tuple - /// containing the parts of the string before and after the last occurance - /// of `splitter`, respectively. Otherwise, if there are no occurances of + /// containing the parts of the string before and after the last occurrence + /// of `splitter`, respectively. Otherwise, if there are no occurrences of /// `splitter` in the byte string, returns `None`. /// /// The splitter may be any type that can be cheaply converted into a @@ -1902,7 +1915,7 @@ pub trait ByteSlice: Sealed { /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "🇺🇸")], graphemes); /// ``` /// - /// This example shows what happens when invalid UTF-8 is enountered. Note + /// This example shows what happens when invalid UTF-8 is encountered. Note /// that the offsets are valid indices into the original string, and do /// not necessarily correspond to the length of the `&str` returned! /// diff --git a/src/impls.rs b/src/impls.rs index 669aee6..eac4700 100644 --- a/src/impls.rs +++ b/src/impls.rs @@ -787,14 +787,14 @@ mod bstr_serde { mod bstring_serde { use core::{cmp, fmt}; - use alloc::{string::String, vec::Vec}; + use alloc::{boxed::Box, string::String, vec::Vec}; use serde::{ de::Error, de::SeqAccess, de::Visitor, Deserialize, Deserializer, Serialize, Serializer, }; - use crate::bstring::BString; + use crate::{bstr::BStr, bstring::BString}; impl Serialize for BString { #[inline] @@ -870,6 +870,77 @@ mod bstring_serde { deserializer.deserialize_byte_buf(BStringVisitor) } } + + impl<'de> Deserialize<'de> for Box<BStr> { + #[inline] + fn deserialize<D>(deserializer: D) -> Result<Box<BStr>, D::Error> + where + D: Deserializer<'de>, + { + struct BoxedBStrVisitor; + + impl<'de> Visitor<'de> for BoxedBStrVisitor { + type Value = Box<BStr>; + + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("a boxed byte string") + } + + #[inline] + fn visit_seq<V: SeqAccess<'de>>( + self, + mut visitor: V, + ) -> Result<Box<BStr>, V::Error> { + let len = cmp::min(visitor.size_hint().unwrap_or(0), 256); + let mut bytes = Vec::with_capacity(len); + while let Some(v) = visitor.next_element()? { + bytes.push(v); + } + Ok(BStr::from_boxed_bytes(bytes.into_boxed_slice())) + } + + #[inline] + fn visit_bytes<E: Error>( + self, + value: &[u8], + ) -> Result<Box<BStr>, E> { + Ok(BStr::from_boxed_bytes( + value.to_vec().into_boxed_slice(), + )) + } + + #[inline] + fn visit_byte_buf<E: Error>( + self, + value: Vec<u8>, + ) -> Result<Box<BStr>, E> { + Ok(BStr::from_boxed_bytes(value.into_boxed_slice())) + } + + #[inline] + fn visit_str<E: Error>( + self, + value: &str, + ) -> Result<Box<BStr>, E> { + Ok(BStr::from_boxed_bytes( + value.as_bytes().to_vec().into_boxed_slice(), + )) + } + + #[inline] + fn visit_string<E: Error>( + self, + value: String, + ) -> Result<Box<BStr>, E> { + Ok(BStr::from_boxed_bytes( + value.into_bytes().into_boxed_slice(), + )) + } + } + + deserializer.deserialize_byte_buf(BoxedBStrVisitor) + } + } } #[cfg(all(test, feature = "std"))] @@ -13,7 +13,7 @@ use std::io; use crate::{ext_slice::ByteSlice, ext_vec::ByteVec}; -/// An extention trait for +/// An extension trait for /// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html) /// which provides convenience APIs for dealing with byte strings. pub trait BufReadExt: io::BufRead { @@ -34,7 +34,7 @@ additional string oriented methods. Operations such as iterating over graphemes, searching for substrings, replacing substrings, trimming and case conversion are examples of things not provided on the standard library `&[u8]` APIs but are provided by this crate. For example, this code iterates over all -of occurrences of a subtring: +of occurrences of a substring: ``` use bstr::ByteSlice; diff --git a/src/utf8.rs b/src/utf8.rs index bc9bc52..4b5bc20 100644 --- a/src/utf8.rs +++ b/src/utf8.rs @@ -388,7 +388,7 @@ impl<'a> ::core::iter::FusedIterator for Utf8Chunks<'a> {} /// assert_eq!(err.error_len(), Some(3)); /// /// // In contrast to the above which contains a single invalid prefix, -/// // consider the case of multiple individal bytes that are never valid +/// // consider the case of multiple individual bytes that are never valid /// // prefixes. Note how the value of error_len changes! /// let s = b"foobar\xFF\xFFquux"; /// let err = s.to_str().unwrap_err(); |