diff options
author | Jeff Vander Stoep <jeffv@google.com> | 2023-02-03 14:39:31 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2023-02-03 14:39:31 +0000 |
commit | b89878b402b9ef942acc8d0a66ea037d8eeef704 (patch) | |
tree | 1c68c0d6b6bd33620bdc4914faaeb7ae930c9fe0 | |
parent | a8724417ce0797023267316da8fa6db3d037a739 (diff) | |
parent | d362f28c90eebd16569dfe5c2837a03e6ace1bc0 (diff) | |
download | litrs-b89878b402b9ef942acc8d0a66ea037d8eeef704.tar.gz |
Upgrade litrs to 0.3.0 am: d362f28c90
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/litrs/+/2421159
Change-Id: I96e9411ac7e4962ba7adf1ef9f2076e891337556
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r-- | .cargo_vcs_info.json | 7 | ||||
-rw-r--r-- | Android.bp | 8 | ||||
-rw-r--r-- | CHANGELOG.md | 20 | ||||
-rw-r--r-- | Cargo.toml | 33 | ||||
-rw-r--r-- | Cargo.toml.orig | 3 | ||||
-rw-r--r-- | METADATA | 15 | ||||
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | src/bool/mod.rs | 6 | ||||
-rw-r--r-- | src/byte/mod.rs | 70 | ||||
-rw-r--r-- | src/byte/tests.rs | 3 | ||||
-rw-r--r-- | src/bytestr/mod.rs | 10 | ||||
-rw-r--r-- | src/bytestr/tests.rs | 5 | ||||
-rw-r--r-- | src/char/mod.rs | 69 | ||||
-rw-r--r-- | src/char/tests.rs | 3 | ||||
-rw-r--r-- | src/err.rs | 3 | ||||
-rw-r--r-- | src/escape.rs | 4 | ||||
-rw-r--r-- | src/float/mod.rs | 195 | ||||
-rw-r--r-- | src/float/tests.rs | 7 | ||||
-rw-r--r-- | src/impls.rs | 64 | ||||
-rw-r--r-- | src/integer/mod.rs | 195 | ||||
-rw-r--r-- | src/integer/tests.rs | 11 | ||||
-rw-r--r-- | src/parse.rs | 18 | ||||
-rw-r--r-- | src/string/mod.rs | 10 | ||||
-rw-r--r-- | src/string/tests.rs | 5 | ||||
-rw-r--r-- | src/test_util.rs | 47 |
25 files changed, 534 insertions, 283 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index b790a60..2453d78 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,5 +1,6 @@ { "git": { - "sha1": "219f0ca73d15cc518e9d56952b4e296dc8c17636" - } -} + "sha1": "3cff8b417bbde9cb80f9d3aa1be0704ec9c78006" + }, + "path_in_vcs": "" +}
\ No newline at end of file @@ -1,8 +1,6 @@ // This file is generated by cargo2android.py --config cargo2android.json. // Do not modify this file as changes will be overridden on upgrade. - - package { default_applicable_licenses: ["external_rust_crates_litrs_license"], } @@ -43,7 +41,7 @@ rust_library_host { name: "liblitrs", crate_name: "litrs", cargo_env_compat: true, - cargo_pkg_version: "0.2.3", + cargo_pkg_version: "0.3.0", srcs: ["src/lib.rs"], edition: "2018", features: [ @@ -54,8 +52,4 @@ rust_library_host { "libproc_macro2", ], compile_multilib: "first", - apex_available: [ - "//apex_available:platform", - "//apex_available:anyapex", - ], } diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e6628a..3d4ee99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,23 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +## [0.3.0] - 2022-12-19 +### Breaking +- Bump MSRV (minimal supported Rust version) to 1.54 + +### Added +- Add `raw_input` and `into_raw_input` to non-bool `*Lit` types +- Add `impl From<*Lit> for pm::Literal` (for non-bool literals) +- Add `impl From<BoolLit> for pm::Ident` + +### Fixed +- Fix link to reference and clarify bool literals ([#7](https://github.com/LukasKalbertodt/litrs/pull/7)) + +### Internals +- Move lots of parsing code into non-generic functions (this hopefully reduces compile times) +- To implement `[into_]raw_input` for integer and float literals, their + internals were changed a bit so that they store the full input string now. + ## [0.2.3] - 2021-06-09 ### Changed - Minor internal code change to bring MSRV from 1.52 to 1.42 @@ -51,7 +68,8 @@ All notable changes to this project will be documented in this file. - Everything -[Unreleased]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.3...HEAD +[Unreleased]: https://github.com/LukasKalbertodt/litrs/compare/v0.3.0...HEAD +[0.3.0]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.3...v0.3.0 [0.2.3]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.2...v0.2.3 [0.2.2]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.1...v0.2.2 [0.2.1]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.0...v0.2.1 @@ -3,26 +3,41 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. [package] edition = "2018" +rust-version = "1.54" name = "litrs" -version = "0.2.3" +version = "0.3.0" authors = ["Lukas Kalbertodt <lukas.kalbertodt@gmail.com>"] exclude = [".github"] -description = "Parse and inspect Rust literals (i.e. tokens in the Rust programming language\nrepresenting fixed values). Particularly useful for proc macros, but can also\nbe used outside of a proc-macro context.\n" +description = """ +Parse and inspect Rust literals (i.e. tokens in the Rust programming language +representing fixed values). Particularly useful for proc macros, but can also +be used outside of a proc-macro context. +""" documentation = "https://docs.rs/litrs/" readme = "README.md" -keywords = ["literal", "parsing", "proc-macro", "type", "procedural"] -categories = ["development-tools::procedural-macro-helpers", "parser-implementations", "development-tools::build-utils"] +keywords = [ + "literal", + "parsing", + "proc-macro", + "type", + "procedural", +] +categories = [ + "development-tools::procedural-macro-helpers", + "parser-implementations", + "development-tools::build-utils", +] license = "MIT/Apache-2.0" repository = "https://github.com/LukasKalbertodt/litrs/" + [dependencies.proc-macro2] version = "1" optional = true diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 9adec4c..bbfd540 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,8 +1,9 @@ [package] name = "litrs" -version = "0.2.3" +version = "0.3.0" authors = ["Lukas Kalbertodt <lukas.kalbertodt@gmail.com>"] edition = "2018" +rust-version = "1.54" description = """ Parse and inspect Rust literals (i.e. tokens in the Rust programming language @@ -1,3 +1,7 @@ +# This project was upgraded with external_updater. +# Usage: tools/external_updater/updater.sh update rust/crates/litrs +# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md + name: "litrs" description: "Parse and inspect Rust literals (i.e. tokens in the Rust programming language representing fixed values). Particularly useful for proc macros, but can also be used outside of a proc-macro context." third_party { @@ -7,14 +11,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/litrs/litrs-0.2.3.crate" + value: "https://static.crates.io/crates/litrs/litrs-0.3.0.crate" } - version: "0.2.3" - # Dual-licensed, using the least restrictive per go/thirdpartylicenses#same. + version: "0.3.0" license_type: NOTICE last_upgrade_date { - year: 2022 - month: 10 - day: 27 + year: 2023 + month: 2 + day: 3 } } @@ -1,10 +1,11 @@ # `litrs`: parsing and inspecting Rust literals -[<img alt="CI status of master" src="https://img.shields.io/github/workflow/status/LukasKalbertodt/litrs/CI/master?label=CI&logo=github&logoColor=white&style=for-the-badge" height="23">](https://github.com/LukasKalbertodt/litrs/actions?query=workflow%3ACI+branch%3Amaster) +[<img alt="CI status of main" src="https://img.shields.io/github/actions/workflow/status/LukasKalbertodt/litrs/ci.yml?branch=main&label=CI&logo=github&logoColor=white&style=for-the-badge" height="23">](https://github.com/LukasKalbertodt/litrs/actions/workflows/ci.yml) [<img alt="Crates.io Version" src="https://img.shields.io/crates/v/litrs?logo=rust&style=for-the-badge" height="23">](https://crates.io/crates/litrs) [<img alt="docs.rs" src="https://img.shields.io/crates/v/litrs?color=blue&label=docs&style=for-the-badge" height="23">](https://docs.rs/litrs) `litrs` offers functionality to parse Rust literals, i.e. tokens in the Rust programming language that represent fixed values. +For example: `27`, `"crab"`, `bool`. This is particularly useful for proc macros, but can also be used outside of a proc-macro context. **Why this library?** @@ -12,11 +13,10 @@ Unfortunately, the `proc_macro` API shipped with the compiler offers no easy way There are mainly two libraries for this purpose: [`syn`](https://github.com/dtolnay/syn) and [`literalext`](https://github.com/mystor/literalext). The latter is deprecated. -And `syn` is oftentimes overkill for the task at hand, especially when developing function like proc-macros (e.g. `foo!(..)`). +And `syn` is oftentimes overkill for the task at hand, especially when developing function-like proc-macros (e.g. `foo!(..)`). This crate is a lightweight alternative. Also, when it comes to literals, `litrs` offers a bit more flexibility and a few more features compared to `syn`. -While this library is fairly young, it is extensively tested and I think the number of parsing bugs should already be very low. I'm interested in community feedback! If you consider using this, please speak your mind [in this issue](https://github.com/LukasKalbertodt/litrs/issues/1). diff --git a/src/bool/mod.rs b/src/bool/mod.rs index 406174c..d7b54a1 100644 --- a/src/bool/mod.rs +++ b/src/bool/mod.rs @@ -5,7 +5,11 @@ use crate::{ParseError, err::{perr, ParseErrorKind::*}}; /// A bool literal: `true` or `false`. Also see [the reference][ref]. /// -/// [ref]: https://doc.rust-lang.org/reference/tokens.html#boolean-literals +/// Notice that, strictly speaking, from Rust point of view "boolean literals" are not +/// actual literals but [keywords]. +/// +/// [ref]: https://doc.rust-lang.org/reference/expressions/literal-expr.html#boolean-literal-expressions +/// [keywords]: https://doc.rust-lang.org/reference/keywords.html#strict-keywords #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BoolLit { False, diff --git a/src/byte/mod.rs b/src/byte/mod.rs index 5f60e42..7c64901 100644 --- a/src/byte/mod.rs +++ b/src/byte/mod.rs @@ -29,7 +29,8 @@ impl<B: Buffer> ByteLit<B> { return Err(perr(None, InvalidByteLiteralStart)); } - Self::parse_impl(input) + let value = parse_impl(&input)?; + Ok(Self { raw: input, value }) } /// Returns the byte value that this literal represents. @@ -37,37 +38,16 @@ impl<B: Buffer> ByteLit<B> { self.value } - /// Precondition: must start with `b'`. - pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> { - if input.len() == 2 { - return Err(perr(None, UnterminatedByteLiteral)); - } - if *input.as_bytes().last().unwrap() != b'\'' { - return Err(perr(None, UnterminatedByteLiteral)); - } - - let inner = &input[2..input.len() - 1]; - let first = inner.as_bytes().get(0).ok_or(perr(None, EmptyByteLiteral))?; - let (c, len) = match first { - b'\'' => return Err(perr(2, UnescapedSingleQuote)), - b'\n' | b'\t' | b'\r' - => return Err(perr(2, UnescapedSpecialWhitespace)), - - b'\\' => unescape::<u8>(inner, 2)?, - other if other.is_ascii() => (*other, 1), - _ => return Err(perr(2, NonAsciiInByteLiteral)), - }; - let rest = &inner[len..]; - - if !rest.is_empty() { - return Err(perr(len + 2..input.len() - 1, OverlongByteLiteral)); - } + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } - Ok(Self { - raw: input, - value: c, - }) + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw } + } impl ByteLit<&str> { @@ -87,5 +67,35 @@ impl<B: Buffer> fmt::Display for ByteLit<B> { } } +/// Precondition: must start with `b'`. +#[inline(never)] +pub(crate) fn parse_impl(input: &str) -> Result<u8, ParseError> { + if input.len() == 2 { + return Err(perr(None, UnterminatedByteLiteral)); + } + if *input.as_bytes().last().unwrap() != b'\'' { + return Err(perr(None, UnterminatedByteLiteral)); + } + + let inner = &input[2..input.len() - 1]; + let first = inner.as_bytes().get(0).ok_or(perr(None, EmptyByteLiteral))?; + let (c, len) = match first { + b'\'' => return Err(perr(2, UnescapedSingleQuote)), + b'\n' | b'\t' | b'\r' + => return Err(perr(2, UnescapedSpecialWhitespace)), + + b'\\' => unescape::<u8>(inner, 2)?, + other if other.is_ascii() => (*other, 1), + _ => return Err(perr(2, NonAsciiInByteLiteral)), + }; + let rest = &inner[len..]; + + if !rest.is_empty() { + return Err(perr(len + 2..input.len() - 1, OverlongByteLiteral)); + } + + Ok(c) +} + #[cfg(test)] mod tests; diff --git a/src/byte/tests.rs b/src/byte/tests.rs index 5f663ce..08586b0 100644 --- a/src/byte/tests.rs +++ b/src/byte/tests.rs @@ -1,4 +1,4 @@ -use crate::{ByteLit, Literal, test_util::assert_parse_ok_eq}; +use crate::{ByteLit, Literal, test_util::{assert_parse_ok_eq, assert_roundtrip}}; // ===== Utility functions ======================================================================= @@ -13,6 +13,7 @@ macro_rules! check { assert_parse_ok_eq(input, ByteLit::parse(input), expected.clone(), "ByteLit::parse"); assert_parse_ok_eq(input, Literal::parse(input), Literal::Byte(expected), "Literal::parse"); assert_eq!(ByteLit::parse(input).unwrap().value(), $lit); + assert_roundtrip(expected.to_owned(), input); }; } diff --git a/src/bytestr/mod.rs b/src/bytestr/mod.rs index 6cfb61d..a2908b9 100644 --- a/src/bytestr/mod.rs +++ b/src/bytestr/mod.rs @@ -62,6 +62,16 @@ impl<B: Buffer> ByteStringLit<B> { self.num_hashes.is_some() } + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } + + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw + } + /// The range within `self.raw` that excludes the quotes and potential `r#`. fn inner_range(&self) -> Range<usize> { match self.num_hashes { diff --git a/src/bytestr/tests.rs b/src/bytestr/tests.rs index 8e3c033..b0480fd 100644 --- a/src/bytestr/tests.rs +++ b/src/bytestr/tests.rs @@ -1,4 +1,4 @@ -use crate::{Literal, ByteStringLit, test_util::assert_parse_ok_eq}; +use crate::{Literal, ByteStringLit, test_util::{assert_parse_ok_eq, assert_roundtrip}}; // ===== Utility functions ======================================================================= @@ -14,9 +14,10 @@ macro_rules! check { assert_parse_ok_eq( input, ByteStringLit::parse(input), expected.clone(), "ByteStringLit::parse"); assert_parse_ok_eq( - input, Literal::parse(input), Literal::ByteString(expected), "Literal::parse"); + input, Literal::parse(input), Literal::ByteString(expected.clone()), "Literal::parse"); assert_eq!(ByteStringLit::parse(input).unwrap().value(), $lit); assert_eq!(ByteStringLit::parse(input).unwrap().into_value().as_ref(), $lit); + assert_roundtrip(expected.into_owned(), input); }; } diff --git a/src/char/mod.rs b/src/char/mod.rs index 1480bdf..96d5037 100644 --- a/src/char/mod.rs +++ b/src/char/mod.rs @@ -24,7 +24,10 @@ impl<B: Buffer> CharLit<B> { /// is invalid or represents a different kind of literal. pub fn parse(input: B) -> Result<Self, ParseError> { match first_byte_or_empty(&input)? { - b'\'' => Self::parse_impl(input), + b'\'' => { + let value = parse_impl(&input)?; + Ok(Self { raw: input, value }) + }, _ => Err(perr(0, DoesNotStartWithQuote)), } } @@ -34,36 +37,16 @@ impl<B: Buffer> CharLit<B> { self.value } - /// Precondition: first character in input must be `'`. - pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> { - if input.len() == 1 { - return Err(perr(None, UnterminatedCharLiteral)); - } - if *input.as_bytes().last().unwrap() != b'\'' { - return Err(perr(None, UnterminatedCharLiteral)); - } - - let inner = &input[1..input.len() - 1]; - let first = inner.chars().nth(0).ok_or(perr(None, EmptyCharLiteral))?; - let (c, len) = match first { - '\'' => return Err(perr(1, UnescapedSingleQuote)), - '\n' | '\t' | '\r' - => return Err(perr(1, UnescapedSpecialWhitespace)), - - '\\' => unescape::<char>(inner, 1)?, - other => (other, other.len_utf8()), - }; - let rest = &inner[len..]; - - if !rest.is_empty() { - return Err(perr(len + 1..input.len() - 1, OverlongCharLiteral)); - } + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } - Ok(Self { - raw: input, - value: c, - }) + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw } + } impl CharLit<&str> { @@ -83,6 +66,34 @@ impl<B: Buffer> fmt::Display for CharLit<B> { } } +/// Precondition: first character in input must be `'`. +#[inline(never)] +pub(crate) fn parse_impl(input: &str) -> Result<char, ParseError> { + if input.len() == 1 { + return Err(perr(None, UnterminatedCharLiteral)); + } + if *input.as_bytes().last().unwrap() != b'\'' { + return Err(perr(None, UnterminatedCharLiteral)); + } + + let inner = &input[1..input.len() - 1]; + let first = inner.chars().nth(0).ok_or(perr(None, EmptyCharLiteral))?; + let (c, len) = match first { + '\'' => return Err(perr(1, UnescapedSingleQuote)), + '\n' | '\t' | '\r' + => return Err(perr(1, UnescapedSpecialWhitespace)), + + '\\' => unescape::<char>(inner, 1)?, + other => (other, other.len_utf8()), + }; + let rest = &inner[len..]; + + if !rest.is_empty() { + return Err(perr(len + 1..input.len() - 1, OverlongCharLiteral)); + } + + Ok(c) +} #[cfg(test)] mod tests; diff --git a/src/char/tests.rs b/src/char/tests.rs index 01ca2c5..bfae5e4 100644 --- a/src/char/tests.rs +++ b/src/char/tests.rs @@ -1,4 +1,4 @@ -use crate::{Literal, test_util::assert_parse_ok_eq}; +use crate::{Literal, test_util::{assert_parse_ok_eq, assert_roundtrip}}; use super::CharLit; // ===== Utility functions ======================================================================= @@ -14,6 +14,7 @@ macro_rules! check { assert_parse_ok_eq(input, CharLit::parse(input), expected.clone(), "CharLit::parse"); assert_parse_ok_eq(input, Literal::parse(input), Literal::Char(expected), "Literal::parse"); assert_eq!(CharLit::parse(input).unwrap().value(), $lit); + assert_roundtrip(expected.to_owned(), input); }; } @@ -180,16 +180,19 @@ pub(crate) trait SpanLike { } impl SpanLike for Option<Range<usize>> { + #[inline(always)] fn into_span(self) -> Option<Range<usize>> { self } } impl SpanLike for Range<usize> { + #[inline(always)] fn into_span(self) -> Option<Range<usize>> { Some(self) } } impl SpanLike for usize { + #[inline(always)] fn into_span(self) -> Option<Range<usize>> { Some(self..self + 1) } diff --git a/src/escape.rs b/src/escape.rs index 3e93113..19b63a1 100644 --- a/src/escape.rs +++ b/src/escape.rs @@ -108,11 +108,12 @@ impl Escapee for char { /// Checks whether the character is skipped after a string continue start /// (unescaped backlash followed by `\n`). -pub(crate) fn is_string_continue_skipable_whitespace(b: u8) -> bool { +fn is_string_continue_skipable_whitespace(b: u8) -> bool { b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' } /// Unescapes a whole string or byte string. +#[inline(never)] pub(crate) fn unescape_string<E: Escapee>( input: &str, offset: usize, @@ -180,6 +181,7 @@ pub(crate) fn unescape_string<E: Escapee>( /// Reads and checks a raw (byte) string literal, converting `\r\n` sequences to /// just `\n` sequences. Returns an optional new string (if the input contained /// any `\r\n`) and the number of hashes used by the literal. +#[inline(never)] pub(crate) fn scan_raw_string<E: Escapee>( input: &str, offset: usize, diff --git a/src/float/mod.rs b/src/float/mod.rs index e30a336..b196845 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -21,27 +21,27 @@ use crate::{ /// [ref]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct FloatLit<B: Buffer> { - /// Basically the whole literal, but without the type suffix. Other `usize` - /// fields in this struct partition this string. `end_integer_part` is - /// always <= `end_fractional_part`. + /// The whole raw input. The `usize` fields in this struct partition this + /// string. Always true: `end_integer_part <= end_fractional_part`. /// /// ```text - /// 12_3.4_56e789 - /// ╷ ╷ + /// 12_3.4_56e789f32 + /// ╷ ╷ ╷ + /// | | └ end_number_part = 13 /// | └ end_fractional_part = 9 /// └ end_integer_part = 4 /// /// 246. /// ╷╷ - /// |└ end_fractional_part = 4 + /// |└ end_fractional_part = end_number_part = 4 /// └ end_integer_part = 3 /// /// 1234e89 - /// ╷ - /// | + /// ╷ ╷ + /// | └ end_number_part = 7 /// └ end_integer_part = end_fractional_part = 4 /// ``` - number_part: B, + raw: B, /// The first index not part of the integer part anymore. Since the integer /// part is at the start, this is also the length of that part. @@ -50,6 +50,9 @@ pub struct FloatLit<B: Buffer> { /// The first index after the fractional part. end_fractional_part: usize, + /// The first index after the whole number part (everything except type suffix). + end_number_part: usize, + /// Optional type suffix. type_suffix: Option<FloatType>, } @@ -66,7 +69,24 @@ impl<B: Buffer> FloatLit<B> { /// input is invalid or represents a different kind of literal. pub fn parse(s: B) -> Result<Self, ParseError> { match first_byte_or_empty(&s)? { - b'0'..=b'9' => Self::parse_impl(s), + b'0'..=b'9' => { + // TODO: simplify once RFC 2528 is stabilized + let FloatLit { + end_integer_part, + end_fractional_part, + end_number_part, + type_suffix, + .. + } = parse_impl(&s)?; + + Ok(Self { + raw: s, + end_integer_part, + end_fractional_part, + end_number_part, + type_suffix, + }) + }, _ => Err(perr(0, DoesNotStartWithDigit)), } } @@ -76,12 +96,12 @@ impl<B: Buffer> FloatLit<B> { /// floating point value, you need to parse this string, e.g. with /// `f32::from_str` or an external crate. pub fn number_part(&self) -> &str { - &self.number_part + &(*self.raw)[..self.end_number_part] } /// Returns the non-empty integer part of this literal. pub fn integer_part(&self) -> &str { - &(*self.number_part)[..self.end_integer_part] + &(*self.raw)[..self.end_integer_part] } /// Returns the optional fractional part of this literal. Does not include @@ -91,14 +111,14 @@ impl<B: Buffer> FloatLit<B> { if self.end_integer_part == self.end_fractional_part { None } else { - Some(&(*self.number_part)[self.end_integer_part + 1..self.end_fractional_part]) + Some(&(*self.raw)[self.end_integer_part + 1..self.end_fractional_part]) } } /// Optional exponent part. Might be empty if there was no exponent part in /// the input. Includes the `e` or `E` at the beginning. pub fn exponent_part(&self) -> &str { - &(*self.number_part)[self.end_fractional_part..] + &(*self.raw)[self.end_fractional_part..self.end_number_part] } /// The optional type suffix. @@ -106,70 +126,14 @@ impl<B: Buffer> FloatLit<B> { self.type_suffix } - /// Precondition: first byte of string has to be in `b'0'..=b'9'`. - pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> { - // Integer part. - let end_integer_part = end_dec_digits(&input); - let rest = &input[end_integer_part..]; - - - // Fractional part. - let end_fractional_part = if rest.as_bytes().get(0) == Some(&b'.') { - // The fractional part must not start with `_`. - if rest.as_bytes().get(1) == Some(&b'_') { - return Err(perr(end_integer_part + 1, UnexpectedChar)); - } - - end_dec_digits(&rest[1..]) + 1 + end_integer_part - } else { - end_integer_part - }; - let rest = &input[end_fractional_part..]; - - // If we have a period that is not followed by decimal digits, the - // literal must end now. - if end_integer_part + 1 == end_fractional_part && !rest.is_empty() { - return Err(perr(end_integer_part + 1, UnexpectedChar)); - } - - - // Optional exponent. - let end_number_part = if rest.starts_with('e') || rest.starts_with('E') { - // Strip single - or + sign at the beginning. - let exp_number_start = match rest.as_bytes().get(1) { - Some(b'-') | Some(b'+') => 2, - _ => 1, - }; - - // Find end of exponent and make sure there is at least one digit. - let end_exponent = end_dec_digits(&rest[exp_number_start..]) + exp_number_start; - if !rest[exp_number_start..end_exponent].bytes().any(|b| matches!(b, b'0'..=b'9')) { - return Err(perr( - end_fractional_part..end_fractional_part + end_exponent, - NoExponentDigits, - )); - } - - end_exponent + end_fractional_part - } else { - end_fractional_part - }; - - - // Type suffix - let type_suffix = match &input[end_number_part..] { - "" => None, - "f32" => Some(FloatType::F32), - "f64" => Some(FloatType::F64), - _ => return Err(perr(end_number_part..input.len(), InvalidFloatTypeSuffix)), - }; + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } - Ok(Self { - number_part: input.cut(0..end_number_part), - end_integer_part, - end_fractional_part, - type_suffix, - }) + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw } } @@ -178,9 +142,10 @@ impl FloatLit<&str> { /// `Self`. pub fn to_owned(&self) -> FloatLit<String> { FloatLit { - number_part: self.number_part.to_owned(), + raw: self.raw.to_owned(), end_integer_part: self.end_integer_part, end_fractional_part: self.end_fractional_part, + end_number_part: self.end_number_part, type_suffix: self.type_suffix, } } @@ -188,15 +153,77 @@ impl FloatLit<&str> { impl<B: Buffer> fmt::Display for FloatLit<B> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let suffix = match self.type_suffix { - None => "", - Some(FloatType::F32) => "f32", - Some(FloatType::F64) => "f64", - }; - write!(f, "{}{}", self.number_part(), suffix) + write!(f, "{}", &*self.raw) } } +/// Precondition: first byte of string has to be in `b'0'..=b'9'`. +#[inline(never)] +pub(crate) fn parse_impl(input: &str) -> Result<FloatLit<&str>, ParseError> { + // Integer part. + let end_integer_part = end_dec_digits(input.as_bytes()); + let rest = &input[end_integer_part..]; + + + // Fractional part. + let end_fractional_part = if rest.as_bytes().get(0) == Some(&b'.') { + // The fractional part must not start with `_`. + if rest.as_bytes().get(1) == Some(&b'_') { + return Err(perr(end_integer_part + 1, UnexpectedChar)); + } + + end_dec_digits(rest[1..].as_bytes()) + 1 + end_integer_part + } else { + end_integer_part + }; + let rest = &input[end_fractional_part..]; + + // If we have a period that is not followed by decimal digits, the + // literal must end now. + if end_integer_part + 1 == end_fractional_part && !rest.is_empty() { + return Err(perr(end_integer_part + 1, UnexpectedChar)); + } + + + // Optional exponent. + let end_number_part = if rest.starts_with('e') || rest.starts_with('E') { + // Strip single - or + sign at the beginning. + let exp_number_start = match rest.as_bytes().get(1) { + Some(b'-') | Some(b'+') => 2, + _ => 1, + }; + + // Find end of exponent and make sure there is at least one digit. + let end_exponent = end_dec_digits(rest[exp_number_start..].as_bytes()) + exp_number_start; + if !rest[exp_number_start..end_exponent].bytes().any(|b| matches!(b, b'0'..=b'9')) { + return Err(perr( + end_fractional_part..end_fractional_part + end_exponent, + NoExponentDigits, + )); + } + + end_exponent + end_fractional_part + } else { + end_fractional_part + }; + + + // Type suffix + let type_suffix = match &input[end_number_part..] { + "" => None, + "f32" => Some(FloatType::F32), + "f64" => Some(FloatType::F64), + _ => return Err(perr(end_number_part..input.len(), InvalidFloatTypeSuffix)), + }; + + Ok(FloatLit { + raw: input, + end_integer_part, + end_fractional_part, + end_number_part, + type_suffix, + }) +} #[cfg(test)] mod tests; diff --git a/src/float/tests.rs b/src/float/tests.rs index 44f734f..f15af05 100644 --- a/src/float/tests.rs +++ b/src/float/tests.rs @@ -1,6 +1,6 @@ use crate::{ Literal, ParseError, - test_util::assert_parse_ok_eq, + test_util::{assert_parse_ok_eq, assert_roundtrip}, }; use super::{FloatLit, FloatType}; @@ -16,9 +16,10 @@ macro_rules! check { ($intpart:literal $fracpart:literal $exppart:literal $suffix:tt) => { let input = concat!($intpart, $fracpart, $exppart, check!(@stringify_suffix $suffix)); let expected_float = FloatLit { - number_part: concat!($intpart, $fracpart, $exppart), + raw: input, end_integer_part: $intpart.len(), end_fractional_part: $intpart.len() + $fracpart.len(), + end_number_part: $intpart.len() + $fracpart.len() + $exppart.len(), type_suffix: check!(@ty $suffix), }; @@ -26,7 +27,7 @@ macro_rules! check { input, FloatLit::parse(input), expected_float.clone(), "FloatLit::parse"); assert_parse_ok_eq( input, Literal::parse(input), Literal::Float(expected_float), "Literal::parse"); - + assert_roundtrip(expected_float.to_owned(), input); }; (@ty f32) => { Some(FloatType::F32) }; (@ty f64) => { Some(FloatType::F64) }; diff --git a/src/impls.rs b/src/impls.rs index 251f350..61a314d 100644 --- a/src/impls.rs +++ b/src/impls.rs @@ -16,6 +16,15 @@ macro_rules! helper { }; } +/// Like `helper!` but without reference types. +macro_rules! helper_no_refs { + ($callback:ident, $($input:tt)*) => { + $callback!([proc_macro::] => $($input)*); + #[cfg(feature = "proc-macro2")] + $callback!([proc_macro2::] => $($input)*); + }; +} + // ============================================================================================== // ===== `From<*Lit> for Literal` @@ -101,7 +110,7 @@ helper!(impl_tt_to_lit, ); // ============================================================================================== -// ===== `TryFrom<pm::Literal> for *Lit` and `TryFrom<pm::TokenTree> for *Lit` +// ===== `TryFrom<pm::Literal>`, `TryFrom<pm::TokenTree>` for non-bool `*Lit` // ============================================================================================== fn kind_of(lit: &Literal<String>) -> TokenKind { @@ -165,6 +174,43 @@ helper!(impl_for_specific_lit, crate::StringLit<String>, String, StringLit); helper!(impl_for_specific_lit, crate::ByteLit<String>, Byte, ByteLit); helper!(impl_for_specific_lit, crate::ByteStringLit<String>, ByteString, ByteStringLit); + +// ============================================================================================== +// ===== `From<*Lit> for pm::Literal` +// ============================================================================================== + +macro_rules! impl_specific_lit_to_pm_lit { + ([$($prefix:tt)*] => $ty:ident, $variant:ident, $kind:ident) => { + impl<B: crate::Buffer> From<crate::$ty<B>> for $($prefix)* Literal { + fn from(l: crate::$ty<B>) -> Self { + // This should never fail: an input that is parsed successfuly + // as one of our literal types should always parse as a + // proc_macro literal as well! + l.raw_input().parse().unwrap_or_else(|e| { + panic!( + "failed to parse `{}` as `{}`: {}", + l.raw_input(), + std::any::type_name::<Self>(), + e, + ) + }) + } + } + }; +} + +helper_no_refs!(impl_specific_lit_to_pm_lit, IntegerLit, Integer, IntegerLit); +helper_no_refs!(impl_specific_lit_to_pm_lit, FloatLit, Float, FloatLit); +helper_no_refs!(impl_specific_lit_to_pm_lit, CharLit, Char, CharLit); +helper_no_refs!(impl_specific_lit_to_pm_lit, StringLit, String, StringLit); +helper_no_refs!(impl_specific_lit_to_pm_lit, ByteLit, Byte, ByteLit); +helper_no_refs!(impl_specific_lit_to_pm_lit, ByteStringLit, ByteString, ByteStringLit); + + +// ============================================================================================== +// ===== `TryFrom<pm::TokenTree> for BoolLit` +// ============================================================================================== + macro_rules! impl_from_tt_for_bool { ([$($prefix:tt)*] => ) => { impl TryFrom<$($prefix)* TokenTree> for crate::BoolLit { @@ -195,6 +241,22 @@ macro_rules! impl_from_tt_for_bool { helper!(impl_from_tt_for_bool, ); +// ============================================================================================== +// ===== `From<BoolLit> for pm::Ident` +// ============================================================================================== + +macro_rules! impl_bool_lit_to_pm_lit { + ([$($prefix:tt)*] => ) => { + impl From<crate::BoolLit> for $($prefix)* Ident { + fn from(l: crate::BoolLit) -> Self { + Self::new(l.as_str(), $($prefix)* Span::call_site()) + } + } + }; +} + +helper_no_refs!(impl_bool_lit_to_pm_lit, ); + mod tests { //! # Tests diff --git a/src/integer/mod.rs b/src/integer/mod.rs index 52519a6..79f7e55 100644 --- a/src/integer/mod.rs +++ b/src/integer/mod.rs @@ -25,8 +25,12 @@ use crate::{ #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] pub struct IntegerLit<B: Buffer> { + raw: B, + // First index of the main number part (after the base prefix). + start_main_part: usize, + // First index not part of the main number part. + end_main_part: usize, base: IntegerBase, - main_part: B, type_suffix: Option<IntegerType>, } @@ -74,7 +78,24 @@ impl<B: Buffer> IntegerLit<B> { /// invalid or represents a different kind of literal. pub fn parse(input: B) -> Result<Self, ParseError> { match first_byte_or_empty(&input)? { - digit @ b'0'..=b'9' => Self::parse_impl(input, digit), + digit @ b'0'..=b'9' => { + // TODO: simplify once RFC 2528 is stabilized + let IntegerLit { + start_main_part, + end_main_part, + base, + type_suffix, + .. + } = parse_impl(&input, digit)?; + + Ok(Self { + raw: input, + start_main_part, + end_main_part, + base, + type_suffix, + }) + }, _ => Err(perr(0, DoesNotStartWithDigit)), } } @@ -93,7 +114,7 @@ impl<B: Buffer> IntegerLit<B> { }; let mut acc = N::from_small_number(0); - for digit in self.main_part.bytes() { + for digit in self.raw_main_part().bytes() { if digit == b'_' { continue; } @@ -118,7 +139,7 @@ impl<B: Buffer> IntegerLit<B> { /// The main part containing the digits and potentially `_`. Do not try to /// parse this directly as that would ignore the base! pub fn raw_main_part(&self) -> &str { - &self.main_part + &(*self.raw)[self.start_main_part..self.end_main_part] } /// The type suffix, if specified. @@ -126,73 +147,14 @@ impl<B: Buffer> IntegerLit<B> { self.type_suffix } - /// Precondition: first byte of string has to be in `b'0'..=b'9'`. - pub(crate) fn parse_impl(input: B, first: u8) -> Result<Self, ParseError> { - // Figure out base and strip prefix base, if it exists. - let (end_prefix, base) = match (first, input.as_bytes().get(1)) { - (b'0', Some(b'b')) => (2, IntegerBase::Binary), - (b'0', Some(b'o')) => (2, IntegerBase::Octal), - (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal), - - // Everything else is treated as decimal. Several cases are caught - // by this: - // - "123" - // - "0" - // - "0u8" - // - "0r" -> this will error later - _ => (0, IntegerBase::Decimal), - }; - let without_prefix = &input[end_prefix..]; - - // Find end of main part. - let end_main = without_prefix.bytes() - .position(|b| !matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_')) - .unwrap_or(without_prefix.len()); - let (main_part, type_suffix) = without_prefix.split_at(end_main); - - // Check for invalid digits and make sure there is at least one valid digit. - let invalid_digit_pos = match base { - IntegerBase::Binary => main_part.bytes() - .position(|b| !matches!(b, b'0' | b'1' | b'_')), - IntegerBase::Octal => main_part.bytes() - .position(|b| !matches!(b, b'0'..=b'7' | b'_')), - IntegerBase::Decimal => main_part.bytes() - .position(|b| !matches!(b, b'0'..=b'9' | b'_')), - IntegerBase::Hexadecimal => None, - }; - - if let Some(pos) = invalid_digit_pos { - return Err(perr(end_prefix + pos, InvalidDigit)); - } - - if main_part.bytes().filter(|&b| b != b'_').count() == 0 { - return Err(perr(end_prefix..end_prefix + end_main, NoDigits)); - } - - - // Parse type suffix - let type_suffix = match type_suffix { - "" => None, - "u8" => Some(IntegerType::U8), - "u16" => Some(IntegerType::U16), - "u32" => Some(IntegerType::U32), - "u64" => Some(IntegerType::U64), - "u128" => Some(IntegerType::U128), - "usize" => Some(IntegerType::Usize), - "i8" => Some(IntegerType::I8), - "i16" => Some(IntegerType::I16), - "i32" => Some(IntegerType::I32), - "i64" => Some(IntegerType::I64), - "i128" => Some(IntegerType::I128), - "isize" => Some(IntegerType::Isize), - _ => return Err(perr(end_main + end_prefix..input.len(), InvalidIntegerTypeSuffix)), - }; + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } - Ok(Self { - base, - main_part: input.cut(end_prefix..end_main + end_prefix), - type_suffix, - }) + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw } } @@ -201,8 +163,10 @@ impl IntegerLit<&str> { /// `Self`. pub fn to_owned(&self) -> IntegerLit<String> { IntegerLit { + raw: self.raw.to_owned(), + start_main_part: self.start_main_part, + end_main_part: self.end_main_part, base: self.base, - main_part: self.main_part.to_owned(), type_suffix: self.type_suffix, } } @@ -210,22 +174,7 @@ impl IntegerLit<&str> { impl<B: Buffer> fmt::Display for IntegerLit<B> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let suffix = match self.type_suffix { - None => "", - Some(IntegerType::U8) => "u8", - Some(IntegerType::U16) => "u16", - Some(IntegerType::U32) => "u32", - Some(IntegerType::U64) => "u64", - Some(IntegerType::U128) => "u128", - Some(IntegerType::Usize) => "usize", - Some(IntegerType::I8) => "i8", - Some(IntegerType::I16) => "i16", - Some(IntegerType::I32) => "i32", - Some(IntegerType::I64) => "i64", - Some(IntegerType::I128) => "i128", - Some(IntegerType::Isize) => "isize", - }; - write!(f, "{}{}{}", self.base.prefix(), &*self.main_part, suffix) + write!(f, "{}", &*self.raw) } } @@ -280,6 +229,78 @@ mod sealed { pub trait Sealed {} } +/// Precondition: first byte of string has to be in `b'0'..=b'9'`. +#[inline(never)] +pub(crate) fn parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError> { + // Figure out base and strip prefix base, if it exists. + let (end_prefix, base) = match (first, input.as_bytes().get(1)) { + (b'0', Some(b'b')) => (2, IntegerBase::Binary), + (b'0', Some(b'o')) => (2, IntegerBase::Octal), + (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal), + + // Everything else is treated as decimal. Several cases are caught + // by this: + // - "123" + // - "0" + // - "0u8" + // - "0r" -> this will error later + _ => (0, IntegerBase::Decimal), + }; + let without_prefix = &input[end_prefix..]; + + // Find end of main part. + let end_main = without_prefix.bytes() + .position(|b| !matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_')) + .unwrap_or(without_prefix.len()); + let (main_part, type_suffix) = without_prefix.split_at(end_main); + + // Check for invalid digits and make sure there is at least one valid digit. + let invalid_digit_pos = match base { + IntegerBase::Binary => main_part.bytes() + .position(|b| !matches!(b, b'0' | b'1' | b'_')), + IntegerBase::Octal => main_part.bytes() + .position(|b| !matches!(b, b'0'..=b'7' | b'_')), + IntegerBase::Decimal => main_part.bytes() + .position(|b| !matches!(b, b'0'..=b'9' | b'_')), + IntegerBase::Hexadecimal => None, + }; + + if let Some(pos) = invalid_digit_pos { + return Err(perr(end_prefix + pos, InvalidDigit)); + } + + if main_part.bytes().filter(|&b| b != b'_').count() == 0 { + return Err(perr(end_prefix..end_prefix + end_main, NoDigits)); + } + + + // Parse type suffix + let type_suffix = match type_suffix { + "" => None, + "u8" => Some(IntegerType::U8), + "u16" => Some(IntegerType::U16), + "u32" => Some(IntegerType::U32), + "u64" => Some(IntegerType::U64), + "u128" => Some(IntegerType::U128), + "usize" => Some(IntegerType::Usize), + "i8" => Some(IntegerType::I8), + "i16" => Some(IntegerType::I16), + "i32" => Some(IntegerType::I32), + "i64" => Some(IntegerType::I64), + "i128" => Some(IntegerType::I128), + "isize" => Some(IntegerType::Isize), + _ => return Err(perr(end_main + end_prefix..input.len(), InvalidIntegerTypeSuffix)), + }; + + Ok(IntegerLit { + raw: input, + start_main_part: end_prefix, + end_main_part: end_main + end_prefix, + base, + type_suffix, + }) +} + #[cfg(test)] mod tests; diff --git a/src/integer/tests.rs b/src/integer/tests.rs index dc8c861..1656345 100644 --- a/src/integer/tests.rs +++ b/src/integer/tests.rs @@ -1,7 +1,7 @@ use std::fmt::{Debug, Display}; use crate::{ FromIntegerLiteral, Literal, IntegerLit, IntegerType as Ty, IntegerBase, IntegerBase::*, - test_util::assert_parse_ok_eq, + test_util::{assert_parse_ok_eq, assert_roundtrip}, }; @@ -15,11 +15,18 @@ fn check<T: FromIntegerLiteral + PartialEq + Debug + Display>( main_part: &str, type_suffix: Option<Ty>, ) { - let expected_integer = IntegerLit { base, main_part, type_suffix }; + let expected_integer = IntegerLit { + raw: input, + start_main_part: base.prefix().len(), + end_main_part: base.prefix().len() + main_part.len(), + base, + type_suffix + }; assert_parse_ok_eq( input, IntegerLit::parse(input), expected_integer.clone(), "IntegerLit::parse"); assert_parse_ok_eq( input, Literal::parse(input), Literal::Integer(expected_integer), "Literal::parse"); + assert_roundtrip(expected_integer.to_owned(), input); let actual_value = IntegerLit::parse(input) .unwrap() diff --git a/src/parse.rs b/src/parse.rs index 07026ed..a0266da 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -16,7 +16,7 @@ use crate::{ impl<B: Buffer> Literal<B> { /// Parses the given input as a Rust literal. pub fn parse(input: B) -> Result<Self, ParseError> { - let first = first_byte_or_empty(&input)?; + let (first, rest) = input.as_bytes().split_first().ok_or(perr(None, Empty))?; let second = input.as_bytes().get(1).copied(); match first { @@ -24,32 +24,32 @@ impl<B: Buffer> Literal<B> { b't' if &*input == "true" => Ok(Self::Bool(BoolLit::True)), // A number literal (integer or float). - digit @ b'0'..=b'9' => { + b'0'..=b'9' => { // To figure out whether this is a float or integer, we do some // quick inspection here. Yes, this is technically duplicate // work with what is happening in the integer/float parse // methods, but it makes the code way easier for now and won't // be a huge performance loss. - let end = 1 + end_dec_digits(&input[1..]); + let end = 1 + end_dec_digits(rest); match input.as_bytes().get(end) { // Potential chars in integer literals: b, o, x for base; u // and i for type suffix. None | Some(b'b') | Some(b'o') | Some(b'x') | Some(b'u') | Some(b'i') - => IntegerLit::parse_impl(input, digit).map(Literal::Integer), + => IntegerLit::parse(input).map(Literal::Integer), // Potential chars for float literals: `.` as fractional // period, e and E as exponent start and f as type suffix. Some(b'.') | Some(b'e') | Some(b'E') | Some(b'f') - => FloatLit::parse_impl(input).map(Literal::Float), + => FloatLit::parse(input).map(Literal::Float), _ => Err(perr(end, UnexpectedChar)), } }, - b'\'' => CharLit::parse_impl(input).map(Literal::Char), + b'\'' => CharLit::parse(input).map(Literal::Char), b'"' | b'r' => StringLit::parse_impl(input).map(Literal::String), - b'b' if second == Some(b'\'') => ByteLit::parse_impl(input).map(Literal::Byte), + b'b' if second == Some(b'\'') => ByteLit::parse(input).map(Literal::Byte), b'b' if second == Some(b'r') || second == Some(b'"') => ByteStringLit::parse_impl(input).map(Literal::ByteString), @@ -65,8 +65,8 @@ pub(crate) fn first_byte_or_empty(s: &str) -> Result<u8, ParseError> { /// Returns the index of the first non-underscore, non-decimal digit in `input`, /// or the `input.len()` if all characters are decimal digits. -pub(crate) fn end_dec_digits(input: &str) -> usize { - input.bytes() +pub(crate) fn end_dec_digits(input: &[u8]) -> usize { + input.iter() .position(|b| !matches!(b, b'_' | b'0'..=b'9')) .unwrap_or(input.len()) } diff --git a/src/string/mod.rs b/src/string/mod.rs index a21f7a7..ab1cc3f 100644 --- a/src/string/mod.rs +++ b/src/string/mod.rs @@ -59,6 +59,16 @@ impl<B: Buffer> StringLit<B> { self.num_hashes.is_some() } + /// Returns the raw input that was passed to `parse`. + pub fn raw_input(&self) -> &str { + &self.raw + } + + /// Returns the raw input that was passed to `parse`, potentially owned. + pub fn into_raw_input(self) -> B { + self.raw + } + /// The range within `self.raw` that excludes the quotes and potential `r#`. fn inner_range(&self) -> Range<usize> { match self.num_hashes { diff --git a/src/string/tests.rs b/src/string/tests.rs index 8d8882e..51519ab 100644 --- a/src/string/tests.rs +++ b/src/string/tests.rs @@ -1,4 +1,4 @@ -use crate::{Literal, StringLit, test_util::assert_parse_ok_eq}; +use crate::{Literal, StringLit, test_util::{assert_parse_ok_eq, assert_roundtrip}}; // ===== Utility functions ======================================================================= @@ -13,9 +13,10 @@ macro_rules! check { assert_parse_ok_eq(input, StringLit::parse(input), expected.clone(), "StringLit::parse"); assert_parse_ok_eq( - input, Literal::parse(input), Literal::String(expected), "Literal::parse"); + input, Literal::parse(input), Literal::String(expected.clone()), "Literal::parse"); assert_eq!(StringLit::parse(input).unwrap().value(), $lit); assert_eq!(StringLit::parse(input).unwrap().into_value(), $lit); + assert_roundtrip(expected.into_owned(), input); }; } diff --git a/src/test_util.rs b/src/test_util.rs index dc7a1f6..fd284e9 100644 --- a/src/test_util.rs +++ b/src/test_util.rs @@ -39,6 +39,53 @@ pub(crate) fn assert_parse_ok_eq<T: PartialEq + Debug + Display>( } } +// This is not ideal, but to perform this check we need `proc-macro2`. So we +// just don't do anything if that feature is not enabled. +#[cfg(not(feature = "proc-macro2"))] +pub(crate) fn assert_roundtrip<T>(_: T, _: &str) {} + +#[cfg(feature = "proc-macro2")] +#[track_caller] +pub(crate) fn assert_roundtrip<T>(ours: T, input: &str) +where + T: std::convert::TryFrom<proc_macro2::Literal> + fmt::Debug + PartialEq + Clone, + proc_macro2::Literal: From<T>, + <T as std::convert::TryFrom<proc_macro2::Literal>>::Error: std::fmt::Display, +{ + let pm_lit = input.parse::<proc_macro2::Literal>() + .expect("failed to parse input as proc_macro2::Literal"); + let t_name = std::any::type_name::<T>(); + + // Unfortunately, `proc_macro2::Literal` does not implement `PartialEq`, so + // this is the next best thing. + if proc_macro2::Literal::from(ours.clone()).to_string() != pm_lit.to_string() { + panic!( + "Converting {} to proc_macro2::Literal has unexpected result:\ + \nconverted: {:?}\nexpected: {:?}", + t_name, + proc_macro2::Literal::from(ours), + pm_lit, + ); + } + + match T::try_from(pm_lit) { + Err(e) => { + panic!("Trying to convert proc_macro2::Literal to {} results in error: {}", t_name, e); + } + Ok(res) => { + if res != ours { + panic!( + "Converting proc_macro2::Literal to {} has unexpected result:\ + \nactual: {:?}\nexpected: {:?}", + t_name, + res, + ours, + ); + } + } + } +} + macro_rules! assert_err { ($ty:ident, $input:literal, $kind:ident, $( $span:tt )+ ) => { assert_err_single!($ty::parse($input), $kind, $($span)+); |