aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2022-04-28 16:01:39 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2022-04-28 16:01:39 +0000
commit4bd38fa79f73ee9255956db6c6da596b6f023351 (patch)
tree33494385374d5e714dc15a92b7428b465705b359
parent809909f415e6b784ef5513d01f1b51d4425ede4b (diff)
parentc271f3ec90c6dbc5def42e6497f7c0d9ebb05fba (diff)
downloadtextwrap-android13-frc-resolv-release.tar.gz
Snap for 8512216 from c271f3ec90c6dbc5def42e6497f7c0d9ebb05fba to tm-frc-resolv-releaset_frc_res_330443000android13-frc-resolv-release
Change-Id: Ia71721fd1dc2895c2ae3f89b18f7fb656e38e6bf
-rw-r--r--.cargo_vcs_info.json7
-rw-r--r--Android.bp2
-rw-r--r--CHANGELOG.md58
-rw-r--r--Cargo.toml39
-rw-r--r--METADATA10
-rw-r--r--README.md79
-rw-r--r--TEST_MAPPING12
-rw-r--r--rustfmt.toml1
-rw-r--r--src/core.rs29
-rw-r--r--src/lib.rs600
-rw-r--r--src/word_separators.rs606
-rw-r--r--src/word_splitters.rs291
-rw-r--r--src/wrap_algorithms.rs290
-rw-r--r--src/wrap_algorithms/optimal_fit.rs337
-rw-r--r--tests/traits.rs86
15 files changed, 1240 insertions, 1207 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index bc2eb0e..bf3e802 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,6 @@
{
"git": {
- "sha1": "1964d6f19d8e84fa08e3dd8a8c986ecd26287367"
- }
-}
+ "sha1": "559e07a53bdf7de6bed5c48aacfc0ec8c8bb0c05"
+ },
+ "path_in_vcs": ""
+} \ No newline at end of file
diff --git a/Android.bp b/Android.bp
index e92f31e..eb80a6c 100644
--- a/Android.bp
+++ b/Android.bp
@@ -23,7 +23,7 @@ rust_library {
host_supported: true,
crate_name: "textwrap",
cargo_env_compat: true,
- cargo_pkg_version: "0.14.2",
+ cargo_pkg_version: "0.15.0",
srcs: ["src/lib.rs"],
edition: "2018",
apex_available: [
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cdc703e..093b9dc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,64 @@
This file lists the most important changes made in each release of
`textwrap`.
+## Version 0.15.0 (2022-02-27)
+
+This is a major feature release with two main changes:
+
+* [#421](https://github.com/mgeisler/textwrap/pull/421): Use `f64`
+ instead of `usize` for fragment widths.
+
+ This fixes problems with overflows in the internal computations of
+ `wrap_optimal_fit` when fragments (words) or line lengths had
+ extreme values, such as `usize::MAX`.
+
+* [#438](https://github.com/mgeisler/textwrap/pull/438): Simplify
+ `Options` by removing generic type parameters.
+
+ This change removes the new generic parameters introduced in version
+ 0.14, as well as the original `WrapSplitter` parameter which has
+ been present since very early versions.
+
+ The result is a simplification of function and struct signatures
+ across the board. So what used to be
+
+ ```rust
+ let options: Options<
+ wrap_algorithms::FirstFit,
+ word_separators::AsciiSpace,
+ word_splitters::HyphenSplitter,
+ > = Options::new(80);
+ ```
+
+ if types are fully written out, is now simply
+
+ ```rust
+ let options: Options<'_> = Options::new(80);
+ ```
+
+ The anonymous lifetime represents the lifetime of the
+ `initial_indent` and `subsequent_indent` strings. The change is
+ nearly performance neutral (a 1-2% regression).
+
+Smaller improvements and changes:
+
+* [#404](https://github.com/mgeisler/textwrap/pull/404): Make
+ documentation for short last-line penalty more precise.
+* [#405](https://github.com/mgeisler/textwrap/pull/405): Cleanup and
+ simplify `Options` docstring.
+* [#411](https://github.com/mgeisler/textwrap/pull/411): Default to
+ `OptimalFit` in interactive example.
+* [#415](https://github.com/mgeisler/textwrap/pull/415): Add demo
+ program to help compute binary sizes.
+* [#423](https://github.com/mgeisler/textwrap/pull/423): Add fuzz
+ tests with fully arbitrary fragments.
+* [#424](https://github.com/mgeisler/textwrap/pull/424): Change
+ `wrap_optimal_fit` penalties to non-negative numbers.
+* [#430](https://github.com/mgeisler/textwrap/pull/430): Add
+ `debug-words` example.
+* [#432](https://github.com/mgeisler/textwrap/pull/432): Use precise
+ dependency versions in Cargo.toml.
+
## Version 0.14.2 (2021-06-27)
The 0.14.1 release included more changes than intended and has been
diff --git a/Cargo.toml b/Cargo.toml
index 69acb0f..0e4c788 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,17 +3,16 @@
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
-# to registry (e.g., crates.io) dependencies
+# to registry (e.g., crates.io) dependencies.
#
-# If you believe there's an error in this file please file an
-# issue against the rust-lang/cargo repository. If you're
-# editing this file be aware that the upstream Cargo.toml
-# will likely look very different (and much more reasonable)
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
[package]
edition = "2018"
name = "textwrap"
-version = "0.14.2"
+version = "0.15.0"
authors = ["Martin Geisler <martin@geisler.net>"]
exclude = [".github/", ".gitignore", "benches/", "examples/", "fuzz/", "images/"]
description = "Powerful library for word wrapping, indenting, and dedenting strings"
@@ -26,6 +25,16 @@ repository = "https://github.com/mgeisler/textwrap"
[package.metadata.docs.rs]
all-features = true
+[[example]]
+name = "hyphenation"
+path = "examples/hyphenation.rs"
+required-features = ["hyphenation"]
+
+[[example]]
+name = "termwidth"
+path = "examples/termwidth.rs"
+required-features = ["terminal_size"]
+
[[bench]]
name = "linear"
path = "benches/linear.rs"
@@ -36,38 +45,38 @@ name = "indent"
path = "benches/indent.rs"
harness = false
[dependencies.hyphenation]
-version = "0.8.2"
+version = "0.8.4"
features = ["embed_en-us"]
optional = true
[dependencies.smawk]
-version = "0.3"
+version = "0.3.1"
optional = true
[dependencies.terminal_size]
-version = "0.1"
+version = "0.1.17"
optional = true
[dependencies.unicode-linebreak]
-version = "0.1"
+version = "0.1.2"
optional = true
[dependencies.unicode-width]
-version = "0.1"
+version = "0.1.9"
optional = true
[dev-dependencies.criterion]
-version = "0.3"
+version = "0.3.5"
[dev-dependencies.lipsum]
-version = "0.8"
+version = "0.8.0"
[dev-dependencies.unic-emoji-char]
version = "0.9.0"
[dev-dependencies.version-sync]
-version = "0.9"
+version = "0.9.4"
[features]
default = ["unicode-linebreak", "unicode-width", "smawk"]
[target."cfg(unix)".dev-dependencies.termion]
-version = "1.5"
+version = "1.5.6"
diff --git a/METADATA b/METADATA
index 66d4b14..8f2131c 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@ third_party {
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/textwrap/textwrap-0.14.2.crate"
+ value: "https://static.crates.io/crates/textwrap/textwrap-0.15.0.crate"
}
- version: "0.14.2"
+ version: "0.15.0"
license_type: NOTICE
last_upgrade_date {
- year: 2021
- month: 8
- day: 9
+ year: 2022
+ month: 3
+ day: 1
}
}
diff --git a/README.md b/README.md
index b32924c..9eeea07 100644
--- a/README.md
+++ b/README.md
@@ -16,16 +16,19 @@ drawn on a [HTML5 canvas using WebAssembly][wasm-demo].
To use the textwrap crate, add this to your `Cargo.toml` file:
```toml
[dependencies]
-textwrap = "0.14"
+textwrap = "0.15"
```
By default, this enables word wrapping with support for Unicode
strings. Extra features can be enabled with Cargo features—and the
Unicode support can be disabled if needed. This allows you slim down
the library and so you will only pay for the features you actually
-use. Please see the [_Cargo Features_ in the crate
+use.
+
+Please see the [_Cargo Features_ in the crate
documentation](https://docs.rs/textwrap/#cargo-features) for a full
-list of the available features.
+list of the available features as well as their impact on the size of
+your binary.
## Documentation
@@ -33,23 +36,22 @@ list of the available features.
## Getting Started
-Word wrapping is easy using the `fill` function:
+Word wrapping is easy using the `wrap` and `fill` functions:
```rust
-fn main() {
- let text = "textwrap: an efficient and powerful library for wrapping text.";
- println!("{}", textwrap::fill(text, 28));
+#[cfg(feature = "smawk")] {
+let text = "textwrap: an efficient and powerful library for wrapping text.";
+assert_eq!(
+ textwrap::wrap(text, 28),
+ vec![
+ "textwrap: an efficient",
+ "and powerful library for",
+ "wrapping text.",
+ ]
+);
}
```
-The output is wrapped within 28 columns:
-
-```
-textwrap: an efficient
-and powerful library for
-wrapping text.
-```
-
Sharp-eyed readers will notice that the first line is 22 columns wide.
So why is the word “and” put in the second line when there is space
for it in the first line?
@@ -57,14 +59,24 @@ for it in the first line?
The explanation is that textwrap does not just wrap text one line at a
time. Instead, it uses an optimal-fit algorithm which looks ahead and
chooses line breaks which minimize the gaps left at ends of lines.
+This is controlled with the `smawk` Cargo feature, which is why the
+example is wrapped in the `cfg`-block.
Without look-ahead, the first line would be longer and the text would
look like this:
-```
-textwrap: an efficient and
-powerful library for
-wrapping text.
+```rust
+#[cfg(not(feature = "smawk"))] {
+let text = "textwrap: an efficient and powerful library for wrapping text.";
+assert_eq!(
+ textwrap::wrap(text, 28),
+ vec![
+ "textwrap: an efficient and",
+ "powerful library for",
+ "wrapping text.",
+ ]
+);
+}
```
The second line is now shorter and the text is more ragged. The kind
@@ -78,24 +90,25 @@ Your program must load the hyphenation pattern and configure
`Options::word_splitter` to use it:
```rust
+#[cfg(feature = "hyphenation")] {
use hyphenation::{Language, Load, Standard};
-use textwrap::Options;
-
-fn main() {
- let hyphenator = Standard::from_embedded(Language::EnglishUS).unwrap();
- let options = Options::new(28).word_splitter(hyphenator);
- let text = "textwrap: an efficient and powerful library for wrapping text.";
- println!("{}", fill(text, &options);
+use textwrap::{fill, Options, WordSplitter};
+
+let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+let options = textwrap::Options::new(28).word_splitter(WordSplitter::Hyphenation(dictionary));
+let text = "textwrap: an efficient and powerful library for wrapping text.";
+
+assert_eq!(
+ textwrap::wrap(text, &options),
+ vec![
+ "textwrap: an efficient and",
+ "powerful library for wrap-",
+ "ping text."
+ ]
+);
}
```
-The output now looks like this:
-```
-textwrap: an efficient and
-powerful library for wrap-
-ping text.
-```
-
The US-English hyphenation patterns are embedded when you enable the
`hyphenation` feature. They are licensed under a [permissive
license][en-us license] and take up about 88 KB in your binary. If you
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 27b2010..07d379e 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -28,6 +28,9 @@
"name": "authfs_device_test_src_lib"
},
{
+ "name": "diced_test"
+ },
+ {
"name": "keystore2_test"
},
{
@@ -35,9 +38,6 @@
},
{
"name": "legacykeystore_test"
- },
- {
- "name": "open_then_run_module"
}
],
"presubmit-rust": [
@@ -51,6 +51,9 @@
"name": "authfs_device_test_src_lib"
},
{
+ "name": "diced_test"
+ },
+ {
"name": "keystore2_test"
},
{
@@ -58,9 +61,6 @@
},
{
"name": "legacykeystore_test"
- },
- {
- "name": "open_then_run_module"
}
]
}
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..c1578aa
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1 @@
+imports_granularity = "Module"
diff --git a/src/core.rs b/src/core.rs
index af02460..0ab4ef8 100644
--- a/src/core.rs
+++ b/src/core.rs
@@ -13,10 +13,9 @@
//! how to do this for text.
//!
//! 2. Potentially split your fragments into smaller pieces. This
-//! allows you to implement things like hyphenation. If you are
-//! wrapping text represented as a sequence of [`Word`]s, then you
-//! can use [`split_words`](crate::word_splitters::split_words) can
-//! help you do this.
+//! allows you to implement things like hyphenation. If you use the
+//! `Word` type, you can use the [`WordSplitter`](crate::WordSplitter)
+//! enum for this.
//!
//! 3. Potentially break apart fragments that are still too large to
//! fit on a single line. This is implemented in [`break_words`].
@@ -197,15 +196,15 @@ pub fn display_width(text: &str) -> usize {
/// the displayed width of each part, which this trait provides.
pub trait Fragment: std::fmt::Debug {
/// Displayed width of word represented by this fragment.
- fn width(&self) -> usize;
+ fn width(&self) -> f64;
/// Displayed width of the whitespace that must follow the word
/// when the word is not at the end of a line.
- fn whitespace_width(&self) -> usize;
+ fn whitespace_width(&self) -> f64;
/// Displayed width of the penalty that must be inserted if the
/// word falls at the end of a line.
- fn penalty_width(&self) -> usize;
+ fn penalty_width(&self) -> f64;
}
/// A piece of wrappable text, including any trailing whitespace.
@@ -241,7 +240,7 @@ impl<'a> Word<'a> {
let trimmed = word.trim_end_matches(' ');
Word {
word: trimmed,
- width: display_width(&trimmed),
+ width: display_width(trimmed),
whitespace: &word[trimmed.len()..],
penalty: "",
}
@@ -304,22 +303,22 @@ impl<'a> Word<'a> {
impl Fragment for Word<'_> {
#[inline]
- fn width(&self) -> usize {
- self.width
+ fn width(&self) -> f64 {
+ self.width as f64
}
// We assume the whitespace consist of ' ' only. This allows us to
// compute the display width in constant time.
#[inline]
- fn whitespace_width(&self) -> usize {
- self.whitespace.len()
+ fn whitespace_width(&self) -> f64 {
+ self.whitespace.len() as f64
}
// We assume the penalty is `""` or `"-"`. This allows us to
// compute the display width in constant time.
#[inline]
- fn penalty_width(&self) -> usize {
- self.penalty.len()
+ fn penalty_width(&self) -> f64 {
+ self.penalty.len() as f64
}
}
@@ -334,7 +333,7 @@ where
{
let mut shortened_words = Vec::new();
for word in words {
- if word.width() > line_width {
+ if word.width() > line_width as f64 {
shortened_words.extend(word.break_apart(line_width));
} else {
shortened_words.push(word);
diff --git a/src/lib.rs b/src/lib.rs
index f2f5542..6d68309 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,47 +7,35 @@
//! you want to format dynamic output nicely so it looks good in a
//! terminal. A quick example:
//!
-//! ```no_run
-//! fn main() {
-//! let text = "textwrap: a small library for wrapping text.";
-//! println!("{}", textwrap::fill(text, 18));
-//! }
//! ```
-//!
-//! When you run this program, it will display the following output:
-//!
-//! ```text
-//! textwrap: a small
-//! library for
-//! wrapping text.
+//! # #[cfg(feature = "smawk")] {
+//! let text = "textwrap: a small library for wrapping text.";
+//! assert_eq!(textwrap::wrap(text, 18),
+//! vec!["textwrap: a",
+//! "small library for",
+//! "wrapping text."]);
+//! # }
//! ```
//!
+//! The [`wrap`] function returns the individual lines; use [`fill`]
+//! if you want the lines joined with `'\n'` to form a `String`.
+//!
//! If you enable the `hyphenation` Cargo feature, you can get
//! automatic hyphenation for a number of languages:
//!
-//! ```no_run
-//! # #[cfg(feature = "hyphenation")]
+//! ```
+//! #[cfg(feature = "hyphenation")] {
//! use hyphenation::{Language, Load, Standard};
-//! use textwrap::{fill, Options};
+//! use textwrap::{wrap, Options, WordSplitter};
//!
-//! # #[cfg(feature = "hyphenation")]
-//! fn main() {
-//! let text = "textwrap: a small library for wrapping text.";
-//! let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
-//! let options = Options::new(18).word_splitter(dictionary);
-//! println!("{}", fill(text, &options));
+//! let text = "textwrap: a small library for wrapping text.";
+//! let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+//! let options = Options::new(18).word_splitter(WordSplitter::Hyphenation(dictionary));
+//! assert_eq!(wrap(text, &options),
+//! vec!["textwrap: a small",
+//! "library for wrap-",
+//! "ping text."]);
//! }
-//!
-//! # #[cfg(not(feature = "hyphenation"))]
-//! # fn main() { }
-//! ```
-//!
-//! The program will now output:
-//!
-//! ```text
-//! textwrap: a small
-//! library for wrap-
-//! ping text.
//! ```
//!
//! See also the [`unfill`] and [`refill`] functions which allow you to
@@ -124,7 +112,7 @@
//! The full dependency graph, where dashed lines indicate optional
//! dependencies, is shown below:
//!
-//! <img src="https://raw.githubusercontent.com/mgeisler/textwrap/master/images/textwrap-0.14.2.svg">
+//! <img src="https://raw.githubusercontent.com/mgeisler/textwrap/master/images/textwrap-0.15.0.svg">
//!
//! ## Default Features
//!
@@ -138,8 +126,7 @@
//! This feature can be disabled if you are happy to find words
//! separated by ASCII space characters only. People wrapping text
//! with emojis or East-Asian characters will most likely want
-//! to enable this feature. See the
-//! [`word_separators::WordSeparator`] trait for details.
+//! to enable this feature. See [`WordSeparator`] for details.
//!
//! * `unicode-width`: enables correct width computation of non-ASCII
//! characters via the [unicode-width] crate. Without this feature,
@@ -159,6 +146,29 @@
//! This feature can be disabled if you only ever intend to use
//! [`wrap_algorithms::wrap_first_fit`].
//!
+//! With Rust 1.59.0, the size impact of the above features on your
+//! binary is as follows:
+//!
+//! | Configuration | Binary Size | Delta |
+//! | :--- | ---: | ---: |
+//! | quick-and-dirty implementation | 289 KB | — KB |
+//! | textwrap without default features | 301 KB | 12 KB |
+//! | textwrap with smawk | 317 KB | 28 KB |
+//! | textwrap with unicode-width | 313 KB | 24 KB |
+//! | textwrap with unicode-linebreak | 395 KB | 106 KB |
+//!
+//! The above sizes are the stripped sizes and the binary is compiled
+//! in release mode with this profile:
+//!
+//! ```toml
+//! [profile.release]
+//! lto = true
+//! codegen-units = 1
+//! ```
+//!
+//! See the [binary-sizes demo] if you want to reproduce these
+//! results.
+//!
//! ## Optional Features
//!
//! These Cargo features enable new functionality:
@@ -168,71 +178,61 @@
//! [`Options::with_termwidth`] constructor for details.
//!
//! * `hyphenation`: enables language-sensitive hyphenation via the
-//! [hyphenation] crate. See the [`word_splitters::WordSplitter`] trait for details.
+//! [hyphenation] crate. See the [`word_splitters::WordSplitter`]
+//! enum for details.
//!
//! [unicode-linebreak]: https://docs.rs/unicode-linebreak/
//! [unicode-width]: https://docs.rs/unicode-width/
//! [smawk]: https://docs.rs/smawk/
+//! [binary-sizes demo]: https://github.com/mgeisler/textwrap/tree/master/examples/binary-sizes
//! [textwrap-macros]: https://docs.rs/textwrap-macros/
//! [terminal_size]: https://docs.rs/terminal_size/
//! [hyphenation]: https://docs.rs/hyphenation/
-#![doc(html_root_url = "https://docs.rs/textwrap/0.14.2")]
+#![doc(html_root_url = "https://docs.rs/textwrap/0.15.0")]
#![forbid(unsafe_code)] // See https://github.com/mgeisler/textwrap/issues/210
#![deny(missing_docs)]
#![deny(missing_debug_implementations)]
#![allow(clippy::redundant_field_names)]
+// Make `cargo test` execute the README doctests.
+#[cfg(doctest)]
+#[doc = include_str!("../README.md")]
+mod readme_doctest {}
+
use std::borrow::Cow;
mod indentation;
-pub use crate::indentation::dedent;
-pub use crate::indentation::indent;
+pub use crate::indentation::{dedent, indent};
+
+mod word_separators;
+pub use word_separators::WordSeparator;
-pub mod word_separators;
pub mod word_splitters;
+pub use word_splitters::WordSplitter;
+
pub mod wrap_algorithms;
+pub use wrap_algorithms::WrapAlgorithm;
pub mod core;
-// These private macros lets us hide the actual WrapAlgorithm and
-// WordSeperator used in the function signatures below.
-#[cfg(feature = "smawk")]
-macro_rules! DefaultWrapAlgorithm {
- () => {
- wrap_algorithms::OptimalFit
- };
-}
-
-#[cfg(not(feature = "smawk"))]
-macro_rules! DefaultWrapAlgorithm {
- () => {
- wrap_algorithms::FirstFit
- };
-}
-
#[cfg(feature = "unicode-linebreak")]
macro_rules! DefaultWordSeparator {
() => {
- word_separators::UnicodeBreakProperties
+ WordSeparator::UnicodeBreakProperties
};
}
#[cfg(not(feature = "unicode-linebreak"))]
macro_rules! DefaultWordSeparator {
() => {
- word_separators::AsciiSpace
+ WordSeparator::AsciiSpace
};
}
-/// Holds settings for wrapping and filling text.
+/// Holds configuration options for wrapping and filling text.
#[derive(Debug, Clone)]
-pub struct Options<
- 'a,
- WrapAlgo = Box<dyn wrap_algorithms::WrapAlgorithm>,
- WordSep = Box<dyn word_separators::WordSeparator>,
- WordSplit = Box<dyn word_splitters::WordSplitter>,
-> {
+pub struct Options<'a> {
/// The width in columns at which the text will be wrapped.
pub width: usize,
/// Indentation used for the first line of output. See the
@@ -247,62 +247,42 @@ pub struct Options<
pub break_words: bool,
/// Wrapping algorithm to use, see the implementations of the
/// [`wrap_algorithms::WrapAlgorithm`] trait for details.
- pub wrap_algorithm: WrapAlgo,
+ pub wrap_algorithm: WrapAlgorithm,
/// The line breaking algorithm to use, see
/// [`word_separators::WordSeparator`] trait for an overview and
/// possible implementations.
- pub word_separator: WordSep,
+ pub word_separator: WordSeparator,
/// The method for splitting words. This can be used to prohibit
/// splitting words on hyphens, or it can be used to implement
- /// language-aware machine hyphenation. Please see the
- /// [`word_splitters::WordSplitter`] trait for details.
- pub word_splitter: WordSplit,
+ /// language-aware machine hyphenation.
+ pub word_splitter: WordSplitter,
}
-impl<'a, WrapAlgo, WordSep, WordSplit> From<&'a Options<'a, WrapAlgo, WordSep, WordSplit>>
- for Options<'a, WrapAlgo, WordSep, WordSplit>
-where
- WrapAlgo: Clone,
- WordSep: Clone,
- WordSplit: Clone,
-{
- fn from(options: &'a Options<'a, WrapAlgo, WordSep, WordSplit>) -> Self {
+impl<'a> From<&'a Options<'a>> for Options<'a> {
+ fn from(options: &'a Options<'a>) -> Self {
Self {
width: options.width,
initial_indent: options.initial_indent,
subsequent_indent: options.subsequent_indent,
break_words: options.break_words,
- word_separator: options.word_separator.clone(),
- wrap_algorithm: options.wrap_algorithm.clone(),
+ word_separator: options.word_separator,
+ wrap_algorithm: options.wrap_algorithm,
word_splitter: options.word_splitter.clone(),
}
}
}
-impl<'a> From<usize>
- for Options<
- 'a,
- DefaultWrapAlgorithm!(),
- DefaultWordSeparator!(),
- word_splitters::HyphenSplitter,
- >
-{
+impl<'a> From<usize> for Options<'a> {
fn from(width: usize) -> Self {
Options::new(width)
}
}
-/// Constructors for boxed Options, specifically.
-impl<'a>
- Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), word_splitters::HyphenSplitter>
-{
- /// Creates a new [`Options`] with the specified width and static
- /// dispatch using the [`word_splitters::HyphenSplitter`].
- /// Equivalent to
+impl<'a> Options<'a> {
+ /// Creates a new [`Options`] with the specified width. Equivalent to
///
/// ```
- /// # use textwrap::word_splitters::{HyphenSplitter, WordSplitter};
- /// # use textwrap::Options;
+ /// # use textwrap::{Options, WordSplitter, WordSeparator, WrapAlgorithm};
/// # let width = 80;
/// # let actual = Options::new(width);
/// # let expected =
@@ -312,76 +292,36 @@ impl<'a>
/// subsequent_indent: "",
/// break_words: true,
/// #[cfg(feature = "unicode-linebreak")]
- /// word_separator: textwrap::word_separators::UnicodeBreakProperties,
+ /// word_separator: WordSeparator::UnicodeBreakProperties,
/// #[cfg(not(feature = "unicode-linebreak"))]
- /// word_separator: textwrap::word_separators::AsciiSpace,
+ /// word_separator: WordSeparator::AsciiSpace,
/// #[cfg(feature = "smawk")]
- /// wrap_algorithm: textwrap::wrap_algorithms::OptimalFit,
+ /// wrap_algorithm: WrapAlgorithm::new_optimal_fit(),
/// #[cfg(not(feature = "smawk"))]
- /// wrap_algorithm: textwrap::wrap_algorithms::FirstFit,
- /// word_splitter: textwrap::word_splitters::HyphenSplitter,
+ /// wrap_algorithm: WrapAlgorithm::FirstFit,
+ /// word_splitter: WordSplitter::HyphenSplitter,
/// }
/// # ;
/// # assert_eq!(actual.width, expected.width);
/// # assert_eq!(actual.initial_indent, expected.initial_indent);
/// # assert_eq!(actual.subsequent_indent, expected.subsequent_indent);
/// # assert_eq!(actual.break_words, expected.break_words);
+ /// # assert_eq!(actual.word_splitter, expected.word_splitter);
/// ```
///
/// Note that the default word separator and wrap algorithms
/// change based on the available Cargo features. The best
- /// available algorithm is used by default.
- ///
- /// Static dispatch means here, that the word splitter is stored as-is
- /// and the type is known at compile-time. Thus the returned value
- /// is actually a `Options<AsciiSpace, HyphenSplitter>`.
- ///
- /// Dynamic dispatch on the other hand, means that the word
- /// separator and/or word splitter is stored as a trait object
- /// such as a `Box<dyn word_splitters::WordSplitter>`. This way
- /// the word splitter's inner type can be changed without changing
- /// the type of this struct, which then would be just `Options` as
- /// a short cut for `Options<Box<dyn
- /// word_separators::WordSeparator>, Box<dyn
- /// word_splitters::WordSplitter>>`.
- ///
- /// The value and type of the word splitter can be choose from the
- /// start using the [`Options::with_word_splitter`] constructor or
- /// changed afterwards using the [`Options::word_splitter`]
- /// method. Whether static or dynamic dispatch is used, depends on
- /// whether these functions are given a boxed
- /// [`word_splitters::WordSplitter`] or not. Take for example:
- ///
- /// ```
- /// use textwrap::Options;
- /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation};
- /// # use textwrap::word_splitters::WordSplitter;
- /// # use textwrap::word_separators::AsciiSpace;
- /// # let width = 80;
- ///
- /// // uses HyphenSplitter with static dispatch
- /// // the actual type: Options<AsciiSpace, HyphenSplitter>
- /// let opt = Options::new(width);
- ///
- /// // uses NoHyphenation with static dispatch
- /// // the actual type: Options<AsciiSpace, NoHyphenation>
- /// let opt = Options::new(width).word_splitter(NoHyphenation);
- ///
- /// // uses HyphenSplitter with dynamic dispatch
- /// // the actual type: Options<AsciiSpace, Box<dyn word_splitters::WordSplitter>>
- /// let opt: Options<_, _, _> = Options::new(width).word_splitter(Box::new(HyphenSplitter));
- ///
- /// // uses NoHyphenation with dynamic dispatch
- /// // the actual type: Options<AsciiSpace, Box<dyn word_splitters::WordSplitter>>
- /// let opt: Options<_, _, _> = Options::new(width).word_splitter(Box::new(NoHyphenation));
- /// ```
- ///
- /// Notice that the last two variables have the same type, despite
- /// the different `WordSplitter` in use. Thus dynamic dispatch
- /// allows to change the word splitter at run-time without
- /// changing the variables type.
+ /// available algorithms are used by default.
pub const fn new(width: usize) -> Self {
- Options::with_word_splitter(width, word_splitters::HyphenSplitter)
+ Options {
+ width,
+ initial_indent: "",
+ subsequent_indent: "",
+ break_words: true,
+ word_separator: DefaultWordSeparator!(),
+ wrap_algorithm: WrapAlgorithm::new(),
+ word_splitter: WordSplitter::HyphenSplitter,
+ }
}
/// Creates a new [`Options`] with `width` set to the current
@@ -407,97 +347,7 @@ impl<'a>
}
}
-impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), WordSplit> {
- /// Creates a new [`Options`] with the specified width and
- /// word splitter. Equivalent to
- ///
- /// ```
- /// # use textwrap::Options;
- /// # use textwrap::word_splitters::{NoHyphenation, HyphenSplitter};
- /// # const word_splitter: NoHyphenation = NoHyphenation;
- /// # const width: usize = 80;
- /// # let actual = Options::with_word_splitter(width, word_splitter);
- /// # let expected =
- /// Options {
- /// width: width,
- /// initial_indent: "",
- /// subsequent_indent: "",
- /// break_words: true,
- /// #[cfg(feature = "unicode-linebreak")]
- /// word_separator: textwrap::word_separators::UnicodeBreakProperties,
- /// #[cfg(not(feature = "unicode-linebreak"))]
- /// word_separator: textwrap::word_separators::AsciiSpace,
- /// #[cfg(feature = "smawk")]
- /// wrap_algorithm: textwrap::wrap_algorithms::OptimalFit,
- /// #[cfg(not(feature = "smawk"))]
- /// wrap_algorithm: textwrap::wrap_algorithms::FirstFit,
- /// word_splitter: word_splitter,
- /// }
- /// # ;
- /// # assert_eq!(actual.width, expected.width);
- /// # assert_eq!(actual.initial_indent, expected.initial_indent);
- /// # assert_eq!(actual.subsequent_indent, expected.subsequent_indent);
- /// # assert_eq!(actual.break_words, expected.break_words);
- /// ```
- ///
- /// This constructor allows to specify the word splitter to be
- /// used. It is like a short-cut for
- /// `Options::new(w).word_splitter(s)`, but this function is a
- /// `const fn`. The given word splitter may be in a [`Box`], which
- /// then can be coerced into a trait object for dynamic dispatch:
- ///
- /// ```
- /// use textwrap::Options;
- /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter};
- /// # const width: usize = 80;
- ///
- /// // This opt contains a boxed trait object as splitter.
- /// // The type annotation is important, otherwise it will be not a trait object
- /// let mut opt: Options<_, _, Box<dyn WordSplitter>>
- /// = Options::with_word_splitter(width, Box::new(NoHyphenation));
- /// // Its type is actually: `Options<AsciiSpace, Box<dyn word_splitters::WordSplitter>>`:
- /// let opt_coerced: Options<_, _, Box<dyn WordSplitter>> = opt;
- ///
- /// // Thus, it can be overridden with a different word splitter.
- /// opt = Options::with_word_splitter(width, Box::new(HyphenSplitter));
- /// // Now, containing a `HyphenSplitter` instead.
- /// ```
- ///
- /// Since the word splitter is given by value, which determines
- /// the generic type parameter, it can be used to produce both an
- /// [`Options`] with static and dynamic dispatch, respectively.
- /// While dynamic dispatch allows to change the type of the inner
- /// word splitter at run time as seen above, static dispatch
- /// especially can store the word splitter directly, without the
- /// need for a box. This in turn allows it to be used in constant
- /// and static context:
- ///
- /// ```
- /// use textwrap::word_splitters::HyphenSplitter; use textwrap::{ Options};
- /// use textwrap::word_separators::AsciiSpace;
- /// use textwrap::wrap_algorithms::FirstFit;
- /// # const width: usize = 80;
- ///
- /// # #[cfg(all(not(feature = "smawk"), not(feature = "unicode-linebreak")))] {
- /// const FOO: Options<FirstFit, AsciiSpace, HyphenSplitter> =
- /// Options::with_word_splitter(width, HyphenSplitter);
- /// static BAR: Options<FirstFit, AsciiSpace, HyphenSplitter> = FOO;
- /// # }
- /// ```
- pub const fn with_word_splitter(width: usize, word_splitter: WordSplit) -> Self {
- Options {
- width,
- initial_indent: "",
- subsequent_indent: "",
- break_words: true,
- word_separator: DefaultWordSeparator!(),
- wrap_algorithm: DefaultWrapAlgorithm!(),
- word_splitter: word_splitter,
- }
- }
-}
-
-impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit> {
+impl<'a> Options<'a> {
/// Change [`self.initial_indent`]. The initial indentation is
/// used on the very first line of output.
///
@@ -507,7 +357,7 @@ impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit>
/// initial indentation and wrapping each paragraph by itself:
///
/// ```
- /// use textwrap::{Options, wrap};
+ /// use textwrap::{wrap, Options};
///
/// let options = Options::new(16).initial_indent(" ");
/// assert_eq!(wrap("This is a little example.", options),
@@ -532,7 +382,7 @@ impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit>
/// single paragraph as a bullet list:
///
/// ```
- /// use textwrap::{Options, wrap};
+ /// use textwrap::{wrap, Options};
///
/// let options = Options::new(12)
/// .initial_indent("* ")
@@ -591,10 +441,7 @@ impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit>
/// See [`word_separators::WordSeparator`] for details on the choices.
///
/// [`self.word_separator`]: #structfield.word_separator
- pub fn word_separator<NewWordSep>(
- self,
- word_separator: NewWordSep,
- ) -> Options<'a, WrapAlgo, NewWordSep, WordSplit> {
+ pub fn word_separator(self, word_separator: WordSeparator) -> Options<'a> {
Options {
width: self.width,
initial_indent: self.initial_indent,
@@ -612,10 +459,7 @@ impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit>
/// the choices.
///
/// [`self.wrap_algorithm`]: #structfield.wrap_algorithm
- pub fn wrap_algorithm<NewWrapAlgo>(
- self,
- wrap_algorithm: NewWrapAlgo,
- ) -> Options<'a, NewWrapAlgo, WordSep, WordSplit> {
+ pub fn wrap_algorithm(self, wrap_algorithm: WrapAlgorithm) -> Options<'a> {
Options {
width: self.width,
initial_indent: self.initial_indent,
@@ -631,25 +475,18 @@ impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit>
/// [`word_splitters::WordSplitter`] is used to fit part of a word
/// into the current line when wrapping text.
///
- /// This function may return a different type than `Self`. That is
- /// the case when the given `splitter` is of a different type the
- /// the currently stored one in the `splitter` field. Take for
- /// example:
+ /// # Examples
///
/// ```
- /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation};
- /// use textwrap::Options;
- /// // The default type returned by `new`:
- /// let opt: Options<_, _, HyphenSplitter> = Options::new(80);
- /// // Setting a different word splitter changes the type
- /// let opt: Options<_, _, NoHyphenation> = opt.word_splitter(NoHyphenation);
+ /// use textwrap::{Options, WordSplitter};
+ /// let opt = Options::new(80);
+ /// assert_eq!(opt.word_splitter, WordSplitter::HyphenSplitter);
+ /// let opt = opt.word_splitter(WordSplitter::NoHyphenation);
+ /// assert_eq!(opt.word_splitter, WordSplitter::NoHyphenation);
/// ```
///
/// [`self.word_splitter`]: #structfield.word_splitter
- pub fn word_splitter<NewWordSplit>(
- self,
- word_splitter: NewWordSplit,
- ) -> Options<'a, WrapAlgo, WordSep, NewWordSplit> {
+ pub fn word_splitter(self, word_splitter: WordSplitter) -> Options<'a> {
Options {
width: self.width,
initial_indent: self.initial_indent,
@@ -675,11 +512,9 @@ impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit>
///
/// ```no_run
/// use textwrap::{termwidth, Options};
-/// use textwrap::word_splitters::NoHyphenation;
///
/// let width = termwidth() - 4; // Two columns on each side.
/// let options = Options::new(width)
-/// .word_splitter(NoHyphenation)
/// .initial_indent(" ")
/// .subsequent_indent(" ");
/// ```
@@ -723,12 +558,9 @@ pub fn termwidth() -> usize {
/// "- Memory safety\n without\n garbage\n collection."
/// );
/// ```
-pub fn fill<'a, WrapAlgo, WordSep, WordSplit, Opt>(text: &str, width_or_options: Opt) -> String
+pub fn fill<'a, Opt>(text: &str, width_or_options: Opt) -> String
where
- WrapAlgo: wrap_algorithms::WrapAlgorithm,
- WordSep: word_separators::WordSeparator,
- WordSplit: word_splitters::WordSplitter,
- Opt: Into<Options<'a, WrapAlgo, WordSep, WordSplit>>,
+ Opt: Into<Options<'a>>,
{
// This will avoid reallocation in simple cases (no
// indentation, no hyphenation).
@@ -738,7 +570,7 @@ where
if i > 0 {
result.push('\n');
}
- result.push_str(&line);
+ result.push_str(line);
}
result
@@ -790,12 +622,7 @@ where
/// assert_eq!(options.initial_indent, "* ");
/// assert_eq!(options.subsequent_indent, " ");
/// ```
-pub fn unfill(
- text: &str,
-) -> (
- String,
- Options<'_, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), word_splitters::HyphenSplitter>,
-) {
+pub fn unfill(text: &str) -> (String, Options<'_>) {
let trimmed = text.trim_end_matches('\n');
let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/'];
@@ -890,15 +717,9 @@ pub fn unfill(
/// item.
/// ");
/// ```
-pub fn refill<'a, WrapAlgo, WordSep, WordSplit, Opt>(
- filled_text: &str,
- new_width_or_options: Opt,
-) -> String
+pub fn refill<'a, Opt>(filled_text: &str, new_width_or_options: Opt) -> String
where
- WrapAlgo: wrap_algorithms::WrapAlgorithm,
- WordSep: word_separators::WordSeparator,
- WordSplit: word_splitters::WordSplitter,
- Opt: Into<Options<'a, WrapAlgo, WordSep, WordSplit>>,
+ Opt: Into<Options<'a>>,
{
let trimmed = filled_text.trim_end_matches('\n');
let (text, options) = unfill(trimmed);
@@ -964,7 +785,7 @@ where
/// narrow column with room for only 10 characters looks like this:
///
/// ```
-/// # use textwrap::{wrap_algorithms::FirstFit, Options, wrap};
+/// # use textwrap::{WrapAlgorithm::FirstFit, Options, wrap};
/// #
/// # let lines = wrap("To be, or not to be: that is the question",
/// # Options::new(10).wrap_algorithm(FirstFit));
@@ -988,11 +809,12 @@ where
///
/// ```
/// # #[cfg(feature = "smawk")] {
-/// # use textwrap::{Options, wrap};
-/// # use textwrap::wrap_algorithms::OptimalFit;
+/// # use textwrap::{Options, WrapAlgorithm, wrap};
/// #
-/// # let lines = wrap("To be, or not to be: that is the question",
-/// # Options::new(10).wrap_algorithm(OptimalFit));
+/// # let lines = wrap(
+/// # "To be, or not to be: that is the question",
+/// # Options::new(10).wrap_algorithm(WrapAlgorithm::new_optimal_fit())
+/// # );
/// # assert_eq!(lines.join("\n") + "\n", "\
/// To be,
/// or not to
@@ -1002,7 +824,7 @@ where
/// # "); }
/// ```
///
-/// Please see the [`wrap_algorithms::WrapAlgorithm`] trait for details.
+/// Please see [`WrapAlgorithm`] for details on the choices.
///
/// # Examples
///
@@ -1079,15 +901,9 @@ where
/// assert_eq!(wrap(" foo bar", 8), vec![" foo", "bar"]);
/// assert_eq!(wrap(" foo bar", 4), vec!["", "foo", "bar"]);
/// ```
-pub fn wrap<'a, WrapAlgo, WordSep, WordSplit, Opt>(
- text: &str,
- width_or_options: Opt,
-) -> Vec<Cow<'_, str>>
+pub fn wrap<'a, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>>
where
- WrapAlgo: wrap_algorithms::WrapAlgorithm,
- WordSep: word_separators::WordSeparator,
- WordSplit: word_splitters::WordSplitter,
- Opt: Into<Options<'a, WrapAlgo, WordSep, WordSplit>>,
+ Opt: Into<Options<'a>>,
{
let options = width_or_options.into();
@@ -1155,7 +971,7 @@ where
result += &line[idx..idx + len];
if !last_word.penalty.is_empty() {
- result.to_mut().push_str(&last_word.penalty);
+ result.to_mut().push_str(last_word.penalty);
}
lines.push(result);
@@ -1227,7 +1043,7 @@ where
/// "| example text, | columns. | shorter than |",
/// "| which is | Notice how | the others. |",
/// "| wrapped into | the final | |"]);
-pub fn wrap_columns<'a, WrapAlgo, WordSep, WordSplit, Opt>(
+pub fn wrap_columns<'a, Opt>(
text: &str,
columns: usize,
total_width_or_options: Opt,
@@ -1236,10 +1052,7 @@ pub fn wrap_columns<'a, WrapAlgo, WordSep, WordSplit, Opt>(
right_gap: &str,
) -> Vec<String>
where
- WrapAlgo: wrap_algorithms::WrapAlgorithm,
- WordSep: word_separators::WordSeparator,
- WordSplit: word_splitters::WordSplitter,
- Opt: Into<Options<'a, WrapAlgo, WordSep, WordSplit>>,
+ Opt: Into<Options<'a>>,
{
assert!(columns > 0);
@@ -1263,8 +1076,8 @@ where
for column_no in 0..columns {
match wrapped_lines.get(line_no + column_no * lines_per_column) {
Some(column_line) => {
- line.push_str(&column_line);
- line.push_str(&" ".repeat(column_width - core::display_width(&column_line)));
+ line.push_str(column_line);
+ line.push_str(&" ".repeat(column_width - core::display_width(column_line)));
}
None => {
line.push_str(&" ".repeat(column_width));
@@ -1298,21 +1111,20 @@ where
/// [`fill`] with these options:
///
/// ```
-/// # use textwrap::{core, Options};
-/// # use textwrap::{word_separators, word_splitters, wrap_algorithms};
+/// # use textwrap::{core, Options, WordSplitter, WordSeparator, WrapAlgorithm};
/// # let width = 80;
/// Options {
/// width: width,
/// initial_indent: "",
/// subsequent_indent: "",
/// break_words: false,
-/// word_separator: word_separators::AsciiSpace,
-/// wrap_algorithm: wrap_algorithms::FirstFit,
-/// word_splitter: word_splitters::NoHyphenation,
+/// word_separator: WordSeparator::AsciiSpace,
+/// wrap_algorithm: WrapAlgorithm::FirstFit,
+/// word_splitter: WordSplitter::NoHyphenation,
/// };
/// ```
///
-/// The wrap algorithm is [`wrap_algorithms::FirstFit`] since this
+/// The wrap algorithm is [`WrapAlgorithm::FirstFit`] since this
/// is the fastest algorithm — and the main reason to use
/// `fill_inplace` is to get the string broken into newlines as fast
/// as possible.
@@ -1338,15 +1150,14 @@ where
/// benchmark](https://github.com/mgeisler/textwrap/blob/master/benches/linear.rs)
/// for details.
pub fn fill_inplace(text: &mut String, width: usize) {
- use word_separators::WordSeparator;
let mut indices = Vec::new();
let mut offset = 0;
for line in text.split('\n') {
- let words = word_separators::AsciiSpace
+ let words = WordSeparator::AsciiSpace
.find_words(line)
.collect::<Vec<_>>();
- let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[width]);
+ let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[width as f64]);
let mut line_offset = offset;
for words in &wrapped_words[..wrapped_words.len() - 1] {
@@ -1376,8 +1187,6 @@ pub fn fill_inplace(text: &mut String, width: usize) {
#[cfg(test)]
mod tests {
use super::*;
- use crate::word_splitters::WordSplitter;
- use crate::{word_splitters, wrap_algorithms};
#[cfg(feature = "hyphenation")]
use hyphenation::{Language, Load, Standard};
@@ -1412,7 +1221,7 @@ mod tests {
assert_eq!(
wrap(
"To be, or not to be, that is the question.",
- Options::new(10).wrap_algorithm(wrap_algorithms::FirstFit)
+ Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit)
),
vec!["To be, or", "not to be,", "that is", "the", "question."]
);
@@ -1435,7 +1244,11 @@ mod tests {
#[test]
fn max_width() {
- assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]);
+ assert_eq!(wrap("foo bar", usize::MAX), vec!["foo bar"]);
+
+ let text = "Hello there! This is some English text. \
+ It should not be wrapped given the extents below.";
+ assert_eq!(wrap(text, usize::MAX), vec![text]);
}
#[test]
@@ -1474,18 +1287,17 @@ mod tests {
fn issue_129() {
// The dash is an em-dash which takes up four bytes. We used
// to panic since we tried to index into the character.
- let options = Options::new(1).word_separator(word_separators::AsciiSpace);
+ let options = Options::new(1).word_separator(WordSeparator::AsciiSpace);
assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]);
}
#[test]
- #[cfg(feature = "unicode-width")]
fn wide_character_handling() {
assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]);
assert_eq!(
wrap(
"Hello, World!",
- Options::new(15).word_separator(word_separators::AsciiSpace)
+ Options::new(15).word_separator(WordSeparator::AsciiSpace)
),
vec!["Hello,", "World!"]
);
@@ -1496,7 +1308,7 @@ mod tests {
assert_eq!(
wrap(
"Hello, World!",
- Options::new(15).word_separator(word_separators::UnicodeBreakProperties)
+ Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties)
),
vec!["Hello, W", "orld!"]
);
@@ -1519,7 +1331,6 @@ mod tests {
}
#[test]
- #[cfg(feature = "unicode-width")]
fn indent_first_emoji() {
let options = Options::new(10).initial_indent("👉👉");
assert_eq!(
@@ -1627,34 +1438,20 @@ mod tests {
}
#[test]
- fn simple_hyphens_static() {
- let options = Options::new(8).word_splitter(word_splitters::HyphenSplitter);
- assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]);
- }
-
- #[test]
- fn simple_hyphens_dynamic() {
- let options: Options<_, _> =
- Options::new(8).word_splitter(Box::new(word_splitters::HyphenSplitter));
+ fn simple_hyphens() {
+ let options = Options::new(8).word_splitter(WordSplitter::HyphenSplitter);
assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]);
}
#[test]
- fn no_hyphenation_static() {
- let options = Options::new(8).word_splitter(word_splitters::NoHyphenation);
- assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]);
- }
-
- #[test]
- fn no_hyphenation_dynamic() {
- let options: Options<_, _> =
- Options::new(8).word_splitter(Box::new(word_splitters::NoHyphenation));
+ fn no_hyphenation() {
+ let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]);
}
#[test]
#[cfg(feature = "hyphenation")]
- fn auto_hyphenation_double_hyphenation_static() {
+ fn auto_hyphenation_double_hyphenation() {
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
let options = Options::new(10);
assert_eq!(
@@ -1662,25 +1459,7 @@ mod tests {
vec!["Internatio", "nalization"]
);
- let options = Options::new(10).word_splitter(dictionary);
- assert_eq!(
- wrap("Internationalization", &options),
- vec!["Interna-", "tionaliza-", "tion"]
- );
- }
-
- #[test]
- #[cfg(feature = "hyphenation")]
- fn auto_hyphenation_double_hyphenation_dynamic() {
- let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let mut options: Options<_, _, Box<dyn word_splitters::WordSplitter>> =
- Options::new(10).word_splitter(Box::new(word_splitters::HyphenSplitter));
- assert_eq!(
- wrap("Internationalization", &options),
- vec!["Internatio", "nalization"]
- );
-
- options = Options::new(10).word_splitter(Box::new(dictionary));
+ let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
assert_eq!(
wrap("Internationalization", &options),
vec!["Interna-", "tionaliza-", "tion"]
@@ -1697,7 +1476,7 @@ mod tests {
vec!["participat", "ion is", "the key to", "success"]
);
- let options = Options::new(10).word_splitter(dictionary);
+ let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
assert_eq!(
wrap("participation is the key to success", &options),
vec!["partici-", "pation is", "the key to", "success"]
@@ -1707,10 +1486,10 @@ mod tests {
#[test]
#[cfg(feature = "hyphenation")]
fn split_len_hyphenation() {
- // Test that hyphenation takes the width of the wihtespace
+ // Test that hyphenation takes the width of the whitespace
// into account.
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let options = Options::new(15).word_splitter(dictionary);
+ let options = Options::new(15).word_splitter(WordSplitter::Hyphenation(dictionary));
assert_eq!(
wrap("garbage collection", &options),
vec!["garbage col-", "lection"]
@@ -1724,8 +1503,9 @@ mod tests {
// line is borrowed.
use std::borrow::Cow::{Borrowed, Owned};
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let options = Options::new(10).word_splitter(dictionary);
+ let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
let lines = wrap("Internationalization", &options);
+ assert_eq!(lines, vec!["Interna-", "tionaliza-", "tion"]);
if let Borrowed(s) = lines[0] {
assert!(false, "should not have been borrowed: {:?}", s);
}
@@ -1747,7 +1527,7 @@ mod tests {
vec!["over-", "caffinated"]
);
- let options = options.word_splitter(dictionary);
+ let options = options.word_splitter(WordSplitter::Hyphenation(dictionary));
assert_eq!(
wrap("over-caffinated", &options),
vec!["over-", "caffi-", "nated"]
@@ -1763,7 +1543,7 @@ mod tests {
fn break_words_wide_characters() {
// Even the poor man's version of `ch_width` counts these
// characters as wide.
- let options = Options::new(5).word_separator(word_separators::AsciiSpace);
+ let options = Options::new(5).word_separator(WordSeparator::AsciiSpace);
assert_eq!(wrap("Hello", options), vec!["He", "ll", "o"]);
}
@@ -1801,14 +1581,14 @@ mod tests {
assert_eq!(
fill(
"1 3 5 7\n1 3 5 7",
- Options::new(7).wrap_algorithm(wrap_algorithms::FirstFit)
+ Options::new(7).wrap_algorithm(WrapAlgorithm::FirstFit)
),
"1 3 5 7\n1 3 5 7"
);
assert_eq!(
fill(
"1 3 5 7\n1 3 5 7",
- Options::new(5).wrap_algorithm(wrap_algorithms::FirstFit)
+ Options::new(5).wrap_algorithm(WrapAlgorithm::FirstFit)
),
"1 3 5\n7\n1 3 5\n7"
);
@@ -1857,20 +1637,6 @@ mod tests {
}
#[test]
- #[cfg(not(feature = "smawk"))]
- #[cfg(not(feature = "unicode-linebreak"))]
- fn cloning_works() {
- static OPT: Options<
- wrap_algorithms::FirstFit,
- word_separators::AsciiSpace,
- word_splitters::HyphenSplitter,
- > = Options::with_word_splitter(80, word_splitters::HyphenSplitter);
- #[allow(clippy::clone_on_copy)]
- let opt = OPT.clone();
- assert_eq!(opt.width, 80);
- }
-
- #[test]
fn fill_inplace_empty() {
let mut text = String::from("");
fill_inplace(&mut text, 80);
@@ -2006,60 +1772,6 @@ mod tests {
}
#[test]
- fn trait_object_vec() {
- // Create a vector of Options containing trait-objects.
- let mut vector: Vec<
- Options<
- _,
- Box<dyn word_separators::WordSeparator>,
- Box<dyn word_splitters::WordSplitter>,
- >,
- > = Vec::new();
- // Expected result from each options
- let mut results = Vec::new();
-
- let opt_full_type: Options<
- _,
- Box<dyn word_separators::WordSeparator>,
- Box<dyn word_splitters::WordSplitter>,
- > =
- Options::new(10)
- .word_splitter(Box::new(word_splitters::HyphenSplitter)
- as Box<dyn word_splitters::WordSplitter>)
- .word_separator(Box::new(word_separators::AsciiSpace)
- as Box<dyn word_separators::WordSeparator>);
- vector.push(opt_full_type);
- results.push(vec!["over-", "caffinated"]);
-
- // Actually: Options<Box<AsciiSpace>, Box<dyn word_splitters::WordSplitter>>
- let opt_abbreviated_type =
- Options::new(10)
- .break_words(false)
- .word_splitter(Box::new(word_splitters::NoHyphenation)
- as Box<dyn word_splitters::WordSplitter>)
- .word_separator(Box::new(word_separators::AsciiSpace)
- as Box<dyn word_separators::WordSeparator>);
- vector.push(opt_abbreviated_type);
- results.push(vec!["over-caffinated"]);
-
- #[cfg(feature = "hyphenation")]
- {
- let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let opt_hyp = Options::new(8)
- .word_splitter(Box::new(dictionary) as Box<dyn word_splitters::WordSplitter>)
- .word_separator(Box::new(word_separators::AsciiSpace)
- as Box<dyn word_separators::WordSeparator>);
- vector.push(opt_hyp);
- results.push(vec!["over-", "caffi-", "nated"]);
- }
-
- // Test each entry
- for (opt, expected) in vector.into_iter().zip(results) {
- assert_eq!(wrap("over-caffinated", opt), expected);
- }
- }
-
- #[test]
fn wrap_columns_empty_text() {
assert_eq!(wrap_columns("", 1, 10, "| ", "", " |"), vec!["| |"]);
}
diff --git a/src/word_separators.rs b/src/word_separators.rs
index db03a91..25adf31 100644
--- a/src/word_separators.rs
+++ b/src/word_separators.rs
@@ -25,247 +25,162 @@ use crate::core::Word;
/// without emojis. A more complex approach is to use the Unicode line
/// breaking algorithm, which finds break points in non-ASCII text.
///
-/// The line breaks occur between words, please see the
-/// [`WordSplitter`](crate::word_splitters::WordSplitter) trait for
-/// options of how to handle hyphenation of individual words.
+/// The line breaks occur between words, please see
+/// [`WordSplitter`](crate::WordSplitter) for options of how to handle
+/// hyphenation of individual words.
///
/// # Examples
///
/// ```
/// use textwrap::core::Word;
-/// use textwrap::word_separators::{WordSeparator, AsciiSpace};
+/// use textwrap::WordSeparator::AsciiSpace;
///
/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
/// ```
-pub trait WordSeparator: WordSeparatorClone + std::fmt::Debug {
- // This trait should really return impl Iterator<Item = Word>, but
- // this isn't possible until Rust supports higher-kinded types:
- // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md
- /// Find all words in `line`.
- fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a>;
-}
-
-// The internal `WordSeparatorClone` trait is allows us to implement
-// `Clone` for `Box<dyn WordSeparator>`. This in used in the
-// `From<&Options<'_, WrapAlgo, WordSep, WordSplit>> for Options<'a,
-// WrapAlgo, WordSep, WordSplit>` implementation.
-#[doc(hidden)]
-pub trait WordSeparatorClone {
- fn clone_box(&self) -> Box<dyn WordSeparator>;
+#[derive(Clone, Copy)]
+pub enum WordSeparator {
+ /// Find words by splitting on runs of `' '` characters.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use textwrap::core::Word;
+ /// use textwrap::WordSeparator::AsciiSpace;
+ ///
+ /// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
+ /// assert_eq!(words, vec![Word::from("Hello "),
+ /// Word::from("World!")]);
+ /// ```
+ AsciiSpace,
+
+ /// Split `line` into words using Unicode break properties.
+ ///
+ /// This word separator uses the Unicode line breaking algorithm
+ /// described in [Unicode Standard Annex
+ /// #14](https://www.unicode.org/reports/tr14/) to find legal places
+ /// to break lines. There is a small difference in that the U+002D
+ /// (Hyphen-Minus) and U+00AD (Soft Hyphen) don’t create a line break:
+ /// to allow a line break at a hyphen, use
+ /// [`WordSplitter::HyphenSplitter`](crate::WordSplitter::HyphenSplitter).
+ /// Soft hyphens are not currently supported.
+ ///
+ /// # Examples
+ ///
+ /// Unlike [`WordSeparator::AsciiSpace`], the Unicode line
+ /// breaking algorithm will find line break opportunities between
+ /// some characters with no intervening whitespace:
+ ///
+ /// ```
+ /// #[cfg(feature = "unicode-linebreak")] {
+ /// use textwrap::core::Word;
+ /// use textwrap::WordSeparator::UnicodeBreakProperties;
+ ///
+ /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::<Vec<_>>(),
+ /// vec![Word::from("Emojis: "),
+ /// Word::from("😂"),
+ /// Word::from("😍")]);
+ ///
+ /// assert_eq!(UnicodeBreakProperties.find_words("CJK: 你好").collect::<Vec<_>>(),
+ /// vec![Word::from("CJK: "),
+ /// Word::from("你"),
+ /// Word::from("好")]);
+ /// }
+ /// ```
+ ///
+ /// A U+2060 (Word Joiner) character can be inserted if you want to
+ /// manually override the defaults and keep the characters together:
+ ///
+ /// ```
+ /// #[cfg(feature = "unicode-linebreak")] {
+ /// use textwrap::core::Word;
+ /// use textwrap::WordSeparator::UnicodeBreakProperties;
+ ///
+ /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::<Vec<_>>(),
+ /// vec![Word::from("Emojis: "),
+ /// Word::from("😂\u{2060}😍")]);
+ /// }
+ /// ```
+ ///
+ /// The Unicode line breaking algorithm will also automatically
+ /// suppress line breaks around certain punctuation characters:
+ ///
+ /// ```
+ /// #[cfg(feature = "unicode-linebreak")] {
+ /// use textwrap::core::Word;
+ /// use textwrap::WordSeparator::UnicodeBreakProperties;
+ ///
+ /// assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::<Vec<_>>(),
+ /// vec![Word::from("[ foo ] "),
+ /// Word::from("bar !")]);
+ /// }
+ /// ```
+ #[cfg(feature = "unicode-linebreak")]
+ UnicodeBreakProperties,
+
+ /// Find words using a custom word separator
+ Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>),
}
-impl<T: WordSeparator + Clone + 'static> WordSeparatorClone for T {
- fn clone_box(&self) -> Box<dyn WordSeparator> {
- Box::new(self.clone())
- }
-}
-
-impl Clone for Box<dyn WordSeparator> {
- fn clone(&self) -> Box<dyn WordSeparator> {
- use std::ops::Deref;
- self.deref().clone_box()
+impl std::fmt::Debug for WordSeparator {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ WordSeparator::AsciiSpace => f.write_str("AsciiSpace"),
+ #[cfg(feature = "unicode-linebreak")]
+ WordSeparator::UnicodeBreakProperties => f.write_str("UnicodeBreakProperties"),
+ WordSeparator::Custom(_) => f.write_str("Custom(...)"),
+ }
}
}
-impl WordSeparator for Box<dyn WordSeparator> {
- fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
- use std::ops::Deref;
- self.deref().find_words(line)
+impl WordSeparator {
+ // This function should really return impl Iterator<Item = Word>, but
+ // this isn't possible until Rust supports higher-kinded types:
+ // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md
+ /// Find all words in `line`.
+ pub fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+ match self {
+ WordSeparator::AsciiSpace => find_words_ascii_space(line),
+ #[cfg(feature = "unicode-linebreak")]
+ WordSeparator::UnicodeBreakProperties => find_words_unicode_break_properties(line),
+ WordSeparator::Custom(func) => func(line),
+ }
}
}
-/// Find words by splitting on regions of `' '` characters.
-#[derive(Clone, Copy, Debug, Default)]
-pub struct AsciiSpace;
-
-/// Split `line` into words separated by regions of `' '` characters.
-///
-/// # Examples
-///
-/// ```
-/// use textwrap::core::Word;
-/// use textwrap::word_separators::{AsciiSpace, WordSeparator};
-///
-/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
-/// assert_eq!(words, vec![Word::from("Hello "),
-/// Word::from("World!")]);
-/// ```
-impl WordSeparator for AsciiSpace {
- fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
- let mut start = 0;
- let mut in_whitespace = false;
- let mut char_indices = line.char_indices();
-
- Box::new(std::iter::from_fn(move || {
- // for (idx, ch) in char_indices does not work, gives this
- // error:
- //
- // > cannot move out of `char_indices`, a captured variable in
- // > an `FnMut` closure
- #[allow(clippy::while_let_on_iterator)]
- while let Some((idx, ch)) = char_indices.next() {
- if in_whitespace && ch != ' ' {
- let word = Word::from(&line[start..idx]);
- start = idx;
- in_whitespace = ch == ' ';
- return Some(word);
- }
-
+fn find_words_ascii_space<'a>(line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+ let mut start = 0;
+ let mut in_whitespace = false;
+ let mut char_indices = line.char_indices();
+
+ Box::new(std::iter::from_fn(move || {
+ // for (idx, ch) in char_indices does not work, gives this
+ // error:
+ //
+ // > cannot move out of `char_indices`, a captured variable in
+ // > an `FnMut` closure
+ #[allow(clippy::while_let_on_iterator)]
+ while let Some((idx, ch)) = char_indices.next() {
+ if in_whitespace && ch != ' ' {
+ let word = Word::from(&line[start..idx]);
+ start = idx;
in_whitespace = ch == ' ';
- }
-
- if start < line.len() {
- let word = Word::from(&line[start..]);
- start = line.len();
return Some(word);
}
- None
- }))
- }
-}
-
-/// Find words using the Unicode line breaking algorithm.
-#[cfg(feature = "unicode-linebreak")]
-#[derive(Clone, Copy, Debug, Default)]
-pub struct UnicodeBreakProperties;
-
-/// Split `line` into words using Unicode break properties.
-///
-/// This word separator uses the Unicode line breaking algorithm
-/// described in [Unicode Standard Annex
-/// #14](https://www.unicode.org/reports/tr14/) to find legal places
-/// to break lines. There is a small difference in that the U+002D
-/// (Hyphen-Minus) and U+00AD (Soft Hyphen) don’t create a line break:
-/// to allow a line break at a hyphen, use the
-/// [`HyphenSplitter`](crate::word_splitters::HyphenSplitter). Soft
-/// hyphens are not currently supported.
-///
-/// # Examples
-///
-/// Unlike [`AsciiSpace`], the Unicode line breaking algorithm will
-/// find line break opportunities between some characters with no
-/// intervening whitespace:
-///
-/// ```
-/// #[cfg(feature = "unicode-linebreak")] {
-/// use textwrap::word_separators::{WordSeparator, UnicodeBreakProperties};
-/// use textwrap::core::Word;
-///
-/// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::<Vec<_>>(),
-/// vec![Word::from("Emojis: "),
-/// Word::from("😂"),
-/// Word::from("😍")]);
-///
-/// assert_eq!(UnicodeBreakProperties.find_words("CJK: 你好").collect::<Vec<_>>(),
-/// vec![Word::from("CJK: "),
-/// Word::from("你"),
-/// Word::from("好")]);
-/// }
-/// ```
-///
-/// A U+2060 (Word Joiner) character can be inserted if you want to
-/// manually override the defaults and keep the characters together:
-///
-/// ```
-/// #[cfg(feature = "unicode-linebreak")] {
-/// use textwrap::word_separators::{UnicodeBreakProperties, WordSeparator};
-/// use textwrap::core::Word;
-///
-/// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::<Vec<_>>(),
-/// vec![Word::from("Emojis: "),
-/// Word::from("😂\u{2060}😍")]);
-/// }
-/// ```
-///
-/// The Unicode line breaking algorithm will also automatically
-/// suppress break breaks around certain punctuation characters::
-///
-/// ```
-/// #[cfg(feature = "unicode-linebreak")] {
-/// use textwrap::word_separators::{UnicodeBreakProperties, WordSeparator};
-/// use textwrap::core::Word;
-///
-/// assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::<Vec<_>>(),
-/// vec![Word::from("[ foo ] "),
-/// Word::from("bar !")]);
-/// }
-/// ```
-#[cfg(feature = "unicode-linebreak")]
-impl WordSeparator for UnicodeBreakProperties {
- fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
- // Construct an iterator over (original index, stripped index)
- // tuples. We find the Unicode linebreaks on a stripped string,
- // but we need the original indices so we can form words based on
- // the original string.
- let mut last_stripped_idx = 0;
- let mut char_indices = line.char_indices();
- let mut idx_map = std::iter::from_fn(move || match char_indices.next() {
- Some((orig_idx, ch)) => {
- let stripped_idx = last_stripped_idx;
- if !skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
- last_stripped_idx += ch.len_utf8();
- }
- Some((orig_idx, stripped_idx))
- }
- None => None,
- });
-
- let stripped = strip_ansi_escape_sequences(&line);
- let mut opportunities = unicode_linebreak::linebreaks(&stripped)
- .filter(|(idx, _)| {
- #[allow(clippy::match_like_matches_macro)]
- match &stripped[..*idx].chars().next_back() {
- // We suppress breaks at ‘-’ since we want to control
- // this via the WordSplitter.
- Some('-') => false,
- // Soft hyphens are currently not supported since we
- // require all `Word` fragments to be continuous in
- // the input string.
- Some(SHY) => false,
- // Other breaks should be fine!
- _ => true,
- }
- })
- .collect::<Vec<_>>()
- .into_iter();
-
- // Remove final break opportunity, we will add it below using
- // &line[start..]; This ensures that we correctly include a
- // trailing ANSI escape sequence.
- opportunities.next_back();
-
- let mut start = 0;
- Box::new(std::iter::from_fn(move || {
- #[allow(clippy::while_let_on_iterator)]
- while let Some((idx, _)) = opportunities.next() {
- if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx)
- {
- let word = Word::from(&line[start..orig_idx]);
- start = orig_idx;
- return Some(word);
- }
- }
+ in_whitespace = ch == ' ';
+ }
- if start < line.len() {
- let word = Word::from(&line[start..]);
- start = line.len();
- return Some(word);
- }
+ if start < line.len() {
+ let word = Word::from(&line[start..]);
+ start = line.len();
+ return Some(word);
+ }
- None
- }))
- }
+ None
+ }))
}
-/// Soft hyphen, also knows as a “shy hyphen”. Should show up as ‘-’
-/// if a line is broken at this point, and otherwise be invisible.
-/// Textwrap does not currently support breaking words at soft
-/// hyphens.
-#[cfg(feature = "unicode-linebreak")]
-const SHY: char = '\u{00ad}';
-
// Strip all ANSI escape sequences from `text`.
#[cfg(feature = "unicode-linebreak")]
fn strip_ansi_escape_sequences(text: &str) -> String {
@@ -282,8 +197,83 @@ fn strip_ansi_escape_sequences(text: &str) -> String {
result
}
+/// Soft hyphen, also known as a “shy hyphen”. Should show up as ‘-’
+/// if a line is broken at this point, and otherwise be invisible.
+/// Textwrap does not currently support breaking words at soft
+/// hyphens.
+#[cfg(feature = "unicode-linebreak")]
+const SHY: char = '\u{00ad}';
+
+/// Find words in line. ANSI escape sequences are ignored in `line`.
+#[cfg(feature = "unicode-linebreak")]
+fn find_words_unicode_break_properties<'a>(
+ line: &'a str,
+) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+ // Construct an iterator over (original index, stripped index)
+ // tuples. We find the Unicode linebreaks on a stripped string,
+ // but we need the original indices so we can form words based on
+ // the original string.
+ let mut last_stripped_idx = 0;
+ let mut char_indices = line.char_indices();
+ let mut idx_map = std::iter::from_fn(move || match char_indices.next() {
+ Some((orig_idx, ch)) => {
+ let stripped_idx = last_stripped_idx;
+ if !skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
+ last_stripped_idx += ch.len_utf8();
+ }
+ Some((orig_idx, stripped_idx))
+ }
+ None => None,
+ });
+
+ let stripped = strip_ansi_escape_sequences(line);
+ let mut opportunities = unicode_linebreak::linebreaks(&stripped)
+ .filter(|(idx, _)| {
+ #[allow(clippy::match_like_matches_macro)]
+ match &stripped[..*idx].chars().next_back() {
+ // We suppress breaks at ‘-’ since we want to control
+ // this via the WordSplitter.
+ Some('-') => false,
+ // Soft hyphens are currently not supported since we
+ // require all `Word` fragments to be continuous in
+ // the input string.
+ Some(SHY) => false,
+ // Other breaks should be fine!
+ _ => true,
+ }
+ })
+ .collect::<Vec<_>>()
+ .into_iter();
+
+ // Remove final break opportunity, we will add it below using
+ // &line[start..]; This ensures that we correctly include a
+ // trailing ANSI escape sequence.
+ opportunities.next_back();
+
+ let mut start = 0;
+ Box::new(std::iter::from_fn(move || {
+ #[allow(clippy::while_let_on_iterator)]
+ while let Some((idx, _)) = opportunities.next() {
+ if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx) {
+ let word = Word::from(&line[start..orig_idx]);
+ start = orig_idx;
+ return Some(word);
+ }
+ }
+
+ if start < line.len() {
+ let word = Word::from(&line[start..]);
+ start = line.len();
+ return Some(word);
+ }
+
+ None
+ }))
+}
+
#[cfg(test)]
mod tests {
+ use super::WordSeparator::*;
use super::*;
// Like assert_eq!, but the left expression is an iterator.
@@ -293,89 +283,121 @@ mod tests {
};
}
- #[test]
- fn ascii_space_empty() {
- assert_iter_eq!(AsciiSpace.find_words(""), vec![]);
- }
-
- #[test]
- fn ascii_space_single_word() {
- assert_iter_eq!(AsciiSpace.find_words("foo"), vec![Word::from("foo")]);
- }
-
- #[test]
- fn ascii_space_two_words() {
- assert_iter_eq!(
- AsciiSpace.find_words("foo bar"),
- vec![Word::from("foo "), Word::from("bar")]
- );
- }
-
- #[test]
- fn ascii_space_multiple_words() {
- assert_iter_eq!(
- AsciiSpace.find_words("foo bar baz"),
- vec![Word::from("foo "), Word::from("bar "), Word::from("baz")]
- );
- }
-
- #[test]
- fn ascii_space_only_whitespace() {
- assert_iter_eq!(AsciiSpace.find_words(" "), vec![Word::from(" ")]);
- }
-
- #[test]
- fn ascii_space_inter_word_whitespace() {
- assert_iter_eq!(
- AsciiSpace.find_words("foo bar"),
- vec![Word::from("foo "), Word::from("bar")]
- )
- }
-
- #[test]
- fn ascii_space_trailing_whitespace() {
- assert_iter_eq!(AsciiSpace.find_words("foo "), vec![Word::from("foo ")]);
+ fn to_words<'a>(words: Vec<&'a str>) -> Vec<Word<'a>> {
+ words.into_iter().map(|w: &str| Word::from(&w)).collect()
}
- #[test]
- fn ascii_space_leading_whitespace() {
- assert_iter_eq!(
- AsciiSpace.find_words(" foo"),
- vec![Word::from(" "), Word::from("foo")]
- );
- }
+ macro_rules! test_find_words {
+ ($ascii_name:ident,
+ $unicode_name:ident,
+ $([ $line:expr, $ascii_words:expr, $unicode_words:expr ]),+) => {
+ #[test]
+ fn $ascii_name() {
+ $(
+ let expected_words = to_words($ascii_words.to_vec());
+ let actual_words = WordSeparator::AsciiSpace
+ .find_words($line)
+ .collect::<Vec<_>>();
+ assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
+ )+
+ }
- #[test]
- fn ascii_space_multi_column_char() {
- assert_iter_eq!(
- AsciiSpace.find_words("\u{1f920}"), // cowboy emoji 🤠
- vec![Word::from("\u{1f920}")]
- );
+ #[test]
+ #[cfg(feature = "unicode-linebreak")]
+ fn $unicode_name() {
+ $(
+ let expected_words = to_words($unicode_words.to_vec());
+ let actual_words = WordSeparator::UnicodeBreakProperties
+ .find_words($line)
+ .collect::<Vec<_>>();
+ assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
+ )+
+ }
+ };
}
- #[test]
- fn ascii_space_hyphens() {
- assert_iter_eq!(
- AsciiSpace.find_words("foo-bar"),
- vec![Word::from("foo-bar")]
- );
- assert_iter_eq!(
- AsciiSpace.find_words("foo- bar"),
- vec![Word::from("foo- "), Word::from("bar")]
- );
- assert_iter_eq!(
- AsciiSpace.find_words("foo - bar"),
- vec![Word::from("foo "), Word::from("- "), Word::from("bar")]
- );
- assert_iter_eq!(
- AsciiSpace.find_words("foo -bar"),
- vec![Word::from("foo "), Word::from("-bar")]
- );
- }
+ test_find_words!(ascii_space_empty, unicode_empty, ["", [], []]);
+
+ test_find_words!(
+ ascii_single_word,
+ unicode_single_word,
+ ["foo", ["foo"], ["foo"]]
+ );
+
+ test_find_words!(
+ ascii_two_words,
+ unicode_two_words,
+ ["foo bar", ["foo ", "bar"], ["foo ", "bar"]]
+ );
+
+ test_find_words!(
+ ascii_multiple_words,
+ unicode_multiple_words,
+ ["foo bar", ["foo ", "bar"], ["foo ", "bar"]],
+ ["x y z", ["x ", "y ", "z"], ["x ", "y ", "z"]]
+ );
+
+ test_find_words!(
+ ascii_only_whitespace,
+ unicode_only_whitespace,
+ [" ", [" "], [" "]],
+ [" ", [" "], [" "]]
+ );
+
+ test_find_words!(
+ ascii_inter_word_whitespace,
+ unicode_inter_word_whitespace,
+ ["foo bar", ["foo ", "bar"], ["foo ", "bar"]]
+ );
+
+ test_find_words!(
+ ascii_trailing_whitespace,
+ unicode_trailing_whitespace,
+ ["foo ", ["foo "], ["foo "]]
+ );
+
+ test_find_words!(
+ ascii_leading_whitespace,
+ unicode_leading_whitespace,
+ [" foo", [" ", "foo"], [" ", "foo"]]
+ );
+
+ test_find_words!(
+ ascii_multi_column_char,
+ unicode_multi_column_char,
+ ["\u{1f920}", ["\u{1f920}"], ["\u{1f920}"]] // cowboy emoji 🤠
+ );
+
+ test_find_words!(
+ ascii_hyphens,
+ unicode_hyphens,
+ ["foo-bar", ["foo-bar"], ["foo-bar"]],
+ ["foo- bar", ["foo- ", "bar"], ["foo- ", "bar"]],
+ ["foo - bar", ["foo ", "- ", "bar"], ["foo ", "- ", "bar"]],
+ ["foo -bar", ["foo ", "-bar"], ["foo ", "-bar"]]
+ );
+
+ test_find_words!(
+ ascii_newline,
+ unicode_newline,
+ ["foo\nbar", ["foo\nbar"], ["foo\n", "bar"]]
+ );
+
+ test_find_words!(
+ ascii_tab,
+ unicode_tab,
+ ["foo\tbar", ["foo\tbar"], ["foo\t", "bar"]]
+ );
+
+ test_find_words!(
+ ascii_non_breaking_space,
+ unicode_non_breaking_space,
+ ["foo\u{00A0}bar", ["foo\u{00A0}bar"], ["foo\u{00A0}bar"]]
+ );
#[test]
#[cfg(unix)]
- fn ascii_space_colored_text() {
+ fn find_words_colored_text() {
use termion::color::{Blue, Fg, Green, Reset};
let green_hello = format!("{}Hello{} ", Fg(Green), Fg(Reset));
@@ -393,7 +415,7 @@ mod tests {
}
#[test]
- fn ascii_space_color_inside_word() {
+ fn find_words_color_inside_word() {
let text = "foo\u{1b}[0m\u{1b}[32mbar\u{1b}[0mbaz";
assert_iter_eq!(AsciiSpace.find_words(&text), vec![Word::from(text)]);
diff --git a/src/word_splitters.rs b/src/word_splitters.rs
index f4d94c7..69e246f 100644
--- a/src/word_splitters.rs
+++ b/src/word_splitters.rs
@@ -1,30 +1,26 @@
//! Word splitting functionality.
//!
//! To wrap text into lines, long words sometimes need to be split
-//! across lines. The [`WordSplitter`] trait defines this
-//! functionality. [`HyphenSplitter`] is the default implementation of
-//! this treat: it will simply split words on existing hyphens.
-
-use std::ops::Deref;
+//! across lines. The [`WordSplitter`] enum defines this
+//! functionality.
use crate::core::{display_width, Word};
-/// The `WordSplitter` trait describes where words can be split.
+/// The `WordSplitter` enum describes where words can be split.
///
/// If the textwrap crate has been compiled with the `hyphenation`
-/// Cargo feature enabled, you will find an implementation of
-/// `WordSplitter` by the `hyphenation::Standard` struct. Use this
-/// struct for language-aware hyphenation:
+/// Cargo feature enabled, you will find a
+/// [`WordSplitter::Hyphenation`] variant. Use this variant for
+/// language-aware hyphenation:
///
/// ```
-/// #[cfg(feature = "hyphenation")]
-/// {
+/// #[cfg(feature = "hyphenation")] {
/// use hyphenation::{Language, Load, Standard};
-/// use textwrap::{wrap, Options};
+/// use textwrap::{wrap, Options, WordSplitter};
///
/// let text = "Oxidation is the loss of electrons.";
/// let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
-/// let options = Options::new(8).word_splitter(dictionary);
+/// let options = Options::new(8).word_splitter(WordSplitter::Hyphenation(dictionary));
/// assert_eq!(wrap(text, &options), vec!["Oxida-",
/// "tion is",
/// "the loss",
@@ -37,124 +33,142 @@ use crate::core::{display_width, Word};
/// details.
///
/// [hyphenation]: https://docs.rs/hyphenation/
-pub trait WordSplitter: WordSplitterClone + std::fmt::Debug {
- /// Return all possible indices where `word` can be split.
+#[derive(Clone)]
+pub enum WordSplitter {
+ /// Use this as a [`Options.word_splitter`] to avoid any kind of
+ /// hyphenation:
///
- /// The indices returned must be in range `0..word.len()`. They
- /// should point to the index _after_ the split point, i.e., after
- /// `-` if splitting on hyphens. This way, `word.split_at(idx)`
- /// will break the word into two well-formed pieces.
+ /// ```
+ /// use textwrap::{wrap, Options, WordSplitter};
+ ///
+ /// let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
+ /// assert_eq!(wrap("foo bar-baz", &options),
+ /// vec!["foo", "bar-baz"]);
+ /// ```
+ ///
+ /// [`Options.word_splitter`]: super::Options::word_splitter
+ NoHyphenation,
+
+ /// `HyphenSplitter` is the default `WordSplitter` used by
+ /// [`Options::new`](super::Options::new). It will split words on
+ /// existing hyphens in the word.
+ ///
+ /// It will only use hyphens that are surrounded by alphanumeric
+ /// characters, which prevents a word like `"--foo-bar"` from
+ /// being split into `"--"` and `"foo-bar"`.
///
/// # Examples
///
/// ```
- /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter};
- /// assert_eq!(NoHyphenation.split_points("cannot-be-split"), vec![]);
- /// assert_eq!(HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
+ /// use textwrap::WordSplitter;
+ ///
+ /// assert_eq!(WordSplitter::HyphenSplitter.split_points("--foo-bar"),
+ /// vec![6]);
/// ```
- fn split_points(&self, word: &str) -> Vec<usize>;
-}
-
-// The internal `WordSplitterClone` trait is allows us to implement
-// `Clone` for `Box<dyn WordSplitter>`. This in used in the
-// `From<&Options<'_, WrapAlgo, WordSep, WordSplit>> for Options<'a,
-// WrapAlgo, WordSep, WordSplit>` implementation.
-#[doc(hidden)]
-pub trait WordSplitterClone {
- fn clone_box(&self) -> Box<dyn WordSplitter>;
-}
+ HyphenSplitter,
-impl<T: WordSplitter + Clone + 'static> WordSplitterClone for T {
- fn clone_box(&self) -> Box<dyn WordSplitter> {
- Box::new(self.clone())
- }
-}
+ /// Use a custom function as the word splitter.
+ ///
+ /// This variant lets you implement a custom word splitter using
+ /// your own function.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use textwrap::WordSplitter;
+ ///
+ /// fn split_at_underscore(word: &str) -> Vec<usize> {
+ /// word.match_indices('_').map(|(idx, _)| idx + 1).collect()
+ /// }
+ ///
+ /// let word_splitter = WordSplitter::Custom(split_at_underscore);
+ /// assert_eq!(word_splitter.split_points("a_long_identifier"),
+ /// vec![2, 7]);
+ /// ```
+ Custom(fn(word: &str) -> Vec<usize>),
-impl Clone for Box<dyn WordSplitter> {
- fn clone(&self) -> Box<dyn WordSplitter> {
- self.deref().clone_box()
- }
+ /// A hyphenation dictionary can be used to do language-specific
+ /// hyphenation using patterns from the [hyphenation] crate.
+ ///
+ /// **Note:** Only available when the `hyphenation` Cargo feature is
+ /// enabled.
+ ///
+ /// [hyphenation]: https://docs.rs/hyphenation/
+ #[cfg(feature = "hyphenation")]
+ Hyphenation(hyphenation::Standard),
}
-impl WordSplitter for Box<dyn WordSplitter> {
- fn split_points(&self, word: &str) -> Vec<usize> {
- self.deref().split_points(word)
+impl std::fmt::Debug for WordSplitter {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ WordSplitter::NoHyphenation => f.write_str("NoHyphenation"),
+ WordSplitter::HyphenSplitter => f.write_str("HyphenSplitter"),
+ WordSplitter::Custom(_) => f.write_str("Custom(...)"),
+ #[cfg(feature = "hyphenation")]
+ WordSplitter::Hyphenation(dict) => write!(f, "Hyphenation({})", dict.language()),
+ }
}
}
-/// Use this as a [`Options.word_splitter`] to avoid any kind of
-/// hyphenation:
-///
-/// ```
-/// use textwrap::{wrap, Options};
-/// use textwrap::word_splitters::NoHyphenation;
-///
-/// let options = Options::new(8).word_splitter(NoHyphenation);
-/// assert_eq!(wrap("foo bar-baz", &options),
-/// vec!["foo", "bar-baz"]);
-/// ```
-///
-/// [`Options.word_splitter`]: super::Options::word_splitter
-#[derive(Clone, Copy, Debug)]
-pub struct NoHyphenation;
-
-/// `NoHyphenation` implements `WordSplitter` by not splitting the
-/// word at all.
-impl WordSplitter for NoHyphenation {
- fn split_points(&self, _: &str) -> Vec<usize> {
- Vec::new()
+impl PartialEq<WordSplitter> for WordSplitter {
+ fn eq(&self, other: &WordSplitter) -> bool {
+ match (self, other) {
+ (WordSplitter::NoHyphenation, WordSplitter::NoHyphenation) => true,
+ (WordSplitter::HyphenSplitter, WordSplitter::HyphenSplitter) => true,
+ #[cfg(feature = "hyphenation")]
+ (WordSplitter::Hyphenation(this_dict), WordSplitter::Hyphenation(other_dict)) => {
+ this_dict.language() == other_dict.language()
+ }
+ (_, _) => false,
+ }
}
}
-/// Simple and default way to split words: splitting on existing
-/// hyphens only.
-///
-/// You probably don't need to use this type since it's already used
-/// by default by [`Options::new`](super::Options::new).
-#[derive(Clone, Copy, Debug)]
-pub struct HyphenSplitter;
+impl WordSplitter {
+ /// Return all possible indices where `word` can be split.
+ ///
+ /// The indices are in the range `0..word.len()`. They point to
+ /// the index _after_ the split point, i.e., after `-` if
+ /// splitting on hyphens. This way, `word.split_at(idx)` will
+ /// break the word into two well-formed pieces.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use textwrap::WordSplitter;
+ /// assert_eq!(WordSplitter::NoHyphenation.split_points("cannot-be-split"), vec![]);
+ /// assert_eq!(WordSplitter::HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
+ /// assert_eq!(WordSplitter::Custom(|word| vec![word.len()/2]).split_points("middle"), vec![3]);
+ /// ```
+ pub fn split_points(&self, word: &str) -> Vec<usize> {
+ match self {
+ WordSplitter::NoHyphenation => Vec::new(),
+ WordSplitter::HyphenSplitter => {
+ let mut splits = Vec::new();
-/// `HyphenSplitter` is the default `WordSplitter` used by
-/// [`Options::new`](super::Options::new). It will split words on any
-/// existing hyphens in the word.
-///
-/// It will only use hyphens that are surrounded by alphanumeric
-/// characters, which prevents a word like `"--foo-bar"` from being
-/// split into `"--"` and `"foo-bar"`.
-impl WordSplitter for HyphenSplitter {
- fn split_points(&self, word: &str) -> Vec<usize> {
- let mut splits = Vec::new();
+ for (idx, _) in word.match_indices('-') {
+ // We only use hyphens that are surrounded by alphanumeric
+ // characters. This is to avoid splitting on repeated hyphens,
+ // such as those found in --foo-bar.
+ let prev = word[..idx].chars().next_back();
+ let next = word[idx + 1..].chars().next();
- for (idx, _) in word.match_indices('-') {
- // We only use hyphens that are surrounded by alphanumeric
- // characters. This is to avoid splitting on repeated hyphens,
- // such as those found in --foo-bar.
- let prev = word[..idx].chars().next_back();
- let next = word[idx + 1..].chars().next();
+ if prev.filter(|ch| ch.is_alphanumeric()).is_some()
+ && next.filter(|ch| ch.is_alphanumeric()).is_some()
+ {
+ splits.push(idx + 1); // +1 due to width of '-'.
+ }
+ }
- if prev.filter(|ch| ch.is_alphanumeric()).is_some()
- && next.filter(|ch| ch.is_alphanumeric()).is_some()
- {
- splits.push(idx + 1); // +1 due to width of '-'.
+ splits
+ }
+ WordSplitter::Custom(splitter_func) => splitter_func(word),
+ #[cfg(feature = "hyphenation")]
+ WordSplitter::Hyphenation(dictionary) => {
+ use hyphenation::Hyphenator;
+ dictionary.hyphenate(word).breaks
}
}
-
- splits
- }
-}
-
-/// A hyphenation dictionary can be used to do language-specific
-/// hyphenation using patterns from the [hyphenation] crate.
-///
-/// **Note:** Only available when the `hyphenation` Cargo feature is
-/// enabled.
-///
-/// [hyphenation]: https://docs.rs/hyphenation/
-#[cfg(feature = "hyphenation")]
-impl WordSplitter for hyphenation::Standard {
- fn split_points(&self, word: &str) -> Vec<usize> {
- use hyphenation::Hyphenator;
- self.hyphenate(word).breaks
}
}
@@ -164,31 +178,12 @@ impl WordSplitter for hyphenation::Standard {
/// Note that we split all words, regardless of their length. This is
/// to more cleanly separate the business of splitting (including
/// automatic hyphenation) from the business of word wrapping.
-///
-/// # Examples
-///
-/// ```
-/// use textwrap::core::Word;
-/// use textwrap::word_splitters::{split_words, NoHyphenation, HyphenSplitter};
-///
-/// assert_eq!(
-/// split_words(vec![Word::from("foo-bar")], &HyphenSplitter).collect::<Vec<_>>(),
-/// vec![Word::from("foo-"), Word::from("bar")]
-/// );
-///
-/// // The NoHyphenation splitter ignores the '-':
-/// assert_eq!(
-/// split_words(vec![Word::from("foo-bar")], &NoHyphenation).collect::<Vec<_>>(),
-/// vec![Word::from("foo-bar")]
-/// );
-/// ```
-pub fn split_words<'a, I, WordSplit>(
+pub fn split_words<'a, I>(
words: I,
- word_splitter: &'a WordSplit,
+ word_splitter: &'a WordSplitter,
) -> impl Iterator<Item = Word<'a>>
where
I: IntoIterator<Item = Word<'a>>,
- WordSplit: WordSplitter,
{
words.into_iter().flat_map(move |word| {
let mut prev = 0;
@@ -235,13 +230,13 @@ mod tests {
#[test]
fn split_words_no_words() {
- assert_iter_eq!(split_words(vec![], &HyphenSplitter), vec![]);
+ assert_iter_eq!(split_words(vec![], &WordSplitter::HyphenSplitter), vec![]);
}
#[test]
fn split_words_empty_word() {
assert_iter_eq!(
- split_words(vec![Word::from(" ")], &HyphenSplitter),
+ split_words(vec![Word::from(" ")], &WordSplitter::HyphenSplitter),
vec![Word::from(" ")]
);
}
@@ -249,7 +244,7 @@ mod tests {
#[test]
fn split_words_single_word() {
assert_iter_eq!(
- split_words(vec![Word::from("foobar")], &HyphenSplitter),
+ split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter),
vec![Word::from("foobar")]
);
}
@@ -257,23 +252,28 @@ mod tests {
#[test]
fn split_words_hyphen_splitter() {
assert_iter_eq!(
- split_words(vec![Word::from("foo-bar")], &HyphenSplitter),
+ split_words(vec![Word::from("foo-bar")], &WordSplitter::HyphenSplitter),
vec![Word::from("foo-"), Word::from("bar")]
);
}
#[test]
+ fn split_words_no_hyphenation() {
+ assert_iter_eq!(
+ split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation),
+ vec![Word::from("foo-bar")]
+ );
+ }
+
+ #[test]
fn split_words_adds_penalty() {
- #[derive(Clone, Debug)]
- struct FixedSplitPoint;
- impl WordSplitter for FixedSplitPoint {
- fn split_points(&self, _: &str) -> Vec<usize> {
- vec![3]
- }
- }
+ let fixed_split_point = |_: &str| vec![3];
assert_iter_eq!(
- split_words(vec![Word::from("foobar")].into_iter(), &FixedSplitPoint),
+ split_words(
+ vec![Word::from("foobar")].into_iter(),
+ &WordSplitter::Custom(fixed_split_point)
+ ),
vec![
Word {
word: "foo",
@@ -291,7 +291,10 @@ mod tests {
);
assert_iter_eq!(
- split_words(vec![Word::from("fo-bar")].into_iter(), &FixedSplitPoint),
+ split_words(
+ vec![Word::from("fo-bar")].into_iter(),
+ &WordSplitter::Custom(fixed_split_point)
+ ),
vec![
Word {
word: "fo-",
diff --git a/src/wrap_algorithms.rs b/src/wrap_algorithms.rs
index 368ef2a..5ca49c3 100644
--- a/src/wrap_algorithms.rs
+++ b/src/wrap_algorithms.rs
@@ -18,69 +18,149 @@
#[cfg(feature = "smawk")]
mod optimal_fit;
#[cfg(feature = "smawk")]
-pub use optimal_fit::{wrap_optimal_fit, OptimalFit};
+pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};
use crate::core::{Fragment, Word};
/// Describes how to wrap words into lines.
///
-/// The simplest approach is to wrap words one word at a time. This is
-/// implemented by [`FirstFit`]. If the `smawk` Cargo feature is
-/// enabled, a more complex algorithm is available, implemented by
-/// [`OptimalFit`], which will look at an entire paragraph at a time
-/// in order to find optimal line breaks.
-pub trait WrapAlgorithm: WrapAlgorithmClone + std::fmt::Debug {
- /// Wrap words according to line widths.
+/// The simplest approach is to wrap words one word at a time and
+/// accept the first way of wrapping which fits
+/// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
+/// enabled, a more complex algorithm is available which will look at
+/// an entire paragraph at a time in order to find optimal line breaks
+/// ([`WrapAlgorithm::OptimalFit`]).
+#[derive(Clone, Copy)]
+pub enum WrapAlgorithm {
+ /// Wrap words using a fast and simple algorithm.
///
- /// The `line_widths` slice gives the target line width for each
- /// line (the last slice element is repeated as necessary). This
- /// can be used to implement hanging indentation.
+ /// This algorithm uses no look-ahead when finding line breaks.
+ /// Implemented by [`wrap_first_fit`], please see that function for
+ /// details and examples.
+ FirstFit,
+
+ /// Wrap words using an advanced algorithm with look-ahead.
///
- /// Please see the implementors of the trait for examples.
- fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>;
-}
+ /// This wrapping algorithm considers the entire paragraph to find
+ /// optimal line breaks. When wrapping text, "penalties" are
+ /// assigned to line breaks based on the gaps left at the end of
+ /// lines. See [`Penalties`] for details.
+ ///
+ /// The underlying wrapping algorithm is implemented by
+ /// [`wrap_optimal_fit`], please see that function for examples.
+ ///
+ /// **Note:** Only available when the `smawk` Cargo feature is
+ /// enabled.
+ #[cfg(feature = "smawk")]
+ OptimalFit(Penalties),
-// The internal `WrapAlgorithmClone` trait is allows us to implement
-// `Clone` for `Box<dyn WrapAlgorithm>`. This in used in the
-// `From<&Options<'_, WrapAlgo, WordSep, WordSplit>> for Options<'a,
-// WrapAlgo, WordSep, WordSplit>` implementation.
-#[doc(hidden)]
-pub trait WrapAlgorithmClone {
- fn clone_box(&self) -> Box<dyn WrapAlgorithm>;
+ /// Custom wrapping function.
+ ///
+ /// Use this if you want to implement your own wrapping algorithm.
+ /// The function can freely decide how to turn a slice of
+ /// [`Word`]s into lines.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use textwrap::core::Word;
+ /// use textwrap::{wrap, Options, WrapAlgorithm};
+ ///
+ /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
+ /// let mut lines = Vec::new();
+ /// let mut step = 1;
+ /// let mut start_idx = 0;
+ /// while start_idx + step <= words.len() {
+ /// lines.push(&words[start_idx .. start_idx+step]);
+ /// start_idx += step;
+ /// step += 1;
+ /// }
+ /// lines
+ /// }
+ ///
+ /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
+ /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
+ /// vec!["First,",
+ /// "second, third,",
+ /// "fourth, fifth, sixth"]);
+ /// ```
+ Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
}
-impl<T: WrapAlgorithm + Clone + 'static> WrapAlgorithmClone for T {
- fn clone_box(&self) -> Box<dyn WrapAlgorithm> {
- Box::new(self.clone())
+impl std::fmt::Debug for WrapAlgorithm {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ WrapAlgorithm::FirstFit => f.write_str("FirstFit"),
+ #[cfg(feature = "smawk")]
+ WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties),
+ WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"),
+ }
}
}
-impl Clone for Box<dyn WrapAlgorithm> {
- fn clone(&self) -> Box<dyn WrapAlgorithm> {
- use std::ops::Deref;
- self.deref().clone_box()
- }
-}
+impl WrapAlgorithm {
+ /// Create new wrap algorithm.
+ ///
+ /// The best wrapping algorithm is used by default, i.e.,
+ /// [`WrapAlgorithm::OptimalFit`] if available, otherwise
+ /// [`WrapAlgorithm::FirstFit`].
+ pub const fn new() -> Self {
+ #[cfg(not(feature = "smawk"))]
+ {
+ WrapAlgorithm::FirstFit
+ }
-impl WrapAlgorithm for Box<dyn WrapAlgorithm> {
- fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> {
- use std::ops::Deref;
- self.deref().wrap(words, line_widths)
+ #[cfg(feature = "smawk")]
+ {
+ WrapAlgorithm::new_optimal_fit()
+ }
}
-}
-/// Wrap words using a fast and simple algorithm.
-///
-/// This algorithm uses no look-ahead when finding line breaks.
-/// Implemented by [`wrap_first_fit`], please see that function for
-/// details and examples.
-#[derive(Clone, Copy, Debug, Default)]
-pub struct FirstFit;
+ /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
+ /// works well for monospace text.
+ ///
+ /// **Note:** Only available when the `smawk` Cargo feature is
+ /// enabled.
+ #[cfg(feature = "smawk")]
+ pub const fn new_optimal_fit() -> Self {
+ WrapAlgorithm::OptimalFit(Penalties::new())
+ }
-impl WrapAlgorithm for FirstFit {
+ /// Wrap words according to line widths.
+ ///
+ /// The `line_widths` slice gives the target line width for each
+ /// line (the last slice element is repeated as necessary). This
+ /// can be used to implement hanging indentation.
#[inline]
- fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> {
- wrap_first_fit(words, line_widths)
+ pub fn wrap<'a, 'b>(
+ &self,
+ words: &'b [Word<'a>],
+ line_widths: &'b [usize],
+ ) -> Vec<&'b [Word<'a>]> {
+ // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
+ // = 9_007_199_254_740_992 can be represented without loss by
+ // a f64. Larger line widths will be rounded to the nearest
+ // representable number.
+ let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();
+
+ match self {
+ WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),
+
+ #[cfg(feature = "smawk")]
+ WrapAlgorithm::OptimalFit(penalties) => {
+ // The computation cannot overflow when the line
+ // widths are restricted to usize.
+ wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
+ }
+
+ WrapAlgorithm::Custom(func) => func(words, line_widths),
+ }
+ }
+}
+
+impl Default for WrapAlgorithm {
+ fn default() -> Self {
+ WrapAlgorithm::new()
}
}
@@ -107,8 +187,8 @@ impl WrapAlgorithm for FirstFit {
///
/// ```
/// use textwrap::core::Word;
-/// use textwrap::wrap_algorithms;
-/// use textwrap::word_separators::{AsciiSpace, WordSeparator};
+/// use textwrap::wrap_algorithms::wrap_first_fit;
+/// use textwrap::WordSeparator;
///
/// // Helper to convert wrapped lines to a Vec<String>.
/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
@@ -118,8 +198,8 @@ impl WrapAlgorithm for FirstFit {
/// }
///
/// let text = "These few words will unfortunately not wrap nicely.";
-/// let words = AsciiSpace.find_words(text).collect::<Vec<_>>();
-/// assert_eq!(lines_to_strings(wrap_algorithms::wrap_first_fit(&words, &[15])),
+/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
+/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
/// vec!["These few words",
/// "will", // <-- short line
/// "unfortunately",
@@ -128,7 +208,9 @@ impl WrapAlgorithm for FirstFit {
///
/// // We can avoid the short line if we look ahead:
/// #[cfg(feature = "smawk")]
-/// assert_eq!(lines_to_strings(wrap_algorithms::wrap_optimal_fit(&words, &[15])),
+/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
+/// #[cfg(feature = "smawk")]
+/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
/// vec!["These few",
/// "words will",
/// "unfortunately",
@@ -157,47 +239,47 @@ impl WrapAlgorithm for FirstFit {
/// on your estimates. You can model this with a program like this:
///
/// ```
-/// use textwrap::wrap_algorithms::wrap_first_fit;
/// use textwrap::core::{Fragment, Word};
+/// use textwrap::wrap_algorithms::wrap_first_fit;
///
/// #[derive(Debug)]
/// struct Task<'a> {
/// name: &'a str,
-/// hours: usize, // Time needed to complete task.
-/// sweep: usize, // Time needed for a quick sweep after task during the day.
-/// cleanup: usize, // Time needed for full cleanup if day ends with this task.
+/// hours: f64, // Time needed to complete task.
+/// sweep: f64, // Time needed for a quick sweep after task during the day.
+/// cleanup: f64, // Time needed for full cleanup if day ends with this task.
/// }
///
/// impl Fragment for Task<'_> {
-/// fn width(&self) -> usize { self.hours }
-/// fn whitespace_width(&self) -> usize { self.sweep }
-/// fn penalty_width(&self) -> usize { self.cleanup }
+/// fn width(&self) -> f64 { self.hours }
+/// fn whitespace_width(&self) -> f64 { self.sweep }
+/// fn penalty_width(&self) -> f64 { self.cleanup }
/// }
///
/// // The morning tasks
/// let tasks = vec![
-/// Task { name: "Foundation", hours: 4, sweep: 2, cleanup: 3 },
-/// Task { name: "Framing", hours: 3, sweep: 1, cleanup: 2 },
-/// Task { name: "Plumbing", hours: 2, sweep: 2, cleanup: 2 },
-/// Task { name: "Electrical", hours: 2, sweep: 1, cleanup: 2 },
-/// Task { name: "Insulation", hours: 2, sweep: 1, cleanup: 2 },
-/// Task { name: "Drywall", hours: 3, sweep: 1, cleanup: 2 },
-/// Task { name: "Floors", hours: 3, sweep: 1, cleanup: 2 },
-/// Task { name: "Countertops", hours: 1, sweep: 1, cleanup: 2 },
-/// Task { name: "Bathrooms", hours: 2, sweep: 1, cleanup: 2 },
+/// Task { name: "Foundation", hours: 4.0, sweep: 2.0, cleanup: 3.0 },
+/// Task { name: "Framing", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
+/// Task { name: "Plumbing", hours: 2.0, sweep: 2.0, cleanup: 2.0 },
+/// Task { name: "Electrical", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
+/// Task { name: "Insulation", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
+/// Task { name: "Drywall", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
+/// Task { name: "Floors", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
+/// Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
+/// Task { name: "Bathrooms", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
/// ];
///
/// // Fill tasks into days, taking `day_length` into account. The
/// // output shows the hours worked per day along with the names of
/// // the tasks for that day.
-/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: usize) -> Vec<(usize, Vec<&'a str>)> {
+/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
/// let mut days = Vec::new();
/// // Assign tasks to days. The assignment is a vector of slices,
/// // with a slice per day.
/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
/// for day in assigned_days.iter() {
/// let last = day.last().unwrap();
-/// let work_hours: usize = day.iter().map(|t| t.hours + t.sweep).sum();
+/// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
/// let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
/// days.push((work_hours - last.sweep + last.cleanup, names));
/// }
@@ -206,24 +288,24 @@ impl WrapAlgorithm for FirstFit {
///
/// // With a single crew working 8 hours a day:
/// assert_eq!(
-/// assign_days(&tasks, 8),
+/// assign_days(&tasks, 8.0),
/// [
-/// (7, vec!["Foundation"]),
-/// (8, vec!["Framing", "Plumbing"]),
-/// (7, vec!["Electrical", "Insulation"]),
-/// (5, vec!["Drywall"]),
-/// (7, vec!["Floors", "Countertops"]),
-/// (4, vec!["Bathrooms"]),
+/// (7.0, vec!["Foundation"]),
+/// (8.0, vec!["Framing", "Plumbing"]),
+/// (7.0, vec!["Electrical", "Insulation"]),
+/// (5.0, vec!["Drywall"]),
+/// (7.0, vec!["Floors", "Countertops"]),
+/// (4.0, vec!["Bathrooms"]),
/// ]
/// );
///
/// // With two crews working in shifts, 16 hours a day:
/// assert_eq!(
-/// assign_days(&tasks, 16),
+/// assign_days(&tasks, 16.0),
/// [
-/// (14, vec!["Foundation", "Framing", "Plumbing"]),
-/// (15, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
-/// (6, vec!["Countertops", "Bathrooms"]),
+/// (14.0, vec!["Foundation", "Framing", "Plumbing"]),
+/// (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
+/// (6.0, vec!["Countertops", "Bathrooms"]),
/// ]
/// );
/// ```
@@ -232,13 +314,13 @@ impl WrapAlgorithm for FirstFit {
/// knows how long each step takes :-)
pub fn wrap_first_fit<'a, 'b, T: Fragment>(
fragments: &'a [T],
- line_widths: &'b [usize],
+ line_widths: &'b [f64],
) -> Vec<&'a [T]> {
// The final line width is used for all remaining lines.
- let default_line_width = line_widths.last().copied().unwrap_or(0);
+ let default_line_width = line_widths.last().copied().unwrap_or(0.0);
let mut lines = Vec::new();
let mut start = 0;
- let mut width = 0;
+ let mut width = 0.0;
for (idx, fragment) in fragments.iter().enumerate() {
let line_width = line_widths
@@ -248,10 +330,52 @@ pub fn wrap_first_fit<'a, 'b, T: Fragment>(
if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
lines.push(&fragments[start..idx]);
start = idx;
- width = 0;
+ width = 0.0;
}
width += fragment.width() + fragment.whitespace_width();
}
lines.push(&fragments[start..]);
lines
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[derive(Debug, PartialEq)]
+ struct Word(f64);
+
+ #[rustfmt::skip]
+ impl Fragment for Word {
+ fn width(&self) -> f64 { self.0 }
+ fn whitespace_width(&self) -> f64 { 1.0 }
+ fn penalty_width(&self) -> f64 { 0.0 }
+ }
+
+ #[test]
+ fn wrap_string_longer_than_f64() {
+ let words = vec![
+ Word(1e307),
+ Word(2e307),
+ Word(3e307),
+ Word(4e307),
+ Word(5e307),
+ Word(6e307),
+ ];
+ // Wrap at just under f64::MAX (~18e307). The tiny
+ // whitespace_widths disappear because of loss of precision.
+ assert_eq!(
+ wrap_first_fit(&words, &[15e307]),
+ &[
+ vec![
+ Word(1e307),
+ Word(2e307),
+ Word(3e307),
+ Word(4e307),
+ Word(5e307)
+ ],
+ vec![Word(6e307)]
+ ]
+ );
+ }
+}
diff --git a/src/wrap_algorithms/optimal_fit.rs b/src/wrap_algorithms/optimal_fit.rs
index 95ecf1f..0625e28 100644
--- a/src/wrap_algorithms/optimal_fit.rs
+++ b/src/wrap_algorithms/optimal_fit.rs
@@ -1,23 +1,157 @@
use std::cell::RefCell;
-use crate::core::{Fragment, Word};
-use crate::wrap_algorithms::WrapAlgorithm;
+use crate::core::Fragment;
-/// Wrap words using an advanced algorithm with look-ahead.
+/// Penalties for
+/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)
+/// and [`wrap_optimal_fit`].
///
-/// This wrapping algorithm considers the entire paragraph to find
-/// optimal line breaks. Implemented by [`wrap_optimal_fit`], please
-/// see that function for details and examples.
+/// The wrapping algorithm in [`wrap_optimal_fit`] considers the
+/// entire paragraph to find optimal line breaks. When wrapping text,
+/// "penalties" are assigned to line breaks based on the gaps left at
+/// the end of lines. The penalties are given by this struct, with
+/// [`Penalties::default`] assigning penalties that work well for
+/// monospace text.
+///
+/// If you are wrapping proportional text, you are advised to assign
+/// your own penalties according to your font size. See the individual
+/// penalties below for details.
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
-#[derive(Clone, Copy, Debug, Default)]
-pub struct OptimalFit;
+#[derive(Clone, Copy, Debug)]
+pub struct Penalties {
+ /// Per-line penalty. This is added for every line, which makes it
+ /// expensive to output more lines than the minimum required.
+ pub nline_penalty: usize,
+
+ /// Per-character cost for lines that overflow the target line width.
+ ///
+ /// With a default value of 50², every single character costs as
+ /// much as leaving a gap of 50 characters behind. This is because
+ /// we assign a cost of `gap * gap` to a short line. When
+ /// wrapping monospace text, we can overflow the line by 1
+ /// character in extreme cases:
+ ///
+ /// ```
+ /// use textwrap::core::Word;
+ /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
+ ///
+ /// let short = "foo ";
+ /// let long = "x".repeat(50);
+ /// let length = (short.len() + long.len()) as f64;
+ /// let fragments = vec![Word::from(short), Word::from(&long)];
+ /// let penalties = Penalties::new();
+ ///
+ /// // Perfect fit, both words are on a single line with no overflow.
+ /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();
+ /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
+ ///
+ /// // The words no longer fit, yet we get a single line back. While
+ /// // the cost of overflow (`1 * 2500`) is the same as the cost of the
+ /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`
+ /// // which makes it cheaper to overflow than to use two lines.
+ /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();
+ /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
+ ///
+ /// // The cost of overflow would be 2 * 2500, whereas the cost of
+ /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =
+ /// // 3401`. We therefore get two lines.
+ /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();
+ /// assert_eq!(wrapped, vec![&[Word::from(short)],
+ /// &[Word::from(&long)]]);
+ /// ```
+ ///
+ /// This only happens if the overflowing word is 50 characters
+ /// long _and_ if the word overflows the line by exactly one
+ /// character. If it overflows by more than one character, the
+ /// overflow penalty will quickly outgrow the cost of the gap, as
+ /// seen above.
+ pub overflow_penalty: usize,
+
+ /// When should a single word on the last line be considered
+ /// "too short"?
+ ///
+ /// If the last line of the text consists of a single word and if
+ /// this word is shorter than `1 / short_last_line_fraction` of
+ /// the line width, then the final line will be considered "short"
+ /// and `short_last_line_penalty` is added as an extra penalty.
+ ///
+ /// The effect of this is to avoid a final line consisting of a
+ /// single small word. For example, with a
+ /// `short_last_line_penalty` of 25 (the default), a gap of up to
+ /// 5 columns will be seen as more desirable than having a final
+ /// short line.
+ ///
+ /// ## Examples
+ ///
+ /// ```
+ /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};
+ ///
+ /// let text = "This is a demo of the short last line penalty.";
+ ///
+ /// // The first-fit algorithm leaves a single short word on the last line:
+ /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),
+ /// vec!["This is a demo of the short last line",
+ /// "penalty."]);
+ ///
+ /// #[cfg(feature = "smawk")] {
+ /// let mut penalties = wrap_algorithms::Penalties::new();
+ ///
+ /// // Since "penalty." is shorter than 25% of the line width, the
+ /// // optimal-fit algorithm adds a penalty of 25. This is enough
+ /// // to move "line " down:
+ /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
+ /// vec!["This is a demo of the short last",
+ /// "line penalty."]);
+ ///
+ /// // We can change the meaning of "short" lines. Here, only words
+ /// // shorter than 1/10th of the line width will be considered short:
+ /// penalties.short_last_line_fraction = 10;
+ /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
+ /// vec!["This is a demo of the short last line",
+ /// "penalty."]);
+ ///
+ /// // If desired, the penalty can also be disabled:
+ /// penalties.short_last_line_fraction = 4;
+ /// penalties.short_last_line_penalty = 0;
+ /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
+ /// vec!["This is a demo of the short last line",
+ /// "penalty."]);
+ /// }
+ /// ```
+ pub short_last_line_fraction: usize,
+
+ /// Penalty for a last line with a single short word.
+ ///
+ /// Set this to zero if you do not want to penalize short last lines.
+ pub short_last_line_penalty: usize,
+
+ /// Penalty for lines ending with a hyphen.
+ pub hyphen_penalty: usize,
+}
-impl WrapAlgorithm for OptimalFit {
- #[inline]
- fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> {
- wrap_optimal_fit(words, line_widths)
+impl Penalties {
+ /// Default penalties for monospace text.
+ ///
+ /// The penalties here work well for monospace text. This is
+ /// because they expect the gaps at the end of lines to be roughly
+ /// in the range `0..100`. If the gaps are larger, the
+ /// `overflow_penalty` and `hyphen_penalty` become insignificant.
+ pub const fn new() -> Self {
+ Penalties {
+ nline_penalty: 1000,
+ overflow_penalty: 50 * 50,
+ short_last_line_fraction: 4,
+ short_last_line_penalty: 25,
+ hyphen_penalty: 25,
+ }
+ }
+}
+
+impl Default for Penalties {
+ fn default() -> Self {
+ Self::new()
}
}
@@ -39,7 +173,7 @@ impl LineNumbers {
fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
while self.line_numbers.borrow_mut().len() < i + 1 {
let pos = self.line_numbers.borrow().len();
- let line_number = 1 + self.get(minima[pos].0, &minima);
+ let line_number = 1 + self.get(minima[pos].0, minima);
self.line_numbers.borrow_mut().push(line_number);
}
@@ -47,58 +181,17 @@ impl LineNumbers {
}
}
-/// Per-line penalty. This is added for every line, which makes it
-/// expensive to output more lines than the minimum required.
-const NLINE_PENALTY: i32 = 1000;
+/// Overflow error during the [`wrap_optimal_fit`] computation.
+#[derive(Debug, PartialEq, Eq)]
+pub struct OverflowError;
-/// Per-character cost for lines that overflow the target line width.
-///
-/// With a value of 50², every single character costs as much as
-/// leaving a gap of 50 characters behind. This is becuase we assign
-/// as cost of `gap * gap` to a short line. This means that we can
-/// overflow the line by 1 character in extreme cases:
-///
-/// ```
-/// use textwrap::wrap_algorithms::wrap_optimal_fit;
-/// use textwrap::core::Word;
-///
-/// let short = "foo ";
-/// let long = "x".repeat(50);
-/// let fragments = vec![Word::from(short), Word::from(&long)];
-///
-/// // Perfect fit, both words are on a single line with no overflow.
-/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len()]);
-/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
-///
-/// // The words no longer fit, yet we get a single line back. While
-/// // the cost of overflow (`1 * 2500`) is the same as the cost of the
-/// // gap (`50 * 50 = 2500`), the tie is broken by `NLINE_PENALTY`
-/// // which makes it cheaper to overflow than to use two lines.
-/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 1]);
-/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
-///
-/// // The cost of overflow would be 2 * 2500, whereas the cost of the
-/// // gap is only `49 * 49 + NLINE_PENALTY = 2401 + 1000 = 3401`. We
-/// // therefore get two lines.
-/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 2]);
-/// assert_eq!(wrapped, vec![&[Word::from(short)],
-/// &[Word::from(&long)]]);
-/// ```
-///
-/// This only happens if the overflowing word is 50 characters long
-/// _and_ if it happens to overflow the line by exactly one character.
-/// If it overflows by more than one character, the overflow penalty
-/// will quickly outgrow the cost of the gap, as seen above.
-const OVERFLOW_PENALTY: i32 = 50 * 50;
-
-/// The last line is short if it is less than 1/4 of the target width.
-const SHORT_LINE_FRACTION: usize = 4;
-
-/// Penalize a short last line.
-const SHORT_LAST_LINE_PENALTY: i32 = 25;
+impl std::fmt::Display for OverflowError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "wrap_optimal_fit cost computation overflowed")
+ }
+}
-/// Penalty for lines ending with a hyphen.
-const HYPHEN_PENALTY: i32 = 25;
+impl std::error::Error for OverflowError {}
/// Wrap abstract fragments into lines with an optimal-fit algorithm.
///
@@ -173,16 +266,48 @@ const HYPHEN_PENALTY: i32 = 25;
/// code by David
/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
///
+/// # Errors
+///
+/// In case of an overflow during the cost computation, an `Err` is
+/// returned. Overflows happen when fragments or lines have infinite
+/// widths (`f64::INFINITY`) or if the widths are so large that the
+/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`
+/// (approximately 1e154):
+///
+/// ```
+/// use textwrap::core::Fragment;
+/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};
+///
+/// #[derive(Debug, PartialEq)]
+/// struct Word(f64);
+///
+/// impl Fragment for Word {
+/// fn width(&self) -> f64 { self.0 }
+/// fn whitespace_width(&self) -> f64 { 1.0 }
+/// fn penalty_width(&self) -> f64 { 0.0 }
+/// }
+///
+/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is
+/// // larger than f64::MAX:
+/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),
+/// Err(OverflowError));
+/// ```
+///
+/// When using fragment widths and line widths which fit inside a
+/// `u64`, overflows cannot happen. This means that fragments derived
+/// from a `&str` cannot cause overflows.
+///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
fragments: &'a [T],
- line_widths: &'b [usize],
-) -> Vec<&'a [T]> {
+ line_widths: &'b [f64],
+ penalties: &'b Penalties,
+) -> Result<Vec<&'a [T]>, OverflowError> {
// The final line width is used for all remaining lines.
- let default_line_width = line_widths.last().copied().unwrap_or(0);
+ let default_line_width = line_widths.last().copied().unwrap_or(0.0);
let mut widths = Vec::with_capacity(fragments.len() + 1);
- let mut width = 0;
+ let mut width = 0.0;
widths.push(width);
for fragment in fragments {
width += fragment.width() + fragment.whitespace_width();
@@ -191,18 +316,18 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
let line_numbers = LineNumbers::new(fragments.len());
- let minima = smawk::online_column_minima(0, widths.len(), |minima, i, j| {
+ let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| {
// Line number for fragment `i`.
- let line_number = line_numbers.get(i, &minima);
+ let line_number = line_numbers.get(i, minima);
let line_width = line_widths
.get(line_number)
.copied()
.unwrap_or(default_line_width);
- let target_width = std::cmp::max(1, line_width);
+ let target_width = line_width.max(1.0);
// Compute the width of a line spanning fragments[i..j] in
// constant time. We need to adjust widths[j] by subtracting
- // the whitespace of fragment[j-i] and then add the penalty.
+ // the whitespace of fragment[j-1] and then add the penalty.
let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
+ fragments[j - 1].penalty_width();
@@ -211,35 +336,43 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
// breaking before fragments[i].
//
// First, every extra line cost NLINE_PENALTY.
- let mut cost = minima[i].1 + NLINE_PENALTY;
+ let mut cost = minima[i].1 + penalties.nline_penalty as f64;
// Next, we add a penalty depending on the line length.
if line_width > target_width {
// Lines that overflow get a hefty penalty.
- let overflow = (line_width - target_width) as i32;
- cost += overflow * OVERFLOW_PENALTY;
+ let overflow = line_width - target_width;
+ cost += overflow * penalties.overflow_penalty as f64;
} else if j < fragments.len() {
// Other lines (except for the last line) get a milder
// penalty which depend on the size of the gap.
- let gap = (target_width - line_width) as i32;
+ let gap = target_width - line_width;
cost += gap * gap;
- } else if i + 1 == j && line_width < target_width / SHORT_LINE_FRACTION {
+ } else if i + 1 == j
+ && line_width < target_width / penalties.short_last_line_fraction as f64
+ {
// The last line can have any size gap, but we do add a
// penalty if the line is very short (typically because it
// contains just a single word).
- cost += SHORT_LAST_LINE_PENALTY;
+ cost += penalties.short_last_line_penalty as f64;
}
// Finally, we discourage hyphens.
- if fragments[j - 1].penalty_width() > 0 {
+ if fragments[j - 1].penalty_width() > 0.0 {
// TODO: this should use a penalty value from the fragment
// instead.
- cost += HYPHEN_PENALTY;
+ cost += penalties.hyphen_penalty as f64;
}
cost
});
+ for (_, cost) in &minima {
+ if cost.is_infinite() {
+ return Err(OverflowError);
+ }
+ }
+
let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
let mut pos = fragments.len();
loop {
@@ -252,5 +385,49 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
}
lines.reverse();
- lines
+ Ok(lines)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[derive(Debug, PartialEq)]
+ struct Word(f64);
+
+ #[rustfmt::skip]
+ impl Fragment for Word {
+ fn width(&self) -> f64 { self.0 }
+ fn whitespace_width(&self) -> f64 { 1.0 }
+ fn penalty_width(&self) -> f64 { 0.0 }
+ }
+
+ #[test]
+ fn wrap_fragments_with_infinite_widths() {
+ let words = vec![Word(f64::INFINITY)];
+ assert_eq!(
+ wrap_optimal_fit(&words, &[0.0], &Penalties::default()),
+ Err(OverflowError)
+ );
+ }
+
+ #[test]
+ fn wrap_fragments_with_huge_widths() {
+ let words = vec![Word(1e200), Word(1e250), Word(1e300)];
+ assert_eq!(
+ wrap_optimal_fit(&words, &[1e300], &Penalties::default()),
+ Err(OverflowError)
+ );
+ }
+
+ #[test]
+ fn wrap_fragments_with_large_widths() {
+ // The gaps will have sizes between 1e25 and 1e75. This
+ // makes the `gap * gap` cost fit comfortably in a f64.
+ let words = vec![Word(1e25), Word(1e50), Word(1e75)];
+ assert_eq!(
+ wrap_optimal_fit(&words, &[1e100], &Penalties::default()),
+ Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]])
+ );
+ }
}
diff --git a/tests/traits.rs b/tests/traits.rs
deleted file mode 100644
index cd0d73c..0000000
--- a/tests/traits.rs
+++ /dev/null
@@ -1,86 +0,0 @@
-use textwrap::word_separators::{AsciiSpace, WordSeparator};
-use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter};
-use textwrap::wrap_algorithms::{FirstFit, WrapAlgorithm};
-use textwrap::Options;
-
-/// Cleaned up type name.
-fn type_name<T: ?Sized>(_val: &T) -> String {
- std::any::type_name::<T>().replace("alloc::boxed::Box", "Box")
-}
-
-#[test]
-#[cfg(not(feature = "smawk"))]
-#[cfg(not(feature = "unicode-linebreak"))]
-fn static_hyphensplitter() {
- // Inferring the full type.
- let options = Options::new(10);
- assert_eq!(
- type_name(&options),
- format!(
- "textwrap::Options<{}, {}, {}>",
- "textwrap::wrap_algorithms::FirstFit",
- "textwrap::word_separators::AsciiSpace",
- "textwrap::word_splitters::HyphenSplitter"
- )
- );
-
- // Inferring part of the type.
- let options: Options<_, _, HyphenSplitter> = Options::new(10);
- assert_eq!(
- type_name(&options),
- format!(
- "textwrap::Options<{}, {}, {}>",
- "textwrap::wrap_algorithms::FirstFit",
- "textwrap::word_separators::AsciiSpace",
- "textwrap::word_splitters::HyphenSplitter"
- )
- );
-
- // Explicitly making all parameters inferred.
- let options: Options<_, _, _> = Options::new(10);
- assert_eq!(
- type_name(&options),
- format!(
- "textwrap::Options<{}, {}, {}>",
- "textwrap::wrap_algorithms::FirstFit",
- "textwrap::word_separators::AsciiSpace",
- "textwrap::word_splitters::HyphenSplitter"
- )
- );
-}
-
-#[test]
-fn box_static_nohyphenation() {
- // Inferred static type.
- let options = Options::new(10)
- .wrap_algorithm(Box::new(FirstFit))
- .word_splitter(Box::new(NoHyphenation))
- .word_separator(Box::new(AsciiSpace));
- assert_eq!(
- type_name(&options),
- format!(
- "textwrap::Options<{}, {}, {}>",
- "Box<textwrap::wrap_algorithms::FirstFit>",
- "Box<textwrap::word_separators::AsciiSpace>",
- "Box<textwrap::word_splitters::NoHyphenation>"
- )
- );
-}
-
-#[test]
-fn box_dyn_wordsplitter() {
- // Inferred dynamic type due to default type parameter.
- let options = Options::new(10)
- .wrap_algorithm(Box::new(FirstFit) as Box<dyn WrapAlgorithm>)
- .word_splitter(Box::new(HyphenSplitter) as Box<dyn WordSplitter>)
- .word_separator(Box::new(AsciiSpace) as Box<dyn WordSeparator>);
- assert_eq!(
- type_name(&options),
- format!(
- "textwrap::Options<{}, {}, {}>",
- "Box<dyn textwrap::wrap_algorithms::WrapAlgorithm>",
- "Box<dyn textwrap::word_separators::WordSeparator>",
- "Box<dyn textwrap::word_splitters::WordSplitter>"
- )
- );
-}