about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Chih-hung Hsieh <chh@google.com> 2020-07-14 05:30:38 +0000
committer Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2020-07-14 05:30:38 +0000
commit 0bd48c8f27ee6b744f799861cb8dbc7b8aec12f5 (patch)
tree 80988bb4cf285b422d34484680b4b585549d18eb
parent 052914716facd396193fe5ee245d2def3b87538f (diff)
parent a5796199984315b2f380e5fbb67d96473f368c8e (diff)
download aho-corasick-0bd48c8f27ee6b744f799861cb8dbc7b8aec12f5.tar.gz
Merge "Upgrade rust/crates/aho-corasick to 0.7.13" am: 4fa3c48a1f am: 3f82cced2b am: a563144b9c am: a579619998
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/aho-corasick/+/1361103
Change-Id: I5df4fa31350e26246381d27a7630d0c747e8c663
-rw-r--r-- .cargo_vcs_info.json 2
-rw-r--r-- Cargo.toml 2
-rw-r--r-- Cargo.toml.orig 4
-rw-r--r-- METADATA 6
-rw-r--r-- README.md 7
-rw-r--r-- src/ahocorasick.rs 52
-rw-r--r-- src/lib.rs 25
7 files changed, 70 insertions, 28 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 06dfa3d..725cd96 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
{
"git": {
- "sha1": "36de9d383aeaf925c7425ed53eee91e61cb9b61c"
+ "sha1": "55a42968a26a1150aca116fab63537330782d56a"
}
}
diff --git a/Cargo.toml b/Cargo.toml
index b240ec3..a0c306a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@
[package]
name = "aho-corasick"
-version = "0.7.10"
+version = "0.7.13"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
exclude = ["/aho-corasick-debug", "/ci/*", "/.travis.yml", "/appveyor.yml"]
autotests = false
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 3166f9b..00d71ef 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
[package]
name = "aho-corasick"
-version = "0.7.10" #:version
+version = "0.7.13" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = "Fast multiple substring searching."
homepage = "https://github.com/BurntSushi/aho-corasick"
@@ -35,6 +35,8 @@ std = ["memchr/use_std"]
memchr = { version = "2.2.0", default-features = false }
[dev-dependencies]
+# TODO: Re-enable this once the MSRV is 1.43 or greater.
+# See: https://github.com/BurntSushi/aho-corasick/issues/62
doc-comment = "0.3.1"
[profile.release]
diff --git a/METADATA b/METADATA
index a6b685d..a2cc03d 100644
--- a/METADATA
+++ b/METADATA
@@ -9,11 +9,11 @@ third_party {
type: GIT
value: "https://github.com/BurntSushi/aho-corasick"
}
- version: "0.7.10"
+ version: "0.7.13"
license_type: NOTICE
last_upgrade_date {
year: 2020
- month: 3
- day: 31
+ month: 7
+ day: 10
}
}
diff --git a/README.md b/README.md
index 9ae3427..2f7a18e 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,8 @@ acceleration in some cases. This library provides multiple pattern
search principally through an implementation of the
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
which builds a finite state machine for executing searches in linear time.
-Features include case insensitive matching, overlapping matches and search &
-replace in streams.
+Features include case insensitive matching, overlapping matches, fast searching
+via SIMD and optional full DFA construction and search & replace in streams.
[![Build status](https://github.com/BurntSushi/aho-corasick/workflows/ci/badge.svg)](https://github.com/BurntSushi/aho-corasick/actions)
[![](http://meritbadge.herokuapp.com/aho-corasick)](https://crates.io/crates/aho-corasick)
@@ -103,7 +103,8 @@ let rdr = "The quick brown fox.";
let mut wtr = vec![];
let ac = AhoCorasick::new(patterns);
-ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
+ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)
+ .expect("stream_replace_all failed");
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
```
diff --git a/src/ahocorasick.rs b/src/ahocorasick.rs
index 9b7d9e7..7880d13 100644
--- a/src/ahocorasick.rs
+++ b/src/ahocorasick.rs
@@ -502,7 +502,7 @@ impl<S: StateID> AhoCorasick<S> {
/// The closure accepts three parameters: the match found, the text of
/// the match and a string buffer with which to write the replaced text
/// (if any). If the closure returns `true`, then it continues to the next
- /// match. If the closure returns false, then searching is stopped.
+ /// match. If the closure returns `false`, then searching is stopped.
///
/// # Examples
///
@@ -524,6 +524,24 @@ impl<S: StateID> AhoCorasick<S> {
/// });
/// assert_eq!("0 the 2 to the 0age", result);
/// ```
+ ///
+ /// Stopping the replacement by returning `false` (continued from the
+ /// example above):
+ ///
+ /// ```
+ /// # use aho_corasick::{AhoCorasickBuilder, MatchKind};
+ /// # let patterns = &["append", "appendage", "app"];
+ /// # let haystack = "append the app to the appendage";
+ /// # let ac = AhoCorasickBuilder::new()
+ /// # .match_kind(MatchKind::LeftmostFirst)
+ /// # .build(patterns);
+ /// let mut result = String::new();
+ /// ac.replace_all_with(haystack, &mut result, |mat, _, dst| {
+ /// dst.push_str(&mat.pattern().to_string());
+ /// mat.pattern() != 2
+ /// });
+ /// assert_eq!("0 the 2 to the appendage", result);
+ /// ```
pub fn replace_all_with<F>(
&self,
haystack: &str,
@@ -536,7 +554,9 @@ impl<S: StateID> AhoCorasick<S> {
for mat in self.find_iter(haystack) {
dst.push_str(&haystack[last_match..mat.start()]);
last_match = mat.end();
- replace_with(&mat, &haystack[mat.start()..mat.end()], dst);
+ if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) {
+ break;
+ };
}
dst.push_str(&haystack[last_match..]);
}
@@ -548,7 +568,7 @@ impl<S: StateID> AhoCorasick<S> {
/// The closure accepts three parameters: the match found, the text of
/// the match and a byte buffer with which to write the replaced text
/// (if any). If the closure returns `true`, then it continues to the next
- /// match. If the closure returns false, then searching is stopped.
+ /// match. If the closure returns `false`, then searching is stopped.
///
/// # Examples
///
@@ -570,6 +590,24 @@ impl<S: StateID> AhoCorasick<S> {
/// });
/// assert_eq!(b"0 the 2 to the 0age".to_vec(), result);
/// ```
+ ///
+ /// Stopping the replacement by returning `false` (continued from the
+ /// example above):
+ ///
+ /// ```
+ /// # use aho_corasick::{AhoCorasickBuilder, MatchKind};
+ /// # let patterns = &["append", "appendage", "app"];
+ /// # let haystack = b"append the app to the appendage";
+ /// # let ac = AhoCorasickBuilder::new()
+ /// # .match_kind(MatchKind::LeftmostFirst)
+ /// # .build(patterns);
+ /// let mut result = vec![];
+ /// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| {
+ /// dst.extend(mat.pattern().to_string().bytes());
+ /// mat.pattern() != 2
+ /// });
+ /// assert_eq!(b"0 the 2 to the appendage".to_vec(), result);
+ /// ```
pub fn replace_all_with_bytes<F>(
&self,
haystack: &[u8],
@@ -582,7 +620,9 @@ impl<S: StateID> AhoCorasick<S> {
for mat in self.find_iter(haystack) {
dst.extend(&haystack[last_match..mat.start()]);
last_match = mat.end();
- replace_with(&mat, &haystack[mat.start()..mat.end()], dst);
+ if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) {
+ break;
+ };
}
dst.extend(&haystack[last_match..]);
}
@@ -735,9 +775,7 @@ impl<S: StateID> AhoCorasick<S> {
/// [`find_iter`](struct.AhoCorasick.html#method.find_iter).
///
/// The closure accepts three parameters: the match found, the text of
- /// the match and the writer with which to write the replaced text
- /// (if any). If the closure returns `true`, then it continues to the next
- /// match. If the closure returns false, then searching is stopped.
+ /// the match and the writer with which to write the replaced text (if any).
///
/// After all matches are replaced, the writer is _not_ flushed.
///
diff --git a/src/lib.rs b/src/lib.rs
index 28e984b..aa91c21 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -168,13 +168,14 @@ naive solutions, it is generally slower than more specialized algorithms that
are accelerated using vector instructions such as SIMD.
For that reason, this library will internally use a "prefilter" to attempt
-to accelerate searches when possible. Currently, this library has fairly
-limited implementation that only applies when there are 3 or fewer unique
-starting bytes among all patterns in an automaton.
-
-While a prefilter is generally good to have on by default since it works well
-in the common case, it can lead to less predictable or even sub-optimal
-performance in some cases. For that reason, prefilters can be disabled via
+to accelerate searches when possible. Currently, this library has several
+different algorithms it might use depending on the patterns provided. Once the
+number of patterns gets too big, prefilters are no longer used.
+
+While a prefilter is generally good to have on by default since it works
+well in the common case, it can lead to less predictable or even sub-optimal
+performance in some cases. For that reason, prefilters can be explicitly
+disabled via
[`AhoCorasickBuilder::prefilter`](struct.AhoCorasickBuilder.html#method.prefilter).
*/
@@ -186,12 +187,12 @@ performance in some cases. For that reason, prefilters can be disabled via
compile_error!("`std` feature is currently required to build this crate");
extern crate memchr;
-#[cfg(test)]
-#[macro_use]
-extern crate doc_comment;
+// #[cfg(doctest)]
+// #[macro_use]
+// extern crate doc_comment;
-#[cfg(test)]
-doctest!("../README.md");
+// #[cfg(doctest)]
+// doctest!("../README.md");
pub use ahocorasick::{
AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind,