diff options
author | Chih-hung Hsieh <chh@google.com> | 2020-07-14 05:30:38 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2020-07-14 05:30:38 +0000 |
commit | 0bd48c8f27ee6b744f799861cb8dbc7b8aec12f5 (patch) | |
tree | 80988bb4cf285b422d34484680b4b585549d18eb | |
parent | 052914716facd396193fe5ee245d2def3b87538f (diff) | |
parent | a5796199984315b2f380e5fbb67d96473f368c8e (diff) | |
download | aho-corasick-0bd48c8f27ee6b744f799861cb8dbc7b8aec12f5.tar.gz |
Merge "Upgrade rust/crates/aho-corasick to 0.7.13" am: 4fa3c48a1f am: 3f82cced2b am: a563144b9c am: a579619998
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/aho-corasick/+/1361103
Change-Id: I5df4fa31350e26246381d27a7630d0c747e8c663
-rw-r--r-- | .cargo_vcs_info.json | 2 | ||||
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | Cargo.toml.orig | 4 | ||||
-rw-r--r-- | METADATA | 6 | ||||
-rw-r--r-- | README.md | 7 | ||||
-rw-r--r-- | src/ahocorasick.rs | 52 | ||||
-rw-r--r-- | src/lib.rs | 25 |
7 files changed, 70 insertions, 28 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 06dfa3d..725cd96 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,5 +1,5 @@ { "git": { - "sha1": "36de9d383aeaf925c7425ed53eee91e61cb9b61c" + "sha1": "55a42968a26a1150aca116fab63537330782d56a" } } @@ -12,7 +12,7 @@ [package] name = "aho-corasick" -version = "0.7.10" +version = "0.7.13" authors = ["Andrew Gallant <jamslam@gmail.com>"] exclude = ["/aho-corasick-debug", "/ci/*", "/.travis.yml", "/appveyor.yml"] autotests = false diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 3166f9b..00d71ef 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,6 +1,6 @@ [package] name = "aho-corasick" -version = "0.7.10" #:version +version = "0.7.13" #:version authors = ["Andrew Gallant <jamslam@gmail.com>"] description = "Fast multiple substring searching." homepage = "https://github.com/BurntSushi/aho-corasick" @@ -35,6 +35,8 @@ std = ["memchr/use_std"] memchr = { version = "2.2.0", default-features = false } [dev-dependencies] +# TODO: Re-enable this once the MSRV is 1.43 or greater. +# See: https://github.com/BurntSushi/aho-corasick/issues/62 doc-comment = "0.3.1" [profile.release] @@ -9,11 +9,11 @@ third_party { type: GIT value: "https://github.com/BurntSushi/aho-corasick" } - version: "0.7.10" + version: "0.7.13" license_type: NOTICE last_upgrade_date { year: 2020 - month: 3 - day: 31 + month: 7 + day: 10 } } @@ -5,8 +5,8 @@ acceleration in some cases. This library provides multiple pattern search principally through an implementation of the [Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm), which builds a finite state machine for executing searches in linear time. -Features include case insensitive matching, overlapping matches and search & -replace in streams. +Features include case insensitive matching, overlapping matches, fast searching +via SIMD and optional full DFA construction and search & replace in streams. [![Build status](https://github.com/BurntSushi/aho-corasick/workflows/ci/badge.svg)](https://github.com/BurntSushi/aho-corasick/actions) [![](http://meritbadge.herokuapp.com/aho-corasick)](https://crates.io/crates/aho-corasick) @@ -103,7 +103,8 @@ let rdr = "The quick brown fox."; let mut wtr = vec![]; let ac = AhoCorasick::new(patterns); -ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?; +ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with) + .expect("stream_replace_all failed"); assert_eq!(b"The slow grey sloth.".to_vec(), wtr); ``` diff --git a/src/ahocorasick.rs b/src/ahocorasick.rs index 9b7d9e7..7880d13 100644 --- a/src/ahocorasick.rs +++ b/src/ahocorasick.rs @@ -502,7 +502,7 @@ impl<S: StateID> AhoCorasick<S> { /// The closure accepts three parameters: the match found, the text of /// the match and a string buffer with which to write the replaced text /// (if any). If the closure returns `true`, then it continues to the next - /// match. If the closure returns false, then searching is stopped. + /// match. If the closure returns `false`, then searching is stopped. /// /// # Examples /// @@ -524,6 +524,24 @@ impl<S: StateID> AhoCorasick<S> { /// }); /// assert_eq!("0 the 2 to the 0age", result); /// ``` + /// + /// Stopping the replacement by returning `false` (continued from the + /// example above): + /// + /// ``` + /// # use aho_corasick::{AhoCorasickBuilder, MatchKind}; + /// # let patterns = &["append", "appendage", "app"]; + /// # let haystack = "append the app to the appendage"; + /// # let ac = AhoCorasickBuilder::new() + /// # .match_kind(MatchKind::LeftmostFirst) + /// # .build(patterns); + /// let mut result = String::new(); + /// ac.replace_all_with(haystack, &mut result, |mat, _, dst| { + /// dst.push_str(&mat.pattern().to_string()); + /// mat.pattern() != 2 + /// }); + /// assert_eq!("0 the 2 to the appendage", result); + /// ``` pub fn replace_all_with<F>( &self, haystack: &str, @@ -536,7 +554,9 @@ impl<S: StateID> AhoCorasick<S> { for mat in self.find_iter(haystack) { dst.push_str(&haystack[last_match..mat.start()]); last_match = mat.end(); - replace_with(&mat, &haystack[mat.start()..mat.end()], dst); + if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) { + break; + }; } dst.push_str(&haystack[last_match..]); } @@ -548,7 +568,7 @@ impl<S: StateID> AhoCorasick<S> { /// The closure accepts three parameters: the match found, the text of /// the match and a byte buffer with which to write the replaced text /// (if any). If the closure returns `true`, then it continues to the next - /// match. If the closure returns false, then searching is stopped. + /// match. If the closure returns `false`, then searching is stopped. /// /// # Examples /// @@ -570,6 +590,24 @@ impl<S: StateID> AhoCorasick<S> { /// }); /// assert_eq!(b"0 the 2 to the 0age".to_vec(), result); /// ``` + /// + /// Stopping the replacement by returning `false` (continued from the + /// example above): + /// + /// ``` + /// # use aho_corasick::{AhoCorasickBuilder, MatchKind}; + /// # let patterns = &["append", "appendage", "app"]; + /// # let haystack = b"append the app to the appendage"; + /// # let ac = AhoCorasickBuilder::new() + /// # .match_kind(MatchKind::LeftmostFirst) + /// # .build(patterns); + /// let mut result = vec![]; + /// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| { + /// dst.extend(mat.pattern().to_string().bytes()); + /// mat.pattern() != 2 + /// }); + /// assert_eq!(b"0 the 2 to the appendage".to_vec(), result); + /// ``` pub fn replace_all_with_bytes<F>( &self, haystack: &[u8], @@ -582,7 +620,9 @@ impl<S: StateID> AhoCorasick<S> { for mat in self.find_iter(haystack) { dst.extend(&haystack[last_match..mat.start()]); last_match = mat.end(); - replace_with(&mat, &haystack[mat.start()..mat.end()], dst); + if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) { + break; + }; } dst.extend(&haystack[last_match..]); } @@ -735,9 +775,7 @@ impl<S: StateID> AhoCorasick<S> { /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). /// /// The closure accepts three parameters: the match found, the text of - /// the match and the writer with which to write the replaced text - /// (if any). If the closure returns `true`, then it continues to the next - /// match. If the closure returns false, then searching is stopped. + /// the match and the writer with which to write the replaced text (if any). /// /// After all matches are replaced, the writer is _not_ flushed. /// @@ -168,13 +168,14 @@ naive solutions, it is generally slower than more specialized algorithms that are accelerated using vector instructions such as SIMD. For that reason, this library will internally use a "prefilter" to attempt -to accelerate searches when possible. Currently, this library has fairly -limited implementation that only applies when there are 3 or fewer unique -starting bytes among all patterns in an automaton. - -While a prefilter is generally good to have on by default since it works well -in the common case, it can lead to less predictable or even sub-optimal -performance in some cases. For that reason, prefilters can be disabled via +to accelerate searches when possible. Currently, this library has several +different algorithms it might use depending on the patterns provided. Once the +number of patterns gets too big, prefilters are no longer used. + +While a prefilter is generally good to have on by default since it works +well in the common case, it can lead to less predictable or even sub-optimal +performance in some cases. For that reason, prefilters can be explicitly +disabled via [`AhoCorasickBuilder::prefilter`](struct.AhoCorasickBuilder.html#method.prefilter). */ @@ -186,12 +187,12 @@ performance in some cases. For that reason, prefilters can be disabled via compile_error!("`std` feature is currently required to build this crate"); extern crate memchr; -#[cfg(test)] -#[macro_use] -extern crate doc_comment; +// #[cfg(doctest)] +// #[macro_use] +// extern crate doc_comment; -#[cfg(test)] -doctest!("../README.md"); +// #[cfg(doctest)] +// doctest!("../README.md"); pub use ahocorasick::{ AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind, |