aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2022-06-15 21:45:16 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2022-06-15 21:45:16 +0000
commit5856c588c52d87fd4c1408b0d62ee40eda83634d (patch)
tree5f360a0d078f211e4938d8bdb073d5aa685ed554
parent471713278e7990f6326a0d142d68c416a997f7fb (diff)
parente47e2cd7550328fccccb1a12de97d62011f04bd7 (diff)
downloadregex-aml_tz3_314012010.tar.gz
Change-Id: I09b4f76635b861187223f7fb274f45ebfd84724e
-rw-r--r--.cargo_vcs_info.json2
-rw-r--r--Android.bp443
-rw-r--r--CHANGELOG.md64
-rw-r--r--Cargo.toml9
-rw-r--r--Cargo.toml.orig9
-rw-r--r--METADATA8
-rw-r--r--PERFORMANCE.md10
-rw-r--r--README.md12
-rw-r--r--TEST_MAPPING95
-rw-r--r--cargo2android.json11
-rw-r--r--examples/shootout-regex-dna-bytes.rs2
-rw-r--r--examples/shootout-regex-dna-cheat.rs2
-rw-r--r--examples/shootout-regex-dna-replace.rs2
-rw-r--r--examples/shootout-regex-dna-single-cheat.rs2
-rw-r--r--examples/shootout-regex-dna-single.rs2
-rw-r--r--examples/shootout-regex-dna.rs2
-rw-r--r--src/backtrack.rs10
-rw-r--r--src/compile.rs42
-rw-r--r--src/dfa.rs27
-rw-r--r--src/error.rs4
-rw-r--r--src/exec.rs59
-rw-r--r--src/expand.rs14
-rw-r--r--src/input.rs18
-rw-r--r--src/lib.rs70
-rw-r--r--src/literal/imp.rs780
-rw-r--r--src/literal/mod.rs2
-rw-r--r--src/pattern.rs3
-rw-r--r--src/pikevm.rs14
-rw-r--r--src/pool.rs10
-rw-r--r--src/prog.rs25
-rw-r--r--src/re_builder.rs12
-rw-r--r--src/re_bytes.rs90
-rw-r--r--src/re_set.rs12
-rw-r--r--src/re_trait.rs6
-rw-r--r--src/re_unicode.rs95
-rw-r--r--src/sparse.rs2
-rwxr-xr-xtest2
-rw-r--r--tests/regression_fuzz.rs12
-rw-r--r--tests/replace.rs4
-rw-r--r--tests/test_backtrack.rs3
-rw-r--r--tests/test_backtrack_bytes.rs3
-rw-r--r--tests/test_backtrack_utf8bytes.rs3
-rw-r--r--tests/test_crates_regex.rs3
-rw-r--r--tests/test_default.rs3
-rw-r--r--tests/test_default_bytes.rs3
-rw-r--r--tests/test_nfa.rs3
-rw-r--r--tests/test_nfa_bytes.rs3
-rw-r--r--tests/test_nfa_utf8bytes.rs3
48 files changed, 1080 insertions, 935 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 51b3cd6..33a143b 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
{
"git": {
- "sha1": "f2dc1b788f773a49f1b6633a6302054978344452"
+ "sha1": "ff283badce21dcebd581909d38b81f2c8c9bfb54"
}
}
diff --git a/Android.bp b/Android.bp
index 4fe3543..99eac66 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,4 +1,4 @@
-// This file is generated by cargo2android.py --config cargo2android.json.
+// This file is generated by cargo2android.py --run --device --dependencies.
// Do not modify this file as changes will be overridden on upgrade.
package {
@@ -42,10 +42,8 @@ rust_library {
name: "libregex",
host_supported: true,
crate_name: "regex",
- cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
srcs: ["src/lib.rs"],
- edition: "2018",
+ edition: "2015",
features: [
"aho-corasick",
"default",
@@ -70,438 +68,9 @@ rust_library {
"libmemchr",
"libregex_syntax",
],
- apex_available: [
- "//apex_available:platform",
- "com.android.compos",
- "com.android.virt",
- ],
-}
-
-rust_test {
- name: "regex_test_src_lib",
- host_supported: true,
- crate_name: "regex",
- cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
- srcs: ["src/lib.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- test_options: {
- unit_test: true,
- },
- edition: "2018",
- features: [
- "aho-corasick",
- "default",
- "memchr",
- "perf",
- "perf-cache",
- "perf-dfa",
- "perf-inline",
- "perf-literal",
- "std",
- "unicode",
- "unicode-age",
- "unicode-bool",
- "unicode-case",
- "unicode-gencat",
- "unicode-perl",
- "unicode-script",
- "unicode-segment",
- ],
- rustlibs: [
- "libaho_corasick",
- "liblazy_static",
- "libmemchr",
- "libquickcheck",
- "librand",
- "libregex_syntax",
- ],
-}
-
-rust_test {
- name: "regex_test_tests_test_backtrack",
- host_supported: true,
- crate_name: "backtrack",
- cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
- srcs: ["tests/test_backtrack.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- test_options: {
- unit_test: true,
- },
- edition: "2018",
- features: [
- "aho-corasick",
- "default",
- "memchr",
- "perf",
- "perf-cache",
- "perf-dfa",
- "perf-inline",
- "perf-literal",
- "std",
- "unicode",
- "unicode-age",
- "unicode-bool",
- "unicode-case",
- "unicode-gencat",
- "unicode-perl",
- "unicode-script",
- "unicode-segment",
- ],
- rustlibs: [
- "libaho_corasick",
- "liblazy_static",
- "libmemchr",
- "libquickcheck",
- "librand",
- "libregex",
- "libregex_syntax",
- ],
-}
-
-rust_test {
- name: "regex_test_tests_test_backtrack_bytes",
- host_supported: true,
- crate_name: "backtrack_bytes",
- cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
- srcs: ["tests/test_backtrack_bytes.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- test_options: {
- unit_test: true,
- },
- edition: "2018",
- features: [
- "aho-corasick",
- "default",
- "memchr",
- "perf",
- "perf-cache",
- "perf-dfa",
- "perf-inline",
- "perf-literal",
- "std",
- "unicode",
- "unicode-age",
- "unicode-bool",
- "unicode-case",
- "unicode-gencat",
- "unicode-perl",
- "unicode-script",
- "unicode-segment",
- ],
- rustlibs: [
- "libaho_corasick",
- "liblazy_static",
- "libmemchr",
- "libquickcheck",
- "librand",
- "libregex",
- "libregex_syntax",
- ],
-}
-
-rust_test {
- name: "regex_test_tests_test_backtrack_utf8bytes",
- host_supported: true,
- crate_name: "backtrack_utf8bytes",
- cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
- srcs: ["tests/test_backtrack_utf8bytes.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- test_options: {
- unit_test: true,
- },
- edition: "2018",
- features: [
- "aho-corasick",
- "default",
- "memchr",
- "perf",
- "perf-cache",
- "perf-dfa",
- "perf-inline",
- "perf-literal",
- "std",
- "unicode",
- "unicode-age",
- "unicode-bool",
- "unicode-case",
- "unicode-gencat",
- "unicode-perl",
- "unicode-script",
- "unicode-segment",
- ],
- rustlibs: [
- "libaho_corasick",
- "liblazy_static",
- "libmemchr",
- "libquickcheck",
- "librand",
- "libregex",
- "libregex_syntax",
- ],
-}
-
-rust_test {
- name: "regex_test_tests_test_crates_regex",
- host_supported: true,
- crate_name: "crates_regex",
- cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
- srcs: ["tests/test_crates_regex.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- test_options: {
- unit_test: true,
- },
- edition: "2018",
- features: [
- "aho-corasick",
- "default",
- "memchr",
- "perf",
- "perf-cache",
- "perf-dfa",
- "perf-inline",
- "perf-literal",
- "std",
- "unicode",
- "unicode-age",
- "unicode-bool",
- "unicode-case",
- "unicode-gencat",
- "unicode-perl",
- "unicode-script",
- "unicode-segment",
- ],
- rustlibs: [
- "libaho_corasick",
- "liblazy_static",
- "libmemchr",
- "libquickcheck",
- "librand",
- "libregex",
- "libregex_syntax",
- ],
-}
-
-rust_test {
- name: "regex_test_tests_test_default",
- host_supported: true,
- crate_name: "default",
- cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
- srcs: ["tests/test_default.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- test_options: {
- unit_test: true,
- },
- edition: "2018",
- features: [
- "aho-corasick",
- "default",
- "memchr",
- "perf",
- "perf-cache",
- "perf-dfa",
- "perf-inline",
- "perf-literal",
- "std",
- "unicode",
- "unicode-age",
- "unicode-bool",
- "unicode-case",
- "unicode-gencat",
- "unicode-perl",
- "unicode-script",
- "unicode-segment",
- ],
- rustlibs: [
- "libaho_corasick",
- "liblazy_static",
- "libmemchr",
- "libquickcheck",
- "librand",
- "libregex",
- "libregex_syntax",
- ],
-}
-
-rust_test {
- name: "regex_test_tests_test_default_bytes",
- host_supported: true,
- crate_name: "default_bytes",
- cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
- srcs: ["tests/test_default_bytes.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- test_options: {
- unit_test: true,
- },
- edition: "2018",
- features: [
- "aho-corasick",
- "default",
- "memchr",
- "perf",
- "perf-cache",
- "perf-dfa",
- "perf-inline",
- "perf-literal",
- "std",
- "unicode",
- "unicode-age",
- "unicode-bool",
- "unicode-case",
- "unicode-gencat",
- "unicode-perl",
- "unicode-script",
- "unicode-segment",
- ],
- rustlibs: [
- "libaho_corasick",
- "liblazy_static",
- "libmemchr",
- "libquickcheck",
- "librand",
- "libregex",
- "libregex_syntax",
- ],
-}
-
-rust_test {
- name: "regex_test_tests_test_nfa",
- host_supported: true,
- crate_name: "nfa",
- cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
- srcs: ["tests/test_nfa.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- test_options: {
- unit_test: true,
- },
- edition: "2018",
- features: [
- "aho-corasick",
- "default",
- "memchr",
- "perf",
- "perf-cache",
- "perf-dfa",
- "perf-inline",
- "perf-literal",
- "std",
- "unicode",
- "unicode-age",
- "unicode-bool",
- "unicode-case",
- "unicode-gencat",
- "unicode-perl",
- "unicode-script",
- "unicode-segment",
- ],
- rustlibs: [
- "libaho_corasick",
- "liblazy_static",
- "libmemchr",
- "libquickcheck",
- "librand",
- "libregex",
- "libregex_syntax",
- ],
-}
-
-rust_test {
- name: "regex_test_tests_test_nfa_bytes",
- host_supported: true,
- crate_name: "nfa_bytes",
- cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
- srcs: ["tests/test_nfa_bytes.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- test_options: {
- unit_test: true,
- },
- edition: "2018",
- features: [
- "aho-corasick",
- "default",
- "memchr",
- "perf",
- "perf-cache",
- "perf-dfa",
- "perf-inline",
- "perf-literal",
- "std",
- "unicode",
- "unicode-age",
- "unicode-bool",
- "unicode-case",
- "unicode-gencat",
- "unicode-perl",
- "unicode-script",
- "unicode-segment",
- ],
- rustlibs: [
- "libaho_corasick",
- "liblazy_static",
- "libmemchr",
- "libquickcheck",
- "librand",
- "libregex",
- "libregex_syntax",
- ],
}
-rust_test {
- name: "regex_test_tests_test_nfa_utf8bytes",
- host_supported: true,
- crate_name: "nfa_utf8bytes",
- cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
- srcs: ["tests/test_nfa_utf8bytes.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- test_options: {
- unit_test: true,
- },
- edition: "2018",
- features: [
- "aho-corasick",
- "default",
- "memchr",
- "perf",
- "perf-cache",
- "perf-dfa",
- "perf-inline",
- "perf-literal",
- "std",
- "unicode",
- "unicode-age",
- "unicode-bool",
- "unicode-case",
- "unicode-gencat",
- "unicode-perl",
- "unicode-script",
- "unicode-segment",
- ],
- rustlibs: [
- "libaho_corasick",
- "liblazy_static",
- "libmemchr",
- "libquickcheck",
- "librand",
- "libregex",
- "libregex_syntax",
- ],
-}
+// dependent_library ["feature_list"]
+// aho-corasick-0.7.15 "default,std"
+// memchr-2.3.4 "default,std,use_std"
+// regex-syntax-0.6.23 "default,unicode,unicode-age,unicode-bool,unicode-case,unicode-gencat,unicode-perl,unicode-script,unicode-segment"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 71d1963..f294972 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,67 +1,3 @@
-1.5.4 (2021-05-06)
-==================
-This release fixes another compilation failure when building regex. This time,
-the fix is for when the `pattern` feature is enabled, which only works on
-nightly Rust. CI has been updated to test this case.
-
-* [BUG #772](https://github.com/rust-lang/regex/pull/772):
- Fix build when `pattern` feature is enabled.
-
-
-1.5.3 (2021-05-01)
-==================
-This releases fixes a bug when building regex with only the `unicode-perl`
-feature. It turns out that while CI was building this configuration, it wasn't
-actually failing the overall build on a failed compilation.
-
-* [BUG #769](https://github.com/rust-lang/regex/issues/769):
- Fix build in `regex-syntax` when only the `unicode-perl` feature is enabled.
-
-
-1.5.2 (2021-05-01)
-==================
-This release fixes a performance bug when Unicode word boundaries are used.
-Namely, for certain regexes on certain inputs, it's possible for the lazy DFA
-to stop searching (causing a fallback to a slower engine) when it doesn't
-actually need to.
-
-[PR #768](https://github.com/rust-lang/regex/pull/768) fixes the bug, which was
-originally reported in
-[ripgrep#1860](https://github.com/BurntSushi/ripgrep/issues/1860).
-
-
-1.5.1 (2021-04-30)
-==================
-This is a patch release that fixes a compilation error when the `perf-literal`
-feature is not enabled.
-
-
-1.5.0 (2021-04-30)
-==================
-This release primarily updates to Rust 2018 (finally) and bumps the MSRV to
-Rust 1.41 (from Rust 1.28). Rust 1.41 was chosen because it's still reasonably
-old, and is what's in Debian stable at the time of writing.
-
-This release also drops this crate's own bespoke substring search algorithms
-in favor of a new
-[`memmem` implementation provided by the `memchr` crate](https://docs.rs/memchr/2.4.0/memchr/memmem/index.html).
-This will change the performance profile of some regexes, sometimes getting a
-little worse, and hopefully more frequently, getting a lot better. Please
-report any serious performance regressions if you find them.
-
-
-1.4.6 (2021-04-22)
-==================
-This is a small patch release that fixes the compiler's size check on how much
-heap memory a regex uses. Previously, the compiler did not account for the
-heap usage of Unicode character classes. Now it does. It's possible that this
-may make some regexes fail to compile that previously did compile. If that
-happens, please file an issue.
-
-* [BUG OSS-fuzz#33579](https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579):
- Some regexes can use more heap memory than one would expect.
-
-
1.4.5 (2021-03-14)
==================
This is a small patch release that fixes a regression in the size of a `Regex`
diff --git a/Cargo.toml b/Cargo.toml
index 260acec..6f8ef4e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,9 +11,8 @@
# will likely look very different (and much more reasonable)
[package]
-edition = "2018"
name = "regex"
-version = "1.5.4"
+version = "1.4.5"
authors = ["The Rust Project Developers"]
exclude = ["/scripts/*", "/.github/*"]
autotests = false
@@ -73,15 +72,15 @@ path = "tests/test_backtrack_bytes.rs"
name = "crates-regex"
path = "tests/test_crates_regex.rs"
[dependencies.aho-corasick]
-version = "0.7.18"
+version = "0.7.6"
optional = true
[dependencies.memchr]
-version = "2.4.0"
+version = "2.2.1"
optional = true
[dependencies.regex-syntax]
-version = "0.6.25"
+version = "0.6.22"
default-features = false
[dev-dependencies.lazy_static]
version = "1"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 468230b..4b9ca7f 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
[package]
name = "regex"
-version = "1.5.4" #:version
+version = "1.4.5" #:version
authors = ["The Rust Project Developers"]
license = "MIT OR Apache-2.0"
readme = "README.md"
@@ -14,7 +14,6 @@ finite automata and guarantees linear time matching on all inputs.
categories = ["text-processing"]
autotests = false
exclude = ["/scripts/*", "/.github/*"]
-edition = "2018"
[workspace]
members = [
@@ -106,18 +105,18 @@ pattern = []
# For very fast prefix literal matching.
[dependencies.aho-corasick]
-version = "0.7.18"
+version = "0.7.6"
optional = true
# For skipping along search text quickly when a leading byte is known.
[dependencies.memchr]
-version = "2.4.0"
+version = "2.2.1"
optional = true
# For parsing regular expressions.
[dependencies.regex-syntax]
path = "regex-syntax"
-version = "0.6.25"
+version = "0.6.22"
default-features = false
[dev-dependencies]
diff --git a/METADATA b/METADATA
index 6611aac..52d2bfe 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@ third_party {
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/regex/regex-1.5.4.crate"
+ value: "https://static.crates.io/crates/regex/regex-1.4.5.crate"
}
- version: "1.5.4"
+ version: "1.4.5"
license_type: NOTICE
last_upgrade_date {
year: 2021
- month: 5
- day: 19
+ month: 4
+ day: 1
}
}
diff --git a/PERFORMANCE.md b/PERFORMANCE.md
index 8cd0d9c..b4aeb89 100644
--- a/PERFORMANCE.md
+++ b/PERFORMANCE.md
@@ -62,7 +62,9 @@ on how your program is structured. Thankfully, the
[`lazy_static`](https://crates.io/crates/lazy_static)
crate provides an answer that works well:
- use lazy_static::lazy_static;
+ #[macro_use] extern crate lazy_static;
+ extern crate regex;
+
use regex::Regex;
fn some_helper_function(text: &str) -> bool {
@@ -145,9 +147,9 @@ In general, these are ordered from fastest to slowest.
`is_match` is fastest because it doesn't actually need to find the start or the
end of the leftmost-first match. It can quit immediately after it knows there
is a match. For example, given the regex `a+` and the haystack, `aaaaa`, the
-search will quit after examining the first byte.
+search will quit after examing the first byte.
-In contrast, `find` must return both the start and end location of the
+In constrast, `find` must return both the start and end location of the
leftmost-first match. It can use the DFA matcher for this, but must run it
forwards once to find the end of the match *and then run it backwards* to find
the start of the match. The two scans and the cost of finding the real end of
@@ -196,7 +198,7 @@ a few examples of regexes that get literal prefixes detected:
Literals in anchored regexes can also be used for detecting non-matches very
quickly. For example, `^foo\w+` and `\w+foo$` may be able to detect a non-match
-just by examining the first (or last) three bytes of the haystack.
+just by examing the first (or last) three bytes of the haystack.
## Unicode word boundaries may prevent the DFA from being used
diff --git a/README.md b/README.md
index 86d6996..f7a2554 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ by [RE2](https://github.com/google/re2).
[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions)
[![](https://meritbadge.herokuapp.com/regex)](https://crates.io/crates/regex)
-[![Rust](https://img.shields.io/badge/rust-1.41.1%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
+[![Rust](https://img.shields.io/badge/rust-1.28.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
### Documentation
@@ -27,7 +27,13 @@ Add this to your `Cargo.toml`:
```toml
[dependencies]
-regex = "1.5"
+regex = "1"
+```
+
+and this to your crate root (if you're using Rust 2015):
+
+```rust
+extern crate regex;
```
Here's a simple example that matches a date in YYYY-MM-DD format and prints the
@@ -222,7 +228,7 @@ The full set of features one can disable are
### Minimum Rust version policy
-This crate's minimum supported `rustc` version is `1.41.1`.
+This crate's minimum supported `rustc` version is `1.28.0`.
The current **tentative** policy is that the minimum Rust version required
to use this crate can be increased in minor version updates. For example, if
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 6064d70..ee45373 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -1,105 +1,14 @@
// Generated by update_crate_tests.py for tests that depend on this crate.
{
- "imports": [
- {
- "path": "external/rust/crates/base64"
- },
- {
- "path": "external/rust/crates/libsqlite3-sys"
- },
- {
- "path": "external/rust/crates/once_cell"
- },
- {
- "path": "external/rust/crates/tinytemplate"
- },
- {
- "path": "external/rust/crates/tinyvec"
- },
- {
- "path": "external/rust/crates/unicode-xid"
- }
- ],
"presubmit": [
{
"name": "keystore2_test"
},
{
- "name": "legacykeystore_test"
- },
- {
- "name": "regex_test_src_lib"
- },
- {
- "name": "regex_test_tests_test_backtrack"
- },
- {
- "name": "regex_test_tests_test_backtrack_bytes"
- },
- {
- "name": "regex_test_tests_test_backtrack_utf8bytes"
- },
- {
- "name": "regex_test_tests_test_crates_regex"
- },
- {
- "name": "regex_test_tests_test_default"
- },
- {
- "name": "regex_test_tests_test_default_bytes"
- },
- {
- "name": "regex_test_tests_test_nfa"
- },
- {
- "name": "regex_test_tests_test_nfa_bytes"
- },
- {
- "name": "regex_test_tests_test_nfa_utf8bytes"
- },
- {
- "name": "virtualizationservice_device_test"
- }
- ],
- "presubmit-rust": [
- {
- "name": "keystore2_test"
- },
- {
- "name": "legacykeystore_test"
- },
- {
- "name": "regex_test_src_lib"
- },
- {
- "name": "regex_test_tests_test_backtrack"
- },
- {
- "name": "regex_test_tests_test_backtrack_bytes"
- },
- {
- "name": "regex_test_tests_test_backtrack_utf8bytes"
- },
- {
- "name": "regex_test_tests_test_crates_regex"
- },
- {
- "name": "regex_test_tests_test_default"
- },
- {
- "name": "regex_test_tests_test_default_bytes"
- },
- {
- "name": "regex_test_tests_test_nfa"
- },
- {
- "name": "regex_test_tests_test_nfa_bytes"
- },
- {
- "name": "regex_test_tests_test_nfa_utf8bytes"
+ "name": "vpnprofilestore_test"
},
{
- "name": "virtualizationservice_device_test"
+ "name": "libsqlite3-sys_device_test_src_lib"
}
]
}
diff --git a/cargo2android.json b/cargo2android.json
deleted file mode 100644
index 0e54308..0000000
--- a/cargo2android.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
- "apex-available": [
- "//apex_available:platform",
- "com.android.compos",
- "com.android.virt"
- ],
- "dependencies": true,
- "device": true,
- "run": true,
- "tests": true
-}
diff --git a/examples/shootout-regex-dna-bytes.rs b/examples/shootout-regex-dna-bytes.rs
index 773fd9b..ac4c115 100644
--- a/examples/shootout-regex-dna-bytes.rs
+++ b/examples/shootout-regex-dna-bytes.rs
@@ -5,6 +5,8 @@
// contributed by TeXitoi
// contributed by BurntSushi
+extern crate regex;
+
use std::io::{self, Read};
use std::sync::Arc;
use std::thread;
diff --git a/examples/shootout-regex-dna-cheat.rs b/examples/shootout-regex-dna-cheat.rs
index 1bde7ab..c7395b7 100644
--- a/examples/shootout-regex-dna-cheat.rs
+++ b/examples/shootout-regex-dna-cheat.rs
@@ -10,6 +10,8 @@
// replacing them with a single linear scan. i.e., it re-implements
// `replace_all`. As a result, this is around 25% faster. ---AG
+extern crate regex;
+
use std::io::{self, Read};
use std::sync::Arc;
use std::thread;
diff --git a/examples/shootout-regex-dna-replace.rs b/examples/shootout-regex-dna-replace.rs
index 20694e0..681e077 100644
--- a/examples/shootout-regex-dna-replace.rs
+++ b/examples/shootout-regex-dna-replace.rs
@@ -1,3 +1,5 @@
+extern crate regex;
+
use std::io::{self, Read};
macro_rules! regex {
diff --git a/examples/shootout-regex-dna-single-cheat.rs b/examples/shootout-regex-dna-single-cheat.rs
index 70a979c..04ed05a 100644
--- a/examples/shootout-regex-dna-single-cheat.rs
+++ b/examples/shootout-regex-dna-single-cheat.rs
@@ -5,6 +5,8 @@
// contributed by TeXitoi
// contributed by BurntSushi
+extern crate regex;
+
use std::io::{self, Read};
macro_rules! regex {
diff --git a/examples/shootout-regex-dna-single.rs b/examples/shootout-regex-dna-single.rs
index b474059..a70c711 100644
--- a/examples/shootout-regex-dna-single.rs
+++ b/examples/shootout-regex-dna-single.rs
@@ -5,6 +5,8 @@
// contributed by TeXitoi
// contributed by BurntSushi
+extern crate regex;
+
use std::io::{self, Read};
macro_rules! regex {
diff --git a/examples/shootout-regex-dna.rs b/examples/shootout-regex-dna.rs
index b96518e..4527422 100644
--- a/examples/shootout-regex-dna.rs
+++ b/examples/shootout-regex-dna.rs
@@ -5,6 +5,8 @@
// contributed by TeXitoi
// contributed by BurntSushi
+extern crate regex;
+
use std::io::{self, Read};
use std::sync::Arc;
use std::thread;
diff --git a/src/backtrack.rs b/src/backtrack.rs
index a3d25d6..6100c17 100644
--- a/src/backtrack.rs
+++ b/src/backtrack.rs
@@ -16,10 +16,10 @@
// the bitset has to be zeroed on each execution, which becomes quite expensive
// on large bitsets.
-use crate::exec::ProgramCache;
-use crate::input::{Input, InputAt};
-use crate::prog::{InstPtr, Program};
-use crate::re_trait::Slot;
+use exec::ProgramCache;
+use input::{Input, InputAt};
+use prog::{InstPtr, Program};
+use re_trait::Slot;
type Bits = u32;
@@ -196,7 +196,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
}
fn step(&mut self, mut ip: InstPtr, mut at: InputAt) -> bool {
- use crate::prog::Inst::*;
+ use prog::Inst::*;
loop {
// This loop is an optimization to avoid constantly pushing/popping
// from the stack. Namely, if we're pushing a job only to run it
diff --git a/src/compile.rs b/src/compile.rs
index 9a2ed5e..9ffd347 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -4,16 +4,16 @@ use std::iter;
use std::result;
use std::sync::Arc;
-use regex_syntax::hir::{self, Hir};
-use regex_syntax::is_word_byte;
-use regex_syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
+use syntax::hir::{self, Hir};
+use syntax::is_word_byte;
+use syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
-use crate::prog::{
+use prog::{
EmptyLook, Inst, InstBytes, InstChar, InstEmptyLook, InstPtr, InstRanges,
InstSave, InstSplit, Program,
};
-use crate::Error;
+use Error;
type Result = result::Result<Patch, Error>;
type ResultOrEmpty = result::Result<Option<Patch>, Error>;
@@ -38,7 +38,6 @@ pub struct Compiler {
suffix_cache: SuffixCache,
utf8_seqs: Option<Utf8Sequences>,
byte_classes: ByteClassSet,
- extra_inst_bytes: usize,
}
impl Compiler {
@@ -55,7 +54,6 @@ impl Compiler {
suffix_cache: SuffixCache::new(1000),
utf8_seqs: Some(Utf8Sequences::new('\x00', '\x00')),
byte_classes: ByteClassSet::new(),
- extra_inst_bytes: 0,
}
}
@@ -255,8 +253,8 @@ impl Compiler {
/// Ok(None) is returned when an expression is compiled to no
/// instruction, and so no patch.entry value makes sense.
fn c(&mut self, expr: &Hir) -> ResultOrEmpty {
- use crate::prog;
- use regex_syntax::hir::HirKind::*;
+ use prog;
+ use syntax::hir::HirKind::*;
self.check_size()?;
match *expr.kind() {
@@ -318,13 +316,6 @@ impl Compiler {
}
self.compiled.has_unicode_word_boundary = true;
self.byte_classes.set_word_boundary();
- // We also make sure that all ASCII bytes are in a different
- // class from non-ASCII bytes. Otherwise, it's possible for
- // ASCII bytes to get lumped into the same class as non-ASCII
- // bytes. This in turn may cause the lazy DFA to falsely start
- // when it sees an ASCII byte that maps to a byte class with
- // non-ASCII bytes. This ensures that never happens.
- self.byte_classes.set_range(0, 0x7F);
self.c_empty_look(prog::EmptyLook::WordBoundary)
}
WordBoundary(hir::WordBoundary::UnicodeNegate) => {
@@ -337,8 +328,6 @@ impl Compiler {
}
self.compiled.has_unicode_word_boundary = true;
self.byte_classes.set_word_boundary();
- // See comments above for why we set the ASCII range here.
- self.byte_classes.set_range(0, 0x7F);
self.c_empty_look(prog::EmptyLook::NotWordBoundary)
}
WordBoundary(hir::WordBoundary::Ascii) => {
@@ -431,8 +420,6 @@ impl Compiler {
}
fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty {
- use std::mem::size_of;
-
assert!(!ranges.is_empty());
if self.compiled.uses_bytes() {
Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?))
@@ -442,8 +429,6 @@ impl Compiler {
let hole = if ranges.len() == 1 && ranges[0].0 == ranges[0].1 {
self.push_hole(InstHole::Char { c: ranges[0].0 })
} else {
- self.extra_inst_bytes +=
- ranges.len() * (size_of::<char>() * 2);
self.push_hole(InstHole::Ranges { ranges: ranges })
};
Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
@@ -563,7 +548,7 @@ impl Compiler {
}
fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty {
- use regex_syntax::hir::RepetitionKind::*;
+ use syntax::hir::RepetitionKind::*;
match rep.kind {
ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy),
@@ -810,9 +795,7 @@ impl Compiler {
fn check_size(&self) -> result::Result<(), Error> {
use std::mem::size_of;
- let size =
- self.extra_inst_bytes + (self.insts.len() * size_of::<Inst>());
- if size > self.size_limit {
+ if self.insts.len() * size_of::<Inst>() > self.size_limit {
Err(Error::CompiledTooBig(self.size_limit))
} else {
Ok(())
@@ -944,10 +927,9 @@ impl InstHole {
Inst::EmptyLook(InstEmptyLook { goto: goto, look: look })
}
InstHole::Char { c } => Inst::Char(InstChar { goto: goto, c: c }),
- InstHole::Ranges { ref ranges } => Inst::Ranges(InstRanges {
- goto: goto,
- ranges: ranges.clone().into_boxed_slice(),
- }),
+ InstHole::Ranges { ref ranges } => {
+ Inst::Ranges(InstRanges { goto: goto, ranges: ranges.clone() })
+ }
InstHole::Bytes { start, end } => {
Inst::Bytes(InstBytes { goto: goto, start: start, end: end })
}
diff --git a/src/dfa.rs b/src/dfa.rs
index 4b60f4d..9ac0c2c 100644
--- a/src/dfa.rs
+++ b/src/dfa.rs
@@ -42,9 +42,9 @@ use std::iter::repeat;
use std::mem;
use std::sync::Arc;
-use crate::exec::ProgramCache;
-use crate::prog::{Inst, Program};
-use crate::sparse::SparseSet;
+use exec::ProgramCache;
+use prog::{Inst, Program};
+use sparse::SparseSet;
/// Return true if and only if the given program can be executed by a DFA.
///
@@ -55,7 +55,7 @@ use crate::sparse::SparseSet;
/// This function will also return false if the given program has any Unicode
/// instructions (Char or Ranges) since the DFA operates on bytes only.
pub fn can_exec(insts: &Program) -> bool {
- use crate::prog::Inst::*;
+ use prog::Inst::*;
// If for some reason we manage to allocate a regex program with more
// than i32::MAX instructions, then we can't execute the DFA because we
// use 32 bit instruction pointer deltas for memory savings.
@@ -306,7 +306,7 @@ impl State {
StateFlags(self.data[0])
}
- fn inst_ptrs(&self) -> InstPtrs<'_> {
+ fn inst_ptrs(&self) -> InstPtrs {
InstPtrs { base: 0, data: &self.data[1..] }
}
}
@@ -894,7 +894,7 @@ impl<'a> Fsm<'a> {
mut si: StatePtr,
b: Byte,
) -> Option<StatePtr> {
- use crate::prog::Inst::*;
+ use prog::Inst::*;
// Initialize a queue with the current DFA state's NFA states.
qcur.clear();
@@ -1056,8 +1056,8 @@ impl<'a> Fsm<'a> {
q: &mut SparseSet,
flags: EmptyFlags,
) {
- use crate::prog::EmptyLook::*;
- use crate::prog::Inst::*;
+ use prog::EmptyLook::*;
+ use prog::Inst::*;
// We need to traverse the NFA to follow epsilon transitions, so avoid
// recursion with an explicit stack.
@@ -1190,7 +1190,7 @@ impl<'a> Fsm<'a> {
q: &SparseSet,
state_flags: &mut StateFlags,
) -> Option<State> {
- use crate::prog::Inst::*;
+ use prog::Inst::*;
// We need to build up enough information to recognize pre-built states
// in the DFA. Generally speaking, this includes every instruction
@@ -1754,7 +1754,7 @@ impl Byte {
}
impl fmt::Debug for State {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let ips: Vec<usize> = self.inst_ptrs().collect();
f.debug_struct("State")
.field("flags", &self.flags())
@@ -1764,7 +1764,7 @@ impl fmt::Debug for State {
}
impl fmt::Debug for Transitions {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut fmtd = f.debug_map();
for si in 0..self.num_states() {
let s = si * self.num_byte_classes;
@@ -1778,7 +1778,7 @@ impl fmt::Debug for Transitions {
struct TransitionsRow<'a>(&'a [StatePtr]);
impl<'a> fmt::Debug for TransitionsRow<'a> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut fmtd = f.debug_map();
for (b, si) in self.0.iter().enumerate() {
match *si {
@@ -1796,7 +1796,7 @@ impl<'a> fmt::Debug for TransitionsRow<'a> {
}
impl fmt::Debug for StateFlags {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("StateFlags")
.field("is_match", &self.is_match())
.field("is_word", &self.is_word())
@@ -1889,6 +1889,7 @@ fn read_varu32(data: &[u8]) -> (u32, usize) {
#[cfg(test)]
mod tests {
+ extern crate rand;
use super::{
push_inst_ptr, read_vari32, read_varu32, write_vari32, write_varu32,
diff --git a/src/error.rs b/src/error.rs
index 3e0ec75..1c32c85 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -31,7 +31,7 @@ impl ::std::error::Error for Error {
}
impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::Syntax(ref err) => err.fmt(f),
Error::CompiledTooBig(limit) => write!(
@@ -49,7 +49,7 @@ impl fmt::Display for Error {
// but the `Syntax` variant is already storing a `String` anyway, so we might
// as well format it nicely.
impl fmt::Debug for Error {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::Syntax(ref err) => {
let hr: String = repeat('~').take(79).collect();
diff --git a/src/exec.rs b/src/exec.rs
index d5fad1c..3d5a52b 100644
--- a/src/exec.rs
+++ b/src/exec.rs
@@ -5,26 +5,26 @@ use std::sync::Arc;
#[cfg(feature = "perf-literal")]
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
-use regex_syntax::hir::literal::Literals;
-use regex_syntax::hir::Hir;
-use regex_syntax::ParserBuilder;
+use syntax::hir::literal::Literals;
+use syntax::hir::Hir;
+use syntax::ParserBuilder;
-use crate::backtrack;
-use crate::compile::Compiler;
+use backtrack;
+use compile::Compiler;
#[cfg(feature = "perf-dfa")]
-use crate::dfa;
-use crate::error::Error;
-use crate::input::{ByteInput, CharInput};
-use crate::literal::LiteralSearcher;
-use crate::pikevm;
-use crate::pool::{Pool, PoolGuard};
-use crate::prog::Program;
-use crate::re_builder::RegexOptions;
-use crate::re_bytes;
-use crate::re_set;
-use crate::re_trait::{Locations, RegularExpression, Slot};
-use crate::re_unicode;
-use crate::utf8::next_utf8;
+use dfa;
+use error::Error;
+use input::{ByteInput, CharInput};
+use literal::LiteralSearcher;
+use pikevm;
+use pool::{Pool, PoolGuard};
+use prog::Program;
+use re_builder::RegexOptions;
+use re_bytes;
+use re_set;
+use re_trait::{Locations, RegularExpression, Slot};
+use re_unicode;
+use utf8::next_utf8;
/// `Exec` manages the execution of a regular expression.
///
@@ -140,7 +140,7 @@ impl ExecBuilder {
///
/// Note that when compiling 2 or more regular expressions, capture groups
/// are completely unsupported. (This means both `find` and `captures`
- /// won't work.)
+ /// wont work.)
pub fn new_many<I, S>(res: I) -> Self
where
S: AsRef<str>,
@@ -373,6 +373,9 @@ impl ExecBuilder {
AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.auto_configure(&lits)
+ // We always want this to reduce size, regardless
+ // of what auto-configure does.
+ .byte_classes(true)
.build_with_size::<u32, _, _>(&lits)
// This should never happen because we'd long exceed the
// compilation limit for regexes first.
@@ -736,7 +739,7 @@ impl<'c> ExecNoSync<'c> {
text: &[u8],
start: usize,
) -> dfa::Result<(usize, usize)> {
- use crate::dfa::Result::*;
+ use dfa::Result::*;
let end = match dfa::Fsm::forward(
&self.ro.dfa,
self.cache.value(),
@@ -776,7 +779,7 @@ impl<'c> ExecNoSync<'c> {
text: &[u8],
start: usize,
) -> dfa::Result<(usize, usize)> {
- use crate::dfa::Result::*;
+ use dfa::Result::*;
match dfa::Fsm::reverse(
&self.ro.dfa_reverse,
self.cache.value(),
@@ -832,7 +835,7 @@ impl<'c> ExecNoSync<'c> {
text: &[u8],
original_start: usize,
) -> Option<dfa::Result<(usize, usize)>> {
- use crate::dfa::Result::*;
+ use dfa::Result::*;
let lcs = self.ro.suffixes.lcs();
debug_assert!(lcs.len() >= 1);
@@ -877,7 +880,7 @@ impl<'c> ExecNoSync<'c> {
text: &[u8],
start: usize,
) -> dfa::Result<(usize, usize)> {
- use crate::dfa::Result::*;
+ use dfa::Result::*;
let match_start = match self.exec_dfa_reverse_suffix(text, start) {
None => return self.find_dfa_forward(text, start),
@@ -1260,7 +1263,7 @@ impl<'c> ExecNoSyncStr<'c> {
impl Exec {
/// Get a searcher that isn't Sync.
#[cfg_attr(feature = "perf-inline", inline(always))]
- pub fn searcher(&self) -> ExecNoSync<'_> {
+ pub fn searcher(&self) -> ExecNoSync {
ExecNoSync {
ro: &self.ro, // a clone is too expensive here! (and not needed)
cache: self.pool.get(),
@@ -1269,7 +1272,7 @@ impl Exec {
/// Get a searcher that isn't Sync and can match on &str.
#[cfg_attr(feature = "perf-inline", inline(always))]
- pub fn searcher_str(&self) -> ExecNoSyncStr<'_> {
+ pub fn searcher_str(&self) -> ExecNoSyncStr {
ExecNoSyncStr(self.searcher())
}
@@ -1547,7 +1550,7 @@ impl ProgramCacheInner {
/// literals, and if so, returns them. Otherwise, this returns None.
#[cfg(feature = "perf-literal")]
fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
- use regex_syntax::hir::{HirKind, Literal};
+ use syntax::hir::{HirKind, Literal};
// This is pretty hacky, but basically, if `is_alternation_literal` is
// true, then we can make several assumptions about the structure of our
@@ -1599,7 +1602,7 @@ fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
mod test {
#[test]
fn uppercut_s_backtracking_bytes_default_bytes_mismatch() {
- use crate::internal::ExecBuilder;
+ use internal::ExecBuilder;
let backtrack_bytes_re = ExecBuilder::new("^S")
.bounded_backtracking()
@@ -1627,7 +1630,7 @@ mod test {
#[test]
fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() {
- use crate::internal::ExecBuilder;
+ use internal::ExecBuilder;
let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)")
.bounded_backtracking()
diff --git a/src/expand.rs b/src/expand.rs
index fd9c2d0..70dbf91 100644
--- a/src/expand.rs
+++ b/src/expand.rs
@@ -1,12 +1,12 @@
use std::str;
-use crate::find_byte::find_byte;
+use find_byte::find_byte;
-use crate::re_bytes;
-use crate::re_unicode;
+use re_bytes;
+use re_unicode;
pub fn expand_str(
- caps: &re_unicode::Captures<'_>,
+ caps: &re_unicode::Captures,
mut replacement: &str,
dst: &mut String,
) {
@@ -48,7 +48,7 @@ pub fn expand_str(
}
pub fn expand_bytes(
- caps: &re_bytes::Captures<'_>,
+ caps: &re_bytes::Captures,
mut replacement: &[u8],
dst: &mut Vec<u8>,
) {
@@ -125,7 +125,7 @@ impl From<usize> for Ref<'static> {
/// starting at the beginning of `replacement`.
///
/// If no such valid reference could be found, None is returned.
-fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
+fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
let mut i = 0;
let rep: &[u8] = replacement.as_ref();
if rep.len() <= 1 || rep[0] != b'$' {
@@ -157,7 +157,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
})
}
-fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> {
+fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef> {
let start = i;
while rep.get(i).map_or(false, |&b| b != b'}') {
i += 1;
diff --git a/src/input.rs b/src/input.rs
index 5d50ee3..3afa2d0 100644
--- a/src/input.rs
+++ b/src/input.rs
@@ -4,9 +4,11 @@ use std::fmt;
use std::ops;
use std::u32;
-use crate::literal::LiteralSearcher;
-use crate::prog::InstEmptyLook;
-use crate::utf8::{decode_last_utf8, decode_utf8};
+use syntax;
+
+use literal::LiteralSearcher;
+use prog::InstEmptyLook;
+use utf8::{decode_last_utf8, decode_utf8};
/// Represents a location in the input.
#[derive(Clone, Copy, Debug)]
@@ -173,7 +175,7 @@ impl<'t> Input for CharInput<'t> {
}
fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
- use crate::prog::EmptyLook::*;
+ use prog::EmptyLook::*;
match empty.look {
StartLine => {
let c = self.previous_char(at);
@@ -266,7 +268,7 @@ impl<'t> Input for ByteInput<'t> {
}
fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
- use crate::prog::EmptyLook::*;
+ use prog::EmptyLook::*;
match empty.look {
StartLine => {
let c = self.previous_char(at);
@@ -346,7 +348,7 @@ impl<'t> Input for ByteInput<'t> {
pub struct Char(u32);
impl fmt::Debug for Char {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match char::from_u32(self.0) {
None => write!(f, "Empty"),
Some(c) => write!(f, "{:?}", c),
@@ -377,7 +379,7 @@ impl Char {
// available. However, our compiler ensures that if a Unicode word
// boundary is used, then the data must also be available. If it isn't,
// then the compiler returns an error.
- char::from_u32(self.0).map_or(false, regex_syntax::is_word_character)
+ char::from_u32(self.0).map_or(false, syntax::is_word_character)
}
/// Returns true iff the byte is a word byte.
@@ -385,7 +387,7 @@ impl Char {
/// If the byte is absent, then false is returned.
pub fn is_word_byte(self) -> bool {
match char::from_u32(self.0) {
- Some(c) if c <= '\u{7F}' => regex_syntax::is_word_byte(c as u8),
+ Some(c) if c <= '\u{7F}' => syntax::is_word_byte(c as u8),
None | Some(_) => false,
}
}
diff --git a/src/lib.rs b/src/lib.rs
index 7f2dec8..357ac0d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -22,6 +22,12 @@ used by adding `regex` to your dependencies in your project's `Cargo.toml`.
regex = "1"
```
+If you're using Rust 2015, then you'll also need to add it to your crate root:
+
+```rust
+extern crate regex;
+```
+
# Example: find a date
General use of regular expressions in this package involves compiling an
@@ -62,7 +68,9 @@ regular expressions are compiled exactly once.
For example:
```rust
-use lazy_static::lazy_static;
+#[macro_use] extern crate lazy_static;
+extern crate regex;
+
use regex::Regex;
fn some_helper_function(text: &str) -> bool {
@@ -86,7 +94,7 @@ matches. For example, to find all dates in a string and be able to access
them by their component pieces:
```rust
-# use regex::Regex;
+# extern crate regex; use regex::Regex;
# fn main() {
let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
let text = "2012-03-14, 2013-01-01 and 2014-07-05";
@@ -111,7 +119,7 @@ clearer, we can *name* our capture groups and use those names as variables
in our replacement text:
```rust
-# use regex::Regex;
+# extern crate regex; use regex::Regex;
# fn main() {
let re = Regex::new(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})").unwrap();
let before = "2012-03-14, 2013-01-01 and 2014-07-05";
@@ -128,7 +136,7 @@ Note that if your regex gets complicated, you can use the `x` flag to
enable insignificant whitespace mode, which also lets you write comments:
```rust
-# use regex::Regex;
+# extern crate regex; use regex::Regex;
# fn main() {
let re = Regex::new(r"(?x)
(?P<y>\d{4}) # the year
@@ -209,7 +217,7 @@ Unicode scalar values. This means you can use Unicode characters directly
in your expression:
```rust
-# use regex::Regex;
+# extern crate regex; use regex::Regex;
# fn main() {
let re = Regex::new(r"(?i)Δ+").unwrap();
let mat = re.find("ΔδΔ").unwrap();
@@ -236,7 +244,7 @@ of boolean properties are available as character classes. For example, you can
match a sequence of numerals, Greek or Cherokee letters:
```rust
-# use regex::Regex;
+# extern crate regex; use regex::Regex;
# fn main() {
let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap();
let mat = re.find("abcΔᎠβⅠᏴγδⅡxyz").unwrap();
@@ -383,7 +391,7 @@ Flags can be toggled within a pattern. Here's an example that matches
case-insensitively for the first part but case-sensitively for the second part:
```rust
-# use regex::Regex;
+# extern crate regex; use regex::Regex;
# fn main() {
let re = Regex::new(r"(?i)a+(?-i)b+").unwrap();
let cap = re.captures("AaAaAbbBBBb").unwrap();
@@ -417,7 +425,7 @@ Here is an example that uses an ASCII word boundary instead of a Unicode
word boundary:
```rust
-# use regex::Regex;
+# extern crate regex; use regex::Regex;
# fn main() {
let re = Regex::new(r"(?-u:\b).+(?-u:\b)").unwrap();
let cap = re.captures("$$abc$$").unwrap();
@@ -606,30 +614,38 @@ another matching engine with fixed memory requirements.
*/
#![deny(missing_docs)]
+#![cfg_attr(test, deny(warnings))]
#![cfg_attr(feature = "pattern", feature(pattern))]
#![warn(missing_debug_implementations)]
#[cfg(not(feature = "std"))]
compile_error!("`std` feature is currently required to build this crate");
-// To check README's example
-// TODO: Re-enable this once the MSRV is 1.43 or greater.
-// See: https://github.com/rust-lang/regex/issues/684
-// See: https://github.com/rust-lang/regex/issues/685
+#[cfg(feature = "perf-literal")]
+extern crate aho_corasick;
+// #[cfg(doctest)]
+// extern crate doc_comment;
+#[cfg(feature = "perf-literal")]
+extern crate memchr;
+#[cfg(test)]
+#[cfg_attr(feature = "perf-literal", macro_use)]
+extern crate quickcheck;
+extern crate regex_syntax as syntax;
+
// #[cfg(doctest)]
// doc_comment::doctest!("../README.md");
#[cfg(feature = "std")]
-pub use crate::error::Error;
+pub use error::Error;
#[cfg(feature = "std")]
-pub use crate::re_builder::set_unicode::*;
+pub use re_builder::set_unicode::*;
#[cfg(feature = "std")]
-pub use crate::re_builder::unicode::*;
+pub use re_builder::unicode::*;
#[cfg(feature = "std")]
-pub use crate::re_set::unicode::*;
+pub use re_set::unicode::*;
#[cfg(feature = "std")]
#[cfg(feature = "std")]
-pub use crate::re_unicode::{
+pub use re_unicode::{
escape, CaptureLocations, CaptureMatches, CaptureNames, Captures,
Locations, Match, Matches, NoExpand, Regex, Replacer, ReplacerRef, Split,
SplitN, SubCaptureMatches,
@@ -724,10 +740,10 @@ performance on `&str`.
*/
#[cfg(feature = "std")]
pub mod bytes {
- pub use crate::re_builder::bytes::*;
- pub use crate::re_builder::set_bytes::*;
- pub use crate::re_bytes::*;
- pub use crate::re_set::bytes::*;
+ pub use re_builder::bytes::*;
+ pub use re_builder::set_bytes::*;
+ pub use re_bytes::*;
+ pub use re_set::bytes::*;
}
mod backtrack;
@@ -738,6 +754,8 @@ mod error;
mod exec;
mod expand;
mod find_byte;
+#[cfg(feature = "perf-literal")]
+mod freqs;
mod input;
mod literal;
#[cfg(feature = "pattern")]
@@ -759,9 +777,9 @@ mod utf8;
#[doc(hidden)]
#[cfg(feature = "std")]
pub mod internal {
- pub use crate::compile::Compiler;
- pub use crate::exec::{Exec, ExecBuilder};
- pub use crate::input::{Char, CharInput, Input, InputAt};
- pub use crate::literal::LiteralSearcher;
- pub use crate::prog::{EmptyLook, Inst, InstRanges, Program};
+ pub use compile::Compiler;
+ pub use exec::{Exec, ExecBuilder};
+ pub use input::{Char, CharInput, Input, InputAt};
+ pub use literal::LiteralSearcher;
+ pub use prog::{EmptyLook, Inst, InstRanges, Program};
}
diff --git a/src/literal/imp.rs b/src/literal/imp.rs
index 82f050a..e4d04ed 100644
--- a/src/literal/imp.rs
+++ b/src/literal/imp.rs
@@ -1,8 +1,11 @@
+use std::cmp;
use std::mem;
use aho_corasick::{self, packed, AhoCorasick, AhoCorasickBuilder};
-use memchr::{memchr, memchr2, memchr3, memmem};
-use regex_syntax::hir::literal::{Literal, Literals};
+use memchr::{memchr, memchr2, memchr3};
+use syntax::hir::literal::{Literal, Literals};
+
+use freqs::BYTE_FREQUENCIES;
/// A prefix extracted from a compiled regular expression.
///
@@ -12,8 +15,8 @@ use regex_syntax::hir::literal::{Literal, Literals};
#[derive(Clone, Debug)]
pub struct LiteralSearcher {
complete: bool,
- lcp: Memmem,
- lcs: Memmem,
+ lcp: FreqyPacked,
+ lcs: FreqyPacked,
matcher: Matcher,
}
@@ -23,8 +26,10 @@ enum Matcher {
Empty,
/// A set of four or more single byte literals.
Bytes(SingleByteSet),
- /// A single substring, using vector accelerated routines when available.
- Memmem(Memmem),
+ /// A single substring, find using memchr and frequency analysis.
+ FreqyPacked(FreqyPacked),
+ /// A single substring, find using Boyer-Moore.
+ BoyerMoore(BoyerMooreSearch),
/// An Aho-Corasick automaton.
AC { ac: AhoCorasick<u32>, lits: Vec<Literal> },
/// A packed multiple substring searcher, using SIMD.
@@ -58,8 +63,8 @@ impl LiteralSearcher {
let complete = lits.all_complete();
LiteralSearcher {
complete: complete,
- lcp: Memmem::new(lits.longest_common_prefix()),
- lcs: Memmem::new(lits.longest_common_suffix()),
+ lcp: FreqyPacked::new(lits.longest_common_prefix().to_vec()),
+ lcs: FreqyPacked::new(lits.longest_common_suffix().to_vec()),
matcher: matcher,
}
}
@@ -81,7 +86,8 @@ impl LiteralSearcher {
match self.matcher {
Empty => Some((0, 0)),
Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)),
- Memmem(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
+ FreqyPacked(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
+ BoyerMoore(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
AC { ref ac, .. } => {
ac.find(haystack).map(|m| (m.start(), m.end()))
}
@@ -118,23 +124,24 @@ impl LiteralSearcher {
}
/// Returns an iterator over all literals to be matched.
- pub fn iter(&self) -> LiteralIter<'_> {
+ pub fn iter(&self) -> LiteralIter {
match self.matcher {
Matcher::Empty => LiteralIter::Empty,
Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense),
- Matcher::Memmem(ref s) => LiteralIter::Single(&s.finder.needle()),
+ Matcher::FreqyPacked(ref s) => LiteralIter::Single(&s.pat),
+ Matcher::BoyerMoore(ref s) => LiteralIter::Single(&s.pattern),
Matcher::AC { ref lits, .. } => LiteralIter::AC(lits),
Matcher::Packed { ref lits, .. } => LiteralIter::Packed(lits),
}
}
/// Returns a matcher for the longest common prefix of this matcher.
- pub fn lcp(&self) -> &Memmem {
+ pub fn lcp(&self) -> &FreqyPacked {
&self.lcp
}
/// Returns a matcher for the longest common suffix of this matcher.
- pub fn lcs(&self) -> &Memmem {
+ pub fn lcs(&self) -> &FreqyPacked {
&self.lcs
}
@@ -149,7 +156,8 @@ impl LiteralSearcher {
match self.matcher {
Empty => 0,
Bytes(ref sset) => sset.dense.len(),
- Memmem(_) => 1,
+ FreqyPacked(_) => 1,
+ BoyerMoore(_) => 1,
AC { ref ac, .. } => ac.pattern_count(),
Packed { ref lits, .. } => lits.len(),
}
@@ -161,7 +169,8 @@ impl LiteralSearcher {
match self.matcher {
Empty => 0,
Bytes(ref sset) => sset.approximate_size(),
- Memmem(ref single) => single.approximate_size(),
+ FreqyPacked(ref single) => single.approximate_size(),
+ BoyerMoore(ref single) => single.approximate_size(),
AC { ref ac, .. } => ac.heap_bytes(),
Packed { ref s, .. } => s.heap_bytes(),
}
@@ -196,7 +205,12 @@ impl Matcher {
return Matcher::Bytes(sset);
}
if lits.literals().len() == 1 {
- return Matcher::Memmem(Memmem::new(&lits.literals()[0]));
+ let lit = lits.literals()[0].to_vec();
+ if BoyerMooreSearch::should_use(lit.as_slice()) {
+ return Matcher::BoyerMoore(BoyerMooreSearch::new(lit));
+ } else {
+ return Matcher::FreqyPacked(FreqyPacked::new(lit));
+ }
}
let pats = lits.literals().to_owned();
@@ -353,27 +367,116 @@ impl SingleByteSet {
}
}
-/// A simple wrapper around the memchr crate's memmem implementation.
+/// Provides an implementation of fast subtring search using frequency
+/// analysis.
///
-/// The API this exposes mirrors the API of previous substring searchers that
-/// this supplanted.
+/// memchr is so fast that we do everything we can to keep the loop in memchr
+/// for as long as possible. The easiest way to do this is to intelligently
+/// pick the byte to send to memchr. The best byte is the byte that occurs
+/// least frequently in the haystack. Since doing frequency analysis on the
+/// haystack is far too expensive, we compute a set of fixed frequencies up
+/// front and hard code them in src/freqs.rs. Frequency analysis is done via
+/// scripts/frequencies.py.
#[derive(Clone, Debug)]
-pub struct Memmem {
- finder: memmem::Finder<'static>,
+pub struct FreqyPacked {
+ /// The pattern.
+ pat: Vec<u8>,
+ /// The number of Unicode characters in the pattern. This is useful for
+ /// determining the effective length of a pattern when deciding which
+ /// optimizations to perform. A trailing incomplete UTF-8 sequence counts
+ /// as one character.
char_len: usize,
+ /// The rarest byte in the pattern, according to pre-computed frequency
+ /// analysis.
+ rare1: u8,
+ /// The offset of the rarest byte in `pat`.
+ rare1i: usize,
+ /// The second rarest byte in the pattern, according to pre-computed
+ /// frequency analysis. (This may be equivalent to the rarest byte.)
+ ///
+ /// The second rarest byte is used as a type of guard for quickly detecting
+ /// a mismatch after memchr locates an instance of the rarest byte. This
+ /// is a hedge against pathological cases where the pre-computed frequency
+ /// analysis may be off. (But of course, does not prevent *all*
+ /// pathological cases.)
+ rare2: u8,
+ /// The offset of the second rarest byte in `pat`.
+ rare2i: usize,
}
-impl Memmem {
- fn new(pat: &[u8]) -> Memmem {
- Memmem {
- finder: memmem::Finder::new(pat).into_owned(),
- char_len: char_len_lossy(pat),
+impl FreqyPacked {
+ fn new(pat: Vec<u8>) -> FreqyPacked {
+ if pat.is_empty() {
+ return FreqyPacked::empty();
+ }
+
+ // Find the rarest two bytes. Try to make them distinct (but it's not
+ // required).
+ let mut rare1 = pat[0];
+ let mut rare2 = pat[0];
+ for b in pat[1..].iter().cloned() {
+ if freq_rank(b) < freq_rank(rare1) {
+ rare1 = b;
+ }
+ }
+ for &b in &pat {
+ if rare1 == rare2 {
+ rare2 = b
+ } else if b != rare1 && freq_rank(b) < freq_rank(rare2) {
+ rare2 = b;
+ }
+ }
+
+ // And find the offsets of their last occurrences.
+ let rare1i = pat.iter().rposition(|&b| b == rare1).unwrap();
+ let rare2i = pat.iter().rposition(|&b| b == rare2).unwrap();
+
+ let char_len = char_len_lossy(&pat);
+ FreqyPacked {
+ pat: pat,
+ char_len: char_len,
+ rare1: rare1,
+ rare1i: rare1i,
+ rare2: rare2,
+ rare2i: rare2i,
+ }
+ }
+
+ fn empty() -> FreqyPacked {
+ FreqyPacked {
+ pat: vec![],
+ char_len: 0,
+ rare1: 0,
+ rare1i: 0,
+ rare2: 0,
+ rare2i: 0,
}
}
#[cfg_attr(feature = "perf-inline", inline(always))]
pub fn find(&self, haystack: &[u8]) -> Option<usize> {
- self.finder.find(haystack)
+ let pat = &*self.pat;
+ if haystack.len() < pat.len() || pat.is_empty() {
+ return None;
+ }
+ let mut i = self.rare1i;
+ while i < haystack.len() {
+ i += match memchr(self.rare1, &haystack[i..]) {
+ None => return None,
+ Some(i) => i,
+ };
+ let start = i - self.rare1i;
+ let end = start + pat.len();
+ if end > haystack.len() {
+ return None;
+ }
+ let aligned = &haystack[start..end];
+ if aligned[self.rare2i] == self.rare2 && aligned == &*self.pat {
+ return Some(start);
+ }
+ i += 1;
+ }
+ None
}
#[cfg_attr(feature = "perf-inline", inline(always))]
@@ -381,11 +484,11 @@ impl Memmem {
if text.len() < self.len() {
return false;
}
- &text[text.len() - self.len()..] == self.finder.needle()
+ text[text.len() - self.len()..] == *self.pat
}
pub fn len(&self) -> usize {
- self.finder.needle().len()
+ self.pat.len()
}
pub fn char_len(&self) -> usize {
@@ -393,10 +496,627 @@ impl Memmem {
}
fn approximate_size(&self) -> usize {
- self.finder.needle().len() * mem::size_of::<u8>()
+ self.pat.len() * mem::size_of::<u8>()
}
}
fn char_len_lossy(bytes: &[u8]) -> usize {
String::from_utf8_lossy(bytes).chars().count()
}
+
+/// An implementation of Tuned Boyer-Moore as laid out by
+/// Andrew Hume and Daniel Sunday in "Fast String Searching".
+/// O(n) in the size of the input.
+///
+/// Fast string searching algorithms come in many variations,
+/// but they can generally be described in terms of three main
+/// components.
+///
+/// The skip loop is where the string searcher wants to spend
+/// as much time as possible. Exactly which character in the
+/// pattern the skip loop examines varies from algorithm to
+/// algorithm, but in the simplest case this loop repeated
+/// looks at the last character in the pattern and jumps
+/// forward in the input if it is not in the pattern.
+/// Robert Boyer and J Moore called this the "fast" loop in
+/// their original paper.
+///
+/// The match loop is responsible for actually examining the
+/// whole potentially matching substring. In order to fail
+/// faster, the match loop sometimes has a guard test attached.
+/// The guard test uses frequency analysis of the different
+/// characters in the pattern to choose the least frequency
+/// occurring character and use it to find match failures
+/// as quickly as possible.
+///
+/// The shift rule governs how the algorithm will shuffle its
+/// test window in the event of a failure during the match loop.
+/// Certain shift rules allow the worst-case run time of the
+/// algorithm to be shown to be O(n) in the size of the input
+/// rather than O(nm) in the size of the input and the size
+/// of the pattern (as naive Boyer-Moore is).
+///
+/// "Fast String Searching", in addition to presenting a tuned
+/// algorithm, provides a comprehensive taxonomy of the many
+/// different flavors of string searchers. Under that taxonomy
+/// TBM, the algorithm implemented here, uses an unrolled fast
+/// skip loop with memchr fallback, a forward match loop with guard,
+/// and the mini Sunday's delta shift rule. To unpack that you'll have to
+/// read the paper.
+#[derive(Clone, Debug)]
+pub struct BoyerMooreSearch {
+ /// The pattern we are going to look for in the haystack.
+ pattern: Vec<u8>,
+
+ /// The skip table for the skip loop.
+ ///
+ /// Maps the character at the end of the input
+ /// to a shift.
+ skip_table: Vec<usize>,
+
+ /// The guard character (least frequently occurring char).
+ guard: u8,
+ /// The reverse-index of the guard character in the pattern.
+ guard_reverse_idx: usize,
+
+ /// Daniel Sunday's mini generalized delta2 shift table.
+ ///
+ /// We use a skip loop, so we only have to provide a shift
+ /// for the skip char (last char). This is why it is a mini
+ /// shift rule.
+ md2_shift: usize,
+}
+
+impl BoyerMooreSearch {
+ /// Create a new string searcher, performing whatever
+ /// compilation steps are required.
+ fn new(pattern: Vec<u8>) -> Self {
+ debug_assert!(!pattern.is_empty());
+
+ let (g, gi) = Self::select_guard(pattern.as_slice());
+ let skip_table = Self::compile_skip_table(pattern.as_slice());
+ let md2_shift = Self::compile_md2_shift(pattern.as_slice());
+ BoyerMooreSearch {
+ pattern: pattern,
+ skip_table: skip_table,
+ guard: g,
+ guard_reverse_idx: gi,
+ md2_shift: md2_shift,
+ }
+ }
+
+ /// Find the pattern in `haystack`, returning the offset
+ /// of the start of the first occurrence of the pattern
+ /// in `haystack`.
+ #[inline]
+ fn find(&self, haystack: &[u8]) -> Option<usize> {
+ if haystack.len() < self.pattern.len() {
+ return None;
+ }
+
+ let mut window_end = self.pattern.len() - 1;
+
+ // Inspired by the grep source. It is a way
+ // to do correct loop unrolling without having to place
+ // a crashpad of terminating charicters at the end in
+ // the way described in the Fast String Searching paper.
+ const NUM_UNROLL: usize = 10;
+ // 1 for the initial position, and 1 for the md2 shift
+ let short_circut = (NUM_UNROLL + 2) * self.pattern.len();
+
+ if haystack.len() > short_circut {
+ // just 1 for the md2 shift
+ let backstop =
+ haystack.len() - ((NUM_UNROLL + 1) * self.pattern.len());
+ loop {
+ window_end =
+ match self.skip_loop(haystack, window_end, backstop) {
+ Some(i) => i,
+ None => return None,
+ };
+ if window_end >= backstop {
+ break;
+ }
+
+ if self.check_match(haystack, window_end) {
+ return Some(window_end - (self.pattern.len() - 1));
+ } else {
+ let skip = self.skip_table[haystack[window_end] as usize];
+ window_end +=
+ if skip == 0 { self.md2_shift } else { skip };
+ continue;
+ }
+ }
+ }
+
+ // now process the input after the backstop
+ while window_end < haystack.len() {
+ let mut skip = self.skip_table[haystack[window_end] as usize];
+ if skip == 0 {
+ if self.check_match(haystack, window_end) {
+ return Some(window_end - (self.pattern.len() - 1));
+ } else {
+ skip = self.md2_shift;
+ }
+ }
+ window_end += skip;
+ }
+
+ None
+ }
+
+ fn len(&self) -> usize {
+ return self.pattern.len();
+ }
+
+ /// The key heuristic behind which the BoyerMooreSearch lives.
+ ///
+ /// See `rust-lang/regex/issues/408`.
+ ///
+ /// Tuned Boyer-Moore is actually pretty slow! It turns out a handrolled
+ /// platform-specific memchr routine with a bit of frequency
+ /// analysis sprinkled on top actually wins most of the time.
+ /// However, there are a few cases where Tuned Boyer-Moore still
+ /// wins.
+ ///
+ /// If the haystack is random, frequency analysis doesn't help us,
+ /// so Boyer-Moore will win for sufficiently large needles.
+ /// Unfortunately, there is no obvious way to determine this
+ /// ahead of time.
+ ///
+ /// If the pattern itself consists of very common characters,
+ /// frequency analysis won't get us anywhere. The most extreme
+ /// example of this is a pattern like `eeeeeeeeeeeeeeee`. Fortunately,
+ /// this case is wholly determined by the pattern, so we can actually
+ /// implement the heuristic.
+ ///
+ /// A third case is if the pattern is sufficiently long. The idea
+ /// here is that once the pattern gets long enough the Tuned
+ /// Boyer-Moore skip loop will start making strides long enough
+ /// to beat the asm deep magic that is memchr.
+ fn should_use(pattern: &[u8]) -> bool {
+ // The minimum pattern length required to use TBM.
+ const MIN_LEN: usize = 9;
+ // The minimum frequency rank (lower is rarer) that every byte in the
+ // pattern must have in order to use TBM. That is, if the pattern
+ // contains _any_ byte with a lower rank, then TBM won't be used.
+ const MIN_CUTOFF: usize = 150;
+ // The maximum frequency rank for any byte.
+ const MAX_CUTOFF: usize = 255;
+ // The scaling factor used to determine the actual cutoff frequency
+ // to use (keeping in mind that the minimum frequency rank is bounded
+ // by MIN_CUTOFF). This scaling factor is an attempt to make TBM more
+ // likely to be used as the pattern grows longer. That is, longer
+ // patterns permit somewhat less frequent bytes than shorter patterns,
+ // under the assumption that TBM gets better as the pattern gets
+ // longer.
+ const LEN_CUTOFF_PROPORTION: usize = 4;
+
+ let scaled_rank = pattern.len().wrapping_mul(LEN_CUTOFF_PROPORTION);
+ let cutoff = cmp::max(
+ MIN_CUTOFF,
+ MAX_CUTOFF - cmp::min(MAX_CUTOFF, scaled_rank),
+ );
+ // The pattern must be long enough to be worthwhile. e.g., memchr will
+ // be faster on `e` because it is short even though e is quite common.
+ pattern.len() > MIN_LEN
+ // all the bytes must be more common than the cutoff.
+ && pattern.iter().all(|c| freq_rank(*c) >= cutoff)
+ }
+
+ /// Check to see if there is a match at the given position
+ #[inline]
+ fn check_match(&self, haystack: &[u8], window_end: usize) -> bool {
+ // guard test
+ if haystack[window_end - self.guard_reverse_idx] != self.guard {
+ return false;
+ }
+
+ // match loop
+ let window_start = window_end - (self.pattern.len() - 1);
+ for i in 0..self.pattern.len() {
+ if self.pattern[i] != haystack[window_start + i] {
+ return false;
+ }
+ }
+
+ true
+ }
+
+ /// Skip forward according to the shift table.
+ ///
+ /// Returns the offset of the next occurrence
+ /// of the last char in the pattern, or the none
+ /// if it never reappears. If `skip_loop` hits the backstop
+ /// it will leave early.
+ #[inline]
+ fn skip_loop(
+ &self,
+ haystack: &[u8],
+ mut window_end: usize,
+ backstop: usize,
+ ) -> Option<usize> {
+ let window_end_snapshot = window_end;
+ let skip_of = |we: usize| -> usize {
+ // Unsafe might make this faster, but the benchmarks
+ // were hard to interpret.
+ self.skip_table[haystack[we] as usize]
+ };
+
+ loop {
+ let mut skip = skip_of(window_end);
+ window_end += skip;
+ skip = skip_of(window_end);
+ window_end += skip;
+ if skip != 0 {
+ skip = skip_of(window_end);
+ window_end += skip;
+ skip = skip_of(window_end);
+ window_end += skip;
+ skip = skip_of(window_end);
+ window_end += skip;
+ if skip != 0 {
+ skip = skip_of(window_end);
+ window_end += skip;
+ skip = skip_of(window_end);
+ window_end += skip;
+ skip = skip_of(window_end);
+ window_end += skip;
+ if skip != 0 {
+ skip = skip_of(window_end);
+ window_end += skip;
+ skip = skip_of(window_end);
+ window_end += skip;
+
+ // If ten iterations did not make at least 16 words
+ // worth of progress, we just fall back on memchr.
+ if window_end - window_end_snapshot
+ > 16 * mem::size_of::<usize>()
+ {
+ // Returning a window_end >= backstop will
+ // immediatly break us out of the inner loop in
+ // `find`.
+ if window_end >= backstop {
+ return Some(window_end);
+ }
+
+ continue; // we made enough progress
+ } else {
+ // In case we are already there, and so that
+ // we will catch the guard char.
+ window_end = window_end
+ .checked_sub(1 + self.guard_reverse_idx)
+ .unwrap_or(0);
+
+ match memchr(self.guard, &haystack[window_end..]) {
+ None => return None,
+ Some(g_idx) => {
+ return Some(
+ window_end
+ + g_idx
+ + self.guard_reverse_idx,
+ );
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return Some(window_end);
+ }
+ }
+
+ /// Compute the ufast skip table.
+ fn compile_skip_table(pattern: &[u8]) -> Vec<usize> {
+ let mut tab = vec![pattern.len(); 256];
+
+ // For every char in the pattern, we write a skip
+ // that will line us up with the rightmost occurrence.
+ //
+ // N.B. the sentinel (0) is written by the last
+ // loop iteration.
+ for (i, c) in pattern.iter().enumerate() {
+ tab[*c as usize] = (pattern.len() - 1) - i;
+ }
+
+ tab
+ }
+
+ /// Select the guard character based off of the precomputed
+ /// frequency table.
+ fn select_guard(pattern: &[u8]) -> (u8, usize) {
+ let mut rarest = pattern[0];
+ let mut rarest_rev_idx = pattern.len() - 1;
+ for (i, c) in pattern.iter().enumerate() {
+ if freq_rank(*c) < freq_rank(rarest) {
+ rarest = *c;
+ rarest_rev_idx = (pattern.len() - 1) - i;
+ }
+ }
+
+ (rarest, rarest_rev_idx)
+ }
+
+ /// If there is another occurrence of the skip
+ /// char, shift to it, otherwise just shift to
+ /// the next window.
+ fn compile_md2_shift(pattern: &[u8]) -> usize {
+ let shiftc = *pattern.last().unwrap();
+
+ // For a pattern of length 1 we will never apply the
+ // shift rule, so we use a poison value on the principle
+ // that failing fast is a good thing.
+ if pattern.len() == 1 {
+ return 0xDEADBEAF;
+ }
+
+ let mut i = pattern.len() - 2;
+ while i > 0 {
+ if pattern[i] == shiftc {
+ return (pattern.len() - 1) - i;
+ }
+ i -= 1;
+ }
+
+ // The skip char never re-occurs in the pattern, so
+ // we can just shift the whole window length.
+ pattern.len() - 1
+ }
+
+ fn approximate_size(&self) -> usize {
+ (self.pattern.len() * mem::size_of::<u8>())
+ + (256 * mem::size_of::<usize>()) // skip table
+ }
+}
+
+fn freq_rank(b: u8) -> usize {
+ BYTE_FREQUENCIES[b as usize] as usize
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{BoyerMooreSearch, FreqyPacked};
+
+ //
+ // Unit Tests
+ //
+
+ // The "hello, world" of string searching
+ #[test]
+ fn bm_find_subs() {
+ let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..]));
+ let haystack = b"I keep seeing patterns in this text";
+ assert_eq!(14, searcher.find(haystack).unwrap());
+ }
+
+ #[test]
+ fn bm_find_no_subs() {
+ let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..]));
+ let haystack = b"I keep seeing needles in this text";
+ assert_eq!(None, searcher.find(haystack));
+ }
+
+ //
+ // Regression Tests
+ //
+
+ #[test]
+ fn bm_skip_reset_bug() {
+ let haystack = vec![0, 0, 0, 0, 0, 1, 1, 0];
+ let needle = vec![0, 1, 1, 0];
+
+ let searcher = BoyerMooreSearch::new(needle);
+ let offset = searcher.find(haystack.as_slice()).unwrap();
+ assert_eq!(4, offset);
+ }
+
+ #[test]
+ fn bm_backstop_underflow_bug() {
+ let haystack = vec![0, 0];
+ let needle = vec![0, 0];
+
+ let searcher = BoyerMooreSearch::new(needle);
+ let offset = searcher.find(haystack.as_slice()).unwrap();
+ assert_eq!(0, offset);
+ }
+
+ #[test]
+ fn bm_naive_off_by_one_bug() {
+ let haystack = vec![91];
+ let needle = vec![91];
+
+ let naive_offset = naive_find(&needle, &haystack).unwrap();
+ assert_eq!(0, naive_offset);
+ }
+
+ #[test]
+ fn bm_memchr_fallback_indexing_bug() {
+ let mut haystack = vec![
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ];
+ let needle = vec![1, 1, 1, 1, 32, 32, 87];
+ let needle_start = haystack.len();
+ haystack.extend(needle.clone());
+
+ let searcher = BoyerMooreSearch::new(needle);
+ assert_eq!(needle_start, searcher.find(haystack.as_slice()).unwrap());
+ }
+
+ #[test]
+ fn bm_backstop_boundary() {
+ let haystack = b"\
+// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+e_data.clone_created(entity_id, entity_to_add.entity_id);
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+"
+ .to_vec();
+ let needle = b"clone_created".to_vec();
+
+ let searcher = BoyerMooreSearch::new(needle);
+ let result = searcher.find(&haystack);
+ assert_eq!(Some(43), result);
+ }
+
+ #[test]
+ fn bm_win_gnu_indexing_bug() {
+ let haystack_raw = vec![
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ];
+ let needle = vec![1, 1, 1, 1, 1, 1, 1];
+ let haystack = haystack_raw.as_slice();
+
+ BoyerMooreSearch::new(needle.clone()).find(haystack);
+ }
+
+ //
+ // QuickCheck Properties
+ //
+
+ use quickcheck::TestResult;
+
+ fn naive_find(needle: &[u8], haystack: &[u8]) -> Option<usize> {
+ assert!(needle.len() <= haystack.len());
+
+ for i in 0..(haystack.len() - (needle.len() - 1)) {
+ if haystack[i] == needle[0]
+ && &haystack[i..(i + needle.len())] == needle
+ {
+ return Some(i);
+ }
+ }
+
+ None
+ }
+
+ quickcheck! {
+ fn qc_bm_equals_nieve_find(pile1: Vec<u8>, pile2: Vec<u8>) -> TestResult {
+ if pile1.len() == 0 || pile2.len() == 0 {
+ return TestResult::discard();
+ }
+
+ let (needle, haystack) = if pile1.len() < pile2.len() {
+ (pile1, pile2.as_slice())
+ } else {
+ (pile2, pile1.as_slice())
+ };
+
+ let searcher = BoyerMooreSearch::new(needle.clone());
+ TestResult::from_bool(
+ searcher.find(haystack) == naive_find(&needle, haystack))
+ }
+
+ fn qc_bm_equals_single(pile1: Vec<u8>, pile2: Vec<u8>) -> TestResult {
+ if pile1.len() == 0 || pile2.len() == 0 {
+ return TestResult::discard();
+ }
+
+ let (needle, haystack) = if pile1.len() < pile2.len() {
+ (pile1, pile2.as_slice())
+ } else {
+ (pile2, pile1.as_slice())
+ };
+
+ let bm_searcher = BoyerMooreSearch::new(needle.clone());
+ let freqy_memchr = FreqyPacked::new(needle);
+ TestResult::from_bool(
+ bm_searcher.find(haystack) == freqy_memchr.find(haystack))
+ }
+
+ fn qc_bm_finds_trailing_needle(
+ haystack_pre: Vec<u8>,
+ needle: Vec<u8>
+ ) -> TestResult {
+ if needle.len() == 0 {
+ return TestResult::discard();
+ }
+
+ let mut haystack = haystack_pre.clone();
+ let searcher = BoyerMooreSearch::new(needle.clone());
+
+ if haystack.len() >= needle.len() &&
+ searcher.find(haystack.as_slice()).is_some() {
+ return TestResult::discard();
+ }
+
+ haystack.extend(needle.clone());
+
+ // What if the the tail of the haystack can start the
+ // needle?
+ let start = haystack_pre.len()
+ .checked_sub(needle.len())
+ .unwrap_or(0);
+ for i in 0..(needle.len() - 1) {
+ if searcher.find(&haystack[(i + start)..]).is_some() {
+ return TestResult::discard();
+ }
+ }
+
+ TestResult::from_bool(
+ searcher.find(haystack.as_slice())
+ .map(|x| x == haystack_pre.len())
+ .unwrap_or(false))
+ }
+
+ // qc_equals_* is only testing the negative case as @burntsushi
+ // pointed out in https://github.com/rust-lang/regex/issues/446.
+ // This quickcheck prop represents an effort to force testing of
+ // the positive case. qc_bm_finds_first and qc_bm_finds_trailing_needle
+ // already check some of the positive cases, but they don't cover
+ // cases where the needle is in the middle of haystack. This prop
+ // fills that hole.
+ fn qc_bm_finds_subslice(
+ haystack: Vec<u8>,
+ needle_start: usize,
+ needle_length: usize
+ ) -> TestResult {
+ if haystack.len() == 0 {
+ return TestResult::discard();
+ }
+
+ let needle_start = needle_start % haystack.len();
+ let needle_length = needle_length % (haystack.len() - needle_start);
+
+ if needle_length == 0 {
+ return TestResult::discard();
+ }
+
+ let needle = &haystack[needle_start..(needle_start + needle_length)];
+
+ let bm_searcher = BoyerMooreSearch::new(needle.to_vec());
+
+ let start = naive_find(&needle, &haystack);
+ match start {
+ None => TestResult::from_bool(false),
+ Some(nf_start) =>
+ TestResult::from_bool(
+ nf_start <= needle_start
+ && bm_searcher.find(&haystack) == start
+ )
+ }
+ }
+
+ fn qc_bm_finds_first(needle: Vec<u8>) -> TestResult {
+ if needle.len() == 0 {
+ return TestResult::discard();
+ }
+
+ let mut haystack = needle.clone();
+ let searcher = BoyerMooreSearch::new(needle.clone());
+ haystack.extend(needle);
+
+ TestResult::from_bool(
+ searcher.find(haystack.as_slice())
+ .map(|x| x == 0)
+ .unwrap_or(false))
+ }
+ }
+}
diff --git a/src/literal/mod.rs b/src/literal/mod.rs
index 980f523..783c63b 100644
--- a/src/literal/mod.rs
+++ b/src/literal/mod.rs
@@ -6,7 +6,7 @@ mod imp;
#[allow(missing_docs)]
#[cfg(not(feature = "perf-literal"))]
mod imp {
- use regex_syntax::hir::literal::Literals;
+ use syntax::hir::literal::Literals;
#[derive(Clone, Debug)]
pub struct LiteralSearcher(());
diff --git a/src/pattern.rs b/src/pattern.rs
index b4ffd8e..e942667 100644
--- a/src/pattern.rs
+++ b/src/pattern.rs
@@ -1,8 +1,7 @@
use std::str::pattern::{Pattern, SearchStep, Searcher};
-use crate::re_unicode::{Matches, Regex};
+use re_unicode::{Matches, Regex};
-#[derive(Debug)]
pub struct RegexSearcher<'r, 't> {
haystack: &'t str,
it: Matches<'r, 't>,
diff --git a/src/pikevm.rs b/src/pikevm.rs
index 9a14240..299087d 100644
--- a/src/pikevm.rs
+++ b/src/pikevm.rs
@@ -17,11 +17,11 @@
use std::mem;
-use crate::exec::ProgramCache;
-use crate::input::{Input, InputAt};
-use crate::prog::{InstPtr, Program};
-use crate::re_trait::Slot;
-use crate::sparse::SparseSet;
+use exec::ProgramCache;
+use input::{Input, InputAt};
+use prog::{InstPtr, Program};
+use re_trait::Slot;
+use sparse::SparseSet;
/// An NFA simulation matching engine.
#[derive(Debug)]
@@ -231,7 +231,7 @@ impl<'r, I: Input> Fsm<'r, I> {
at: InputAt,
at_next: InputAt,
) -> bool {
- use crate::prog::Inst::*;
+ use prog::Inst::*;
match self.prog[ip] {
Match(match_slot) => {
if match_slot < matches.len() {
@@ -300,7 +300,7 @@ impl<'r, I: Input> Fsm<'r, I> {
// traverse the set of states. We only push to the stack when we
// absolutely need recursion (restoring captures or following a
// branch).
- use crate::prog::Inst::*;
+ use prog::Inst::*;
loop {
// Don't visit states we've already added.
if nlist.set.contains(ip) {
diff --git a/src/pool.rs b/src/pool.rs
index 6a6f15b..a506ee9 100644
--- a/src/pool.rs
+++ b/src/pool.rs
@@ -154,7 +154,7 @@ pub struct Pool<T> {
unsafe impl<T: Send> Sync for Pool<T> {}
impl<T: ::std::fmt::Debug> ::std::fmt::Debug for Pool<T> {
- fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
f.debug_struct("Pool")
.field("stack", &self.stack)
.field("owner", &self.owner)
@@ -168,7 +168,7 @@ impl<T: ::std::fmt::Debug> ::std::fmt::Debug for Pool<T> {
/// The purpose of the guard is to use RAII to automatically put the value back
/// in the pool once it's dropped.
#[derive(Debug)]
-pub struct PoolGuard<'a, T: Send> {
+pub struct PoolGuard<'a, T: 'a + Send> {
/// The pool that this guard is attached to.
pool: &'a Pool<T>,
/// This is None when the guard represents the special "owned" value. In
@@ -193,7 +193,7 @@ impl<T: Send> Pool<T> {
/// the value to go back into the pool) and then calling get again is NOT
/// guaranteed to return the same value received in the first get call.
#[cfg_attr(feature = "perf-inline", inline(always))]
- pub fn get(&self) -> PoolGuard<'_, T> {
+ pub fn get(&self) -> PoolGuard<T> {
// Our fast path checks if the caller is the thread that "owns" this
// pool. Or stated differently, whether it is the first thread that
// tried to extract a value from the pool. If it is, then we can return
@@ -217,7 +217,7 @@ impl<T: Send> Pool<T> {
///
/// If the pool has no owner, then this will set the owner.
#[cold]
- fn get_slow(&self, caller: usize, owner: usize) -> PoolGuard<'_, T> {
+ fn get_slow(&self, caller: usize, owner: usize) -> PoolGuard<T> {
use std::sync::atomic::Ordering::Relaxed;
if owner == 0 {
@@ -284,7 +284,7 @@ mod tests {
#[test]
fn oibits() {
- use crate::exec::ProgramCache;
+ use exec::ProgramCache;
fn has_oibits<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}
has_oibits::<Pool<ProgramCache>>();
diff --git a/src/prog.rs b/src/prog.rs
index 475a811..74e5f2f 100644
--- a/src/prog.rs
+++ b/src/prog.rs
@@ -6,8 +6,8 @@ use std::ops::Deref;
use std::slice;
use std::sync::Arc;
-use crate::input::Char;
-use crate::literal::LiteralSearcher;
+use input::Char;
+use literal::LiteralSearcher;
/// `InstPtr` represents the index of an instruction in a regex program.
pub type InstPtr = usize;
@@ -168,7 +168,7 @@ impl Deref for Program {
}
impl fmt::Debug for Program {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Inst::*;
fn with_goto(cur: usize, goto: usize, fmtd: String) -> String {
@@ -259,8 +259,8 @@ impl<'a> IntoIterator for &'a Program {
///
/// Other than the benefit of moving invariants into the type system, another
/// benefit is the decreased size. If we remove the `Char` and `Ranges`
-/// instructions from the `Inst` enum, then its size shrinks from 32 bytes to
-/// 24 bytes. (This is because of the removal of a `Box<[]>` in the `Ranges`
+/// instructions from the `Inst` enum, then its size shrinks from 40 bytes to
+/// 24 bytes. (This is because of the removal of a `Vec` in the `Ranges`
/// variant.) Given that byte based machines are typically much bigger than
/// their Unicode analogues (because they can decode UTF-8 directly), this ends
/// up being a pretty significant savings.
@@ -374,7 +374,7 @@ pub struct InstRanges {
/// succeeds.
pub goto: InstPtr,
/// The set of Unicode scalar value ranges to test.
- pub ranges: Box<[(char, char)]>,
+ pub ranges: Vec<(char, char)>,
}
impl InstRanges {
@@ -432,16 +432,3 @@ impl InstBytes {
self.start <= byte && byte <= self.end
}
}
-
-#[cfg(test)]
-mod test {
- #[test]
- #[cfg(target_pointer_width = "64")]
- fn test_size_of_inst() {
- use std::mem::size_of;
-
- use super::Inst;
-
- assert_eq!(32, size_of::<Inst>());
- }
-}
diff --git a/src/re_builder.rs b/src/re_builder.rs
index ee63836..fc140f8 100644
--- a/src/re_builder.rs
+++ b/src/re_builder.rs
@@ -37,10 +37,10 @@ macro_rules! define_builder {
($name:ident, $regex_mod:ident, $only_utf8:expr) => {
pub mod $name {
use super::RegexOptions;
- use crate::error::Error;
- use crate::exec::ExecBuilder;
+ use error::Error;
+ use exec::ExecBuilder;
- use crate::$regex_mod::Regex;
+ use $regex_mod::Regex;
/// A configurable builder for a regular expression.
///
@@ -235,10 +235,10 @@ macro_rules! define_set_builder {
($name:ident, $regex_mod:ident, $only_utf8:expr) => {
pub mod $name {
use super::RegexOptions;
- use crate::error::Error;
- use crate::exec::ExecBuilder;
+ use error::Error;
+ use exec::ExecBuilder;
- use crate::re_set::$regex_mod::RegexSet;
+ use re_set::$regex_mod::RegexSet;
/// A configurable builder for a set of regular expressions.
///
diff --git a/src/re_bytes.rs b/src/re_bytes.rs
index ae55d6d..204a70a 100644
--- a/src/re_bytes.rs
+++ b/src/re_bytes.rs
@@ -6,13 +6,13 @@ use std::ops::{Index, Range};
use std::str::FromStr;
use std::sync::Arc;
-use crate::find_byte::find_byte;
+use find_byte::find_byte;
-use crate::error::Error;
-use crate::exec::{Exec, ExecNoSync};
-use crate::expand::expand_bytes;
-use crate::re_builder::bytes::RegexBuilder;
-use crate::re_trait::{self, RegularExpression, SubCapturesPosIter};
+use error::Error;
+use exec::{Exec, ExecNoSync};
+use expand::expand_bytes;
+use re_builder::bytes::RegexBuilder;
+use re_trait::{self, RegularExpression, SubCapturesPosIter};
/// Match represents a single match of a regex in a haystack.
///
@@ -79,14 +79,14 @@ pub struct Regex(Exec);
impl fmt::Display for Regex {
/// Shows the original regular expression.
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.as_str())
}
}
impl fmt::Debug for Regex {
/// Shows the original regular expression.
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self, f)
}
}
@@ -133,7 +133,7 @@ impl Regex {
/// bytes:
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let text = b"I categorically deny having triskaidekaphobia.";
/// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
@@ -156,7 +156,7 @@ impl Regex {
/// ASCII word bytes:
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let text = b"I categorically deny having triskaidekaphobia.";
/// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap();
@@ -177,7 +177,7 @@ impl Regex {
/// word bytes:
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let text = b"Retroactively relinquishing remunerations is reprehensible.";
/// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
@@ -205,7 +205,7 @@ impl Regex {
/// year separately.
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
/// let text = b"Not my favorite movie: 'Citizen Kane' (1941).";
@@ -227,7 +227,7 @@ impl Regex {
/// We can make this example a bit clearer by using *named* capture groups:
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
@@ -271,7 +271,7 @@ impl Regex {
/// some text, where the movie is formatted like "'Title' (xxxx)":
///
/// ```rust
- /// # use std::str; use regex::bytes::Regex;
+ /// # extern crate regex; use std::str; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
@@ -305,7 +305,7 @@ impl Regex {
/// To split a string delimited by arbitrary amounts of spaces or tabs:
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"[ \t]+").unwrap();
/// let fields: Vec<&[u8]> = re.split(b"a b \t c\td e").collect();
@@ -331,7 +331,7 @@ impl Regex {
/// Get the first two words in some text:
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"\W+").unwrap();
/// let fields: Vec<&[u8]> = re.splitn(b"Hey! How are you?", 3).collect();
@@ -379,7 +379,7 @@ impl Regex {
/// In typical usage, this can just be a normal byte string:
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new("[^01]+").unwrap();
/// assert_eq!(re.replace(b"1078910", &b""[..]), &b"1010"[..]);
@@ -392,7 +392,7 @@ impl Regex {
/// group matches easily:
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # use regex::bytes::Captures; fn main() {
/// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
/// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| {
@@ -411,7 +411,7 @@ impl Regex {
/// with named capture groups:
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
/// let result = re.replace(b"Springsteen, Bruce", &b"$first $last"[..]);
@@ -428,7 +428,7 @@ impl Regex {
/// underscore:
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
/// let result = re.replace(b"deep fried", &b"${first}_$second"[..]);
@@ -445,7 +445,7 @@ impl Regex {
/// byte string with `NoExpand`:
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// use regex::bytes::NoExpand;
///
@@ -546,7 +546,7 @@ impl Regex {
/// `a`.
///
/// ```rust
- /// # use regex::bytes::Regex;
+ /// # extern crate regex; use regex::bytes::Regex;
/// # fn main() {
/// let text = b"aaaaa";
/// let pos = Regex::new(r"a+").unwrap().shortest_match(text);
@@ -658,7 +658,7 @@ impl Regex {
}
/// Returns an iterator over the capture names.
- pub fn capture_names(&self) -> CaptureNames<'_> {
+ pub fn capture_names(&self) -> CaptureNames {
CaptureNames(self.0.capture_names().iter())
}
@@ -990,15 +990,15 @@ impl<'t> Captures<'t> {
}
impl<'t> fmt::Debug for Captures<'t> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_tuple("Captures").field(&CapturesDebug(self)).finish()
}
}
-struct CapturesDebug<'c, 't>(&'c Captures<'t>);
+struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>);
impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn escape_bytes(bytes: &[u8]) -> String {
let mut s = String::new();
for &b in bytes {
@@ -1084,7 +1084,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
/// the lifetime `'t` corresponds to the originally matched text.
#[derive(Clone, Debug)]
-pub struct SubCaptureMatches<'c, 't> {
+pub struct SubCaptureMatches<'c, 't: 'c> {
caps: &'c Captures<'t>,
it: SubCapturesPosIter<'c>,
}
@@ -1116,7 +1116,7 @@ pub trait Replacer {
///
/// For example, a no-op replacement would be
/// `dst.extend(&caps[0])`.
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>);
+ fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>);
/// Return a fixed unchanging replacement byte string.
///
@@ -1159,10 +1159,10 @@ pub trait Replacer {
///
/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref).
#[derive(Debug)]
-pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R);
+pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
self.0.replace_append(caps, dst)
}
fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
@@ -1171,56 +1171,56 @@ impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
}
impl<'a> Replacer for &'a [u8] {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
caps.expand(*self, dst);
}
- fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+ fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
no_expansion(self)
}
}
impl<'a> Replacer for &'a Vec<u8> {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
caps.expand(*self, dst);
}
- fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+ fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
no_expansion(self)
}
}
impl Replacer for Vec<u8> {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
caps.expand(self, dst);
}
- fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+ fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
no_expansion(self)
}
}
impl<'a> Replacer for Cow<'a, [u8]> {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
caps.expand(self.as_ref(), dst);
}
- fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+ fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
no_expansion(self)
}
}
impl<'a> Replacer for &'a Cow<'a, [u8]> {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
caps.expand(self.as_ref(), dst);
}
- fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+ fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
no_expansion(self)
}
}
-fn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<'_, [u8]>> {
+fn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<[u8]>> {
let s = t.as_ref();
match find_byte(b'$', s) {
Some(_) => None,
@@ -1230,10 +1230,10 @@ fn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<'_, [u8]>> {
impl<F, T> Replacer for F
where
- F: FnMut(&Captures<'_>) -> T,
+ F: FnMut(&Captures) -> T,
T: AsRef<[u8]>,
{
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
dst.extend_from_slice((*self)(caps).as_ref());
}
}
@@ -1250,11 +1250,11 @@ where
pub struct NoExpand<'t>(pub &'t [u8]);
impl<'t> Replacer for NoExpand<'t> {
- fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, _: &Captures, dst: &mut Vec<u8>) {
dst.extend_from_slice(self.0);
}
- fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+ fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
Some(Cow::Borrowed(self.0))
}
}
diff --git a/src/re_set.rs b/src/re_set.rs
index 73d5953..5cb47ad 100644
--- a/src/re_set.rs
+++ b/src/re_set.rs
@@ -7,10 +7,10 @@ macro_rules! define_set {
use std::slice;
use std::vec;
- use crate::error::Error;
- use crate::exec::Exec;
- use crate::re_builder::$builder_mod::RegexSetBuilder;
- use crate::re_trait::RegularExpression;
+ use error::Error;
+ use exec::Exec;
+ use re_builder::$builder_mod::RegexSetBuilder;
+ use re_trait::RegularExpression;
/// Match multiple (possibly overlapping) regular expressions in a single scan.
///
@@ -292,7 +292,7 @@ impl SetMatches {
/// This will always produces matches in ascending order of index, where
/// the index corresponds to the index of the regex that matched with
/// respect to its position when initially building the set.
- pub fn iter(&self) -> SetMatchesIter<'_> {
+ pub fn iter(&self) -> SetMatchesIter {
SetMatchesIter((&*self.matches).into_iter().enumerate())
}
}
@@ -405,7 +405,7 @@ impl From<Exec> for RegexSet {
}
impl fmt::Debug for RegexSet {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "RegexSet({:?})", self.0.regex_strings())
}
}
diff --git a/src/re_trait.rs b/src/re_trait.rs
index 680aa54..ea6be9c 100644
--- a/src/re_trait.rs
+++ b/src/re_trait.rs
@@ -30,7 +30,7 @@ impl Locations {
/// Creates an iterator of all the capture group positions in order of
/// appearance in the regular expression. Positions are byte indices
/// in terms of the original string matched.
- pub fn iter(&self) -> SubCapturesPosIter<'_> {
+ pub fn iter(&self) -> SubCapturesPosIter {
SubCapturesPosIter { idx: 0, locs: self }
}
@@ -138,13 +138,13 @@ pub trait RegularExpression: Sized + fmt::Debug {
/// Returns an iterator over all non-overlapping successive leftmost-first
/// matches.
- fn find_iter(self, text: &Self::Text) -> Matches<'_, Self> {
+ fn find_iter(self, text: &Self::Text) -> Matches<Self> {
Matches { re: self, text: text, last_end: 0, last_match: None }
}
/// Returns an iterator over all non-overlapping successive leftmost-first
/// matches with captures.
- fn captures_iter(self, text: &Self::Text) -> CaptureMatches<'_, Self> {
+ fn captures_iter(self, text: &Self::Text) -> CaptureMatches<Self> {
CaptureMatches(self.find_iter(text))
}
}
diff --git a/src/re_unicode.rs b/src/re_unicode.rs
index 142c78f..1b478cd 100644
--- a/src/re_unicode.rs
+++ b/src/re_unicode.rs
@@ -6,20 +6,21 @@ use std::ops::{Index, Range};
use std::str::FromStr;
use std::sync::Arc;
-use crate::find_byte::find_byte;
+use find_byte::find_byte;
+use syntax;
-use crate::error::Error;
-use crate::exec::{Exec, ExecNoSyncStr};
-use crate::expand::expand_str;
-use crate::re_builder::unicode::RegexBuilder;
-use crate::re_trait::{self, RegularExpression, SubCapturesPosIter};
+use error::Error;
+use exec::{Exec, ExecNoSyncStr};
+use expand::expand_str;
+use re_builder::unicode::RegexBuilder;
+use re_trait::{self, RegularExpression, SubCapturesPosIter};
/// Escapes all regular expression meta characters in `text`.
///
/// The string returned may be safely used as a literal in a regular
/// expression.
pub fn escape(text: &str) -> String {
- regex_syntax::escape(text)
+ syntax::escape(text)
}
/// Match represents a single match of a regex in a haystack.
@@ -137,14 +138,14 @@ pub struct Regex(Exec);
impl fmt::Display for Regex {
/// Shows the original regular expression.
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.as_str())
}
}
impl fmt::Debug for Regex {
/// Shows the original regular expression.
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self, f)
}
}
@@ -188,7 +189,7 @@ impl Regex {
/// Unicode word characters:
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
@@ -211,7 +212,7 @@ impl Regex {
/// Unicode word characters:
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap();
@@ -233,7 +234,7 @@ impl Regex {
/// word characters:
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let text = "Retroactively relinquishing remunerations is reprehensible.";
/// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
@@ -261,7 +262,7 @@ impl Regex {
/// year separately.
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
@@ -283,7 +284,7 @@ impl Regex {
/// We can make this example a bit clearer by using *named* capture groups:
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
@@ -327,7 +328,7 @@ impl Regex {
/// some text, where the movie is formatted like "'Title' (xxxx)":
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
@@ -360,7 +361,7 @@ impl Regex {
/// To split a string delimited by arbitrary amounts of spaces or tabs:
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"[ \t]+").unwrap();
/// let fields: Vec<&str> = re.split("a b \t c\td e").collect();
@@ -384,7 +385,7 @@ impl Regex {
/// Get the first two words in some text:
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"\W+").unwrap();
/// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect();
@@ -431,7 +432,7 @@ impl Regex {
/// In typical usage, this can just be a normal string:
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = Regex::new("[^01]+").unwrap();
/// assert_eq!(re.replace("1078910", ""), "1010");
@@ -444,7 +445,7 @@ impl Regex {
/// capturing group matches easily:
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # use regex::Captures; fn main() {
/// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
/// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
@@ -460,7 +461,7 @@ impl Regex {
/// with named capture groups:
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
/// let result = re.replace("Springsteen, Bruce", "$first $last");
@@ -477,7 +478,7 @@ impl Regex {
/// underscore:
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
/// let result = re.replace("deep fried", "${first}_$second");
@@ -494,7 +495,7 @@ impl Regex {
/// byte string with `NoExpand`:
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// use regex::NoExpand;
///
@@ -604,7 +605,7 @@ impl Regex {
/// `a`.
///
/// ```rust
- /// # use regex::Regex;
+ /// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let text = "aaaaa";
/// let pos = Regex::new(r"a+").unwrap().shortest_match(text);
@@ -716,7 +717,7 @@ impl Regex {
}
/// Returns an iterator over the capture names.
- pub fn capture_names(&self) -> CaptureNames<'_> {
+ pub fn capture_names(&self) -> CaptureNames {
CaptureNames(self.0.capture_names().iter())
}
@@ -1000,15 +1001,15 @@ impl<'t> Captures<'t> {
}
impl<'t> fmt::Debug for Captures<'t> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_tuple("Captures").field(&CapturesDebug(self)).finish()
}
}
-struct CapturesDebug<'c, 't>(&'c Captures<'t>);
+struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>);
impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// We'd like to show something nice here, even if it means an
// allocation to build a reverse index.
let slot_to_name: HashMap<&usize, &String> =
@@ -1079,7 +1080,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
/// the lifetime `'t` corresponds to the originally matched text.
#[derive(Clone, Debug)]
-pub struct SubCaptureMatches<'c, 't> {
+pub struct SubCaptureMatches<'c, 't: 'c> {
caps: &'c Captures<'t>,
it: SubCapturesPosIter<'c>,
}
@@ -1157,7 +1158,7 @@ pub trait Replacer {
///
/// For example, a no-op replacement would be
/// `dst.push_str(caps.get(0).unwrap().as_str())`.
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String);
+ fn replace_append(&mut self, caps: &Captures, dst: &mut String);
/// Return a fixed unchanging replacement string.
///
@@ -1200,68 +1201,68 @@ pub trait Replacer {
///
/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref).
#[derive(Debug)]
-pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R);
+pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
self.0.replace_append(caps, dst)
}
- fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
+ fn no_expansion(&mut self) -> Option<Cow<str>> {
self.0.no_expansion()
}
}
impl<'a> Replacer for &'a str {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
caps.expand(*self, dst);
}
- fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
+ fn no_expansion(&mut self) -> Option<Cow<str>> {
no_expansion(self)
}
}
impl<'a> Replacer for &'a String {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
self.as_str().replace_append(caps, dst)
}
- fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
+ fn no_expansion(&mut self) -> Option<Cow<str>> {
no_expansion(self)
}
}
impl Replacer for String {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
self.as_str().replace_append(caps, dst)
}
- fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
+ fn no_expansion(&mut self) -> Option<Cow<str>> {
no_expansion(self)
}
}
impl<'a> Replacer for Cow<'a, str> {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
self.as_ref().replace_append(caps, dst)
}
- fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
+ fn no_expansion(&mut self) -> Option<Cow<str>> {
no_expansion(self)
}
}
impl<'a> Replacer for &'a Cow<'a, str> {
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
self.as_ref().replace_append(caps, dst)
}
- fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
+ fn no_expansion(&mut self) -> Option<Cow<str>> {
no_expansion(self)
}
}
-fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<'_, str>> {
+fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<str>> {
let s = t.as_ref();
match find_byte(b'$', s.as_bytes()) {
Some(_) => None,
@@ -1271,10 +1272,10 @@ fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<'_, str>> {
impl<F, T> Replacer for F
where
- F: FnMut(&Captures<'_>) -> T,
+ F: FnMut(&Captures) -> T,
T: AsRef<str>,
{
- fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
+ fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
dst.push_str((*self)(caps).as_ref());
}
}
@@ -1291,11 +1292,11 @@ where
pub struct NoExpand<'t>(pub &'t str);
impl<'t> Replacer for NoExpand<'t> {
- fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) {
+ fn replace_append(&mut self, _: &Captures, dst: &mut String) {
dst.push_str(self.0);
}
- fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
+ fn no_expansion(&mut self) -> Option<Cow<str>> {
Some(Cow::Borrowed(self.0))
}
}
diff --git a/src/sparse.rs b/src/sparse.rs
index 98b7266..421d6b6 100644
--- a/src/sparse.rs
+++ b/src/sparse.rs
@@ -62,7 +62,7 @@ impl SparseSet {
}
impl fmt::Debug for SparseSet {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "SparseSet({:?})", self.dense)
}
}
diff --git a/test b/test
index b10564f..3d1351c 100755
--- a/test
+++ b/test
@@ -1,7 +1,5 @@
#!/bin/bash
-set -e
-
# This is a convenience script for running a broad swath of tests across
# features. We don't test the complete space, since the complete space is quite
# large. Hopefully once we migrate the test suite to better infrastructure
diff --git a/tests/regression_fuzz.rs b/tests/regression_fuzz.rs
index 4e76704..5f92ed0 100644
--- a/tests/regression_fuzz.rs
+++ b/tests/regression_fuzz.rs
@@ -17,15 +17,3 @@ fn fuzz1() {
fn empty_any_errors_no_panic() {
assert!(regex_new!(r"\P{any}").is_err());
}
-
-// This tests that a very large regex errors during compilation instead of
-// using gratuitous amounts of memory. The specific problem is that the
-// compiler wasn't accounting for the memory used by Unicode character classes
-// correctly.
-//
-// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579
-#[test]
-fn big_regex_fails_to_compile() {
- let pat = "[\u{0}\u{e}\u{2}\\w~~>[l\t\u{0}]p?<]{971158}";
- assert!(regex_new!(pat).is_err());
-}
diff --git a/tests/replace.rs b/tests/replace.rs
index 1dc6106..700aff2 100644
--- a/tests/replace.rs
+++ b/tests/replace.rs
@@ -94,7 +94,7 @@ replace!(
replace,
r"([0-9]+)",
"age: 26",
- |captures: &Captures<'_>| {
+ |captures: &Captures| {
match_text!(captures.get(1).unwrap())[0..1].to_owned()
},
"age: 2"
@@ -104,7 +104,7 @@ replace!(
replace,
r"[0-9]+",
"age: 26",
- |_captures: &Captures<'_>| t!("Z").to_owned(),
+ |_captures: &Captures| t!("Z").to_owned(),
"age: Z"
);
diff --git a/tests/test_backtrack.rs b/tests/test_backtrack.rs
index fb934e2..617185f 100644
--- a/tests/test_backtrack.rs
+++ b/tests/test_backtrack.rs
@@ -1,5 +1,8 @@
#![cfg_attr(feature = "pattern", feature(pattern))]
+extern crate rand;
+extern crate regex;
+
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;
diff --git a/tests/test_backtrack_bytes.rs b/tests/test_backtrack_bytes.rs
index a59426c..17df4d8 100644
--- a/tests/test_backtrack_bytes.rs
+++ b/tests/test_backtrack_bytes.rs
@@ -1,3 +1,6 @@
+extern crate rand;
+extern crate regex;
+
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;
diff --git a/tests/test_backtrack_utf8bytes.rs b/tests/test_backtrack_utf8bytes.rs
index 6d308e9..78a0135 100644
--- a/tests/test_backtrack_utf8bytes.rs
+++ b/tests/test_backtrack_utf8bytes.rs
@@ -1,5 +1,8 @@
#![cfg_attr(feature = "pattern", feature(pattern))]
+extern crate rand;
+extern crate regex;
+
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;
diff --git a/tests/test_crates_regex.rs b/tests/test_crates_regex.rs
index a681604..d697683 100644
--- a/tests/test_crates_regex.rs
+++ b/tests/test_crates_regex.rs
@@ -1,3 +1,6 @@
+extern crate quickcheck;
+extern crate regex;
+
/*
* This test is a minimal version of <rofl_0> and <subdiff_0>
*
diff --git a/tests/test_default.rs b/tests/test_default.rs
index d4365fb..af634a0 100644
--- a/tests/test_default.rs
+++ b/tests/test_default.rs
@@ -1,6 +1,7 @@
#![cfg_attr(feature = "pattern", feature(pattern))]
-use regex;
+extern crate rand;
+extern crate regex;
// Due to macro scoping rules, this definition only applies for the modules
// defined below. Effectively, it allows us to use the same tests for both
diff --git a/tests/test_default_bytes.rs b/tests/test_default_bytes.rs
index f200596..e4a25dc 100644
--- a/tests/test_default_bytes.rs
+++ b/tests/test_default_bytes.rs
@@ -1,3 +1,6 @@
+extern crate rand;
+extern crate regex;
+
macro_rules! regex_new {
($re:expr) => {{
use regex::bytes::Regex;
diff --git a/tests/test_nfa.rs b/tests/test_nfa.rs
index e5a67d1..05dad23 100644
--- a/tests/test_nfa.rs
+++ b/tests/test_nfa.rs
@@ -1,5 +1,8 @@
#![cfg_attr(feature = "pattern", feature(pattern))]
+extern crate rand;
+extern crate regex;
+
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;
diff --git a/tests/test_nfa_bytes.rs b/tests/test_nfa_bytes.rs
index 0a10e03..1042318 100644
--- a/tests/test_nfa_bytes.rs
+++ b/tests/test_nfa_bytes.rs
@@ -1,3 +1,6 @@
+extern crate rand;
+extern crate regex;
+
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;
diff --git a/tests/test_nfa_utf8bytes.rs b/tests/test_nfa_utf8bytes.rs
index 36a572b..86487a1 100644
--- a/tests/test_nfa_utf8bytes.rs
+++ b/tests/test_nfa_utf8bytes.rs
@@ -1,5 +1,8 @@
#![cfg_attr(feature = "pattern", feature(pattern))]
+extern crate rand;
+extern crate regex;
+
macro_rules! regex_new {
($re:expr) => {{
use regex::internal::ExecBuilder;