author    Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2023-07-07 05:08:34 +0000
committer Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2023-07-07 05:08:34 +0000
commit    040d3c7b6da7ad05ec0b007fec02bf26479c256e (patch)
tree      d050b095d3d08bc7800c98bf5fe9e2ec15a35921
parent    6e92301fee3c847c54ffdab6da1b21c4a7deb1c6 (diff)
parent    14bea0e36ba5a038a98be2269a0450fe90c9d229 (diff)
download  regex-android14-mainline-permission-release.tar.gz
Change-Id: Ide292ec0881a70df1db3d8cab48f3617f3c61953
-rw-r--r--  .cargo_vcs_info.json    |  7
-rw-r--r--  Android.bp              | 24
-rw-r--r--  CHANGELOG.md            | 97
-rw-r--r--  Cargo.toml              | 61
-rw-r--r--  Cargo.toml.orig         |  4
-rw-r--r--  METADATA                | 14
-rw-r--r--  README.md               | 10
-rw-r--r--  TEST_MAPPING            | 31
-rw-r--r--  cargo2android.json      |  3
-rw-r--r--  src/backtrack.rs        | 12
-rw-r--r--  src/compile.rs          | 84
-rw-r--r--  src/dfa.rs              | 40
-rw-r--r--  src/exec.rs             | 18
-rw-r--r--  src/expand.rs           |  8
-rw-r--r--  src/input.rs            |  4
-rw-r--r--  src/lib.rs              |  4
-rw-r--r--  src/literal/imp.rs      |  4
-rw-r--r--  src/pattern.rs          |  2
-rw-r--r--  src/pikevm.rs           |  2
-rw-r--r--  src/prog.rs             |  2
-rw-r--r--  src/re_bytes.rs         | 24
-rw-r--r--  src/re_set.rs           | 44
-rw-r--r--  src/re_trait.rs         | 13
-rw-r--r--  src/re_unicode.rs       | 38
-rw-r--r--  src/utf8.rs             |  2
-rw-r--r--  tests/regression.rs     |  3
-rw-r--r--  tests/replace.rs        | 18
-rw-r--r--  tests/test_default.rs   | 80
-rw-r--r--  tests/unicode.rs        | 17
29 files changed, 490 insertions, 180 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 51b3cd6..a82e282 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,6 @@
{
"git": {
- "sha1": "f2dc1b788f773a49f1b6633a6302054978344452"
- }
-}
+ "sha1": "9582040009820380a16819ca0d1ae262c7d454b0"
+ },
+ "path_in_vcs": ""
+}
\ No newline at end of file
diff --git a/Android.bp b/Android.bp
index 4fe3543..5b27d25 100644
--- a/Android.bp
+++ b/Android.bp
@@ -43,7 +43,7 @@ rust_library {
host_supported: true,
crate_name: "regex",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
+ cargo_pkg_version: "1.7.3",
srcs: ["src/lib.rs"],
edition: "2018",
features: [
@@ -75,6 +75,8 @@ rust_library {
"com.android.compos",
"com.android.virt",
],
+ product_available: true,
+ vendor_available: true,
}
rust_test {
@@ -82,7 +84,7 @@ rust_test {
host_supported: true,
crate_name: "regex",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
+ cargo_pkg_version: "1.7.3",
srcs: ["src/lib.rs"],
test_suites: ["general-tests"],
auto_gen_config: true,
@@ -124,7 +126,7 @@ rust_test {
host_supported: true,
crate_name: "backtrack",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
+ cargo_pkg_version: "1.7.3",
srcs: ["tests/test_backtrack.rs"],
test_suites: ["general-tests"],
auto_gen_config: true,
@@ -167,7 +169,7 @@ rust_test {
host_supported: true,
crate_name: "backtrack_bytes",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
+ cargo_pkg_version: "1.7.3",
srcs: ["tests/test_backtrack_bytes.rs"],
test_suites: ["general-tests"],
auto_gen_config: true,
@@ -210,7 +212,7 @@ rust_test {
host_supported: true,
crate_name: "backtrack_utf8bytes",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
+ cargo_pkg_version: "1.7.3",
srcs: ["tests/test_backtrack_utf8bytes.rs"],
test_suites: ["general-tests"],
auto_gen_config: true,
@@ -253,7 +255,7 @@ rust_test {
host_supported: true,
crate_name: "crates_regex",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
+ cargo_pkg_version: "1.7.3",
srcs: ["tests/test_crates_regex.rs"],
test_suites: ["general-tests"],
auto_gen_config: true,
@@ -296,7 +298,7 @@ rust_test {
host_supported: true,
crate_name: "default",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
+ cargo_pkg_version: "1.7.3",
srcs: ["tests/test_default.rs"],
test_suites: ["general-tests"],
auto_gen_config: true,
@@ -339,7 +341,7 @@ rust_test {
host_supported: true,
crate_name: "default_bytes",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
+ cargo_pkg_version: "1.7.3",
srcs: ["tests/test_default_bytes.rs"],
test_suites: ["general-tests"],
auto_gen_config: true,
@@ -382,7 +384,7 @@ rust_test {
host_supported: true,
crate_name: "nfa",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
+ cargo_pkg_version: "1.7.3",
srcs: ["tests/test_nfa.rs"],
test_suites: ["general-tests"],
auto_gen_config: true,
@@ -425,7 +427,7 @@ rust_test {
host_supported: true,
crate_name: "nfa_bytes",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
+ cargo_pkg_version: "1.7.3",
srcs: ["tests/test_nfa_bytes.rs"],
test_suites: ["general-tests"],
auto_gen_config: true,
@@ -468,7 +470,7 @@ rust_test {
host_supported: true,
crate_name: "nfa_utf8bytes",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.4",
+ cargo_pkg_version: "1.7.3",
srcs: ["tests/test_nfa_utf8bytes.rs"],
test_suites: ["general-tests"],
auto_gen_config: true,
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 71d1963..44274ac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,98 @@
+1.7.3 (2023-03-24)
+==================
+This is a small release that fixes a bug in `Regex::shortest_match_at` that
+could cause it to panic, even when the offset given is valid.
+
+Bug fixes:
+
+* [BUG #969](https://github.com/rust-lang/regex/issues/969):
+ Fix a bug in how the reverse DFA was called for `Regex::shortest_match_at`.
+
+
+1.7.2 (2023-03-21)
+==================
+This is a small release that fixes a failing test on FreeBSD.
+
+Bug fixes:
+
+* [BUG #967](https://github.com/rust-lang/regex/issues/967):
+ Fix "no stack overflow" test which can fail due to the small stack size.
+
+
+1.7.1 (2023-01-09)
+==================
+This release was done principally to try and fix the docs.rs rendering for the
+regex crate.
+
+Performance improvements:
+
+* [PERF #930](https://github.com/rust-lang/regex/pull/930):
+ Optimize `replacen`. This also applies to `replace`, but not `replace_all`.
+
+Bug fixes:
+
+* [BUG #945](https://github.com/rust-lang/regex/issues/945):
+ Maybe fix rustdoc rendering by just bumping a new release?
+
+
+1.7.0 (2022-11-05)
+==================
+This release principally includes an upgrade to Unicode 15.
+
+New features:
+
+* [FEATURE #916](https://github.com/rust-lang/regex/issues/916):
+ Upgrade to Unicode 15.
+
+
+1.6.0 (2022-07-05)
+==================
+This release principally includes an upgrade to Unicode 14.
+
+New features:
+
+* [FEATURE #832](https://github.com/rust-lang/regex/pull/832):
+ Clarify that `Captures::len` includes all groups, not just matching groups.
+* [FEATURE #857](https://github.com/rust-lang/regex/pull/857):
+ Add an `ExactSizeIterator` impl for `SubCaptureMatches`.
+* [FEATURE #861](https://github.com/rust-lang/regex/pull/861):
+ Improve `RegexSet` documentation examples.
+* [FEATURE #877](https://github.com/rust-lang/regex/issues/877):
+ Upgrade to Unicode 14.
+
+Bug fixes:
+
+* [BUG #792](https://github.com/rust-lang/regex/issues/792):
+ Fix error message rendering bug.
+
+
+1.5.6 (2022-05-20)
+==================
+This release includes a few bug fixes, including a bug that produced incorrect
+matches when a non-greedy `?` operator was used.
+
+* [BUG #680](https://github.com/rust-lang/regex/issues/680):
+ Fixes a bug where `[[:alnum:][:^ascii:]]` dropped `[:alnum:]` from the class.
+* [BUG #859](https://github.com/rust-lang/regex/issues/859):
+ Fixes a bug where `Hir::is_match_empty` returned `false` for `\b`.
+* [BUG #862](https://github.com/rust-lang/regex/issues/862):
+ Fixes a bug where 'ab??' matches 'ab' instead of 'a' in 'ab'.
+
+
+1.5.5 (2022-03-08)
+==================
+This release fixes a security bug in the regex compiler. This bug permits a
+vector for a denial-of-service attack in cases where the regex being compiled
+is untrusted. There are no known problems where the regex is itself trusted,
+including in cases of untrusted haystacks.
+
+* [SECURITY #GHSA-m5pq-gvj9-9vr8](https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8):
+ Fixes a bug in the regex compiler where empty sub-expressions subverted the
+ existing mitigations in place to enforce a size limit on compiled regexes.
+ The Rust Security Response WG published an advisory about this:
+ https://groups.google.com/g/rustlang-security-announcements/c/NcNNL1Jq7Yw
+
+
1.5.4 (2021-05-06)
==================
This release fixes another compilation failure when building regex. This time,
@@ -669,7 +764,7 @@ New features:
* Empty sub-expressions are now permitted in most places. That is, `()+` is
now a valid regex.
* Almost everything in regex-syntax now uses constant stack space, even when
- performing anaylsis that requires structural induction. This reduces the risk
+ performing analysis that requires structural induction. This reduces the risk
of a user provided regular expression causing a stack overflow.
* [FEATURE #174](https://github.com/rust-lang/regex/issues/174):
The `Ast` type in `regex-syntax` now contains span information.
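The 1.5.5 and 1.5.6 entries above describe behavior that can be exercised directly from user code. A minimal sketch, assuming the post-upgrade 1.7.3 behavior rather than anything in the patch itself:

```rust
use regex::Regex;

fn main() {
    // GHSA-m5pq-gvj9-9vr8 / CVE-2022-24713: a huge repetition of an empty
    // sub-expression now counts against the size limit and fails to compile.
    assert!(Regex::new("(?:){4294967295}").is_err());

    // BUG #862: the non-greedy `ab??` now matches only "a" in "ab".
    let m = Regex::new("ab??").unwrap().find("ab").unwrap();
    assert_eq!((0, 1), (m.start(), m.end()));
}
```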
diff --git a/Cargo.toml b/Cargo.toml
index 260acec..37e44fb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,27 +3,33 @@
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
-# to registry (e.g., crates.io) dependencies
+# to registry (e.g., crates.io) dependencies.
#
-# If you believe there's an error in this file please file an
-# issue against the rust-lang/cargo repository. If you're
-# editing this file be aware that the upstream Cargo.toml
-# will likely look very different (and much more reasonable)
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
[package]
edition = "2018"
name = "regex"
-version = "1.5.4"
+version = "1.7.3"
authors = ["The Rust Project Developers"]
-exclude = ["/scripts/*", "/.github/*"]
+exclude = [
+ "/scripts/*",
+ "/.github/*",
+]
autotests = false
-description = "An implementation of regular expressions for Rust. This implementation uses\nfinite automata and guarantees linear time matching on all inputs.\n"
+description = """
+An implementation of regular expressions for Rust. This implementation uses
+finite automata and guarantees linear time matching on all inputs.
+"""
homepage = "https://github.com/rust-lang/regex"
documentation = "https://docs.rs/regex"
readme = "README.md"
categories = ["text-processing"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/rust-lang/regex"
+
[profile.bench]
debug = true
@@ -72,6 +78,7 @@ path = "tests/test_backtrack_bytes.rs"
[[test]]
name = "crates-regex"
path = "tests/test_crates_regex.rs"
+
[dependencies.aho-corasick]
version = "0.7.18"
optional = true
@@ -81,8 +88,9 @@ version = "2.4.0"
optional = true
[dependencies.regex-syntax]
-version = "0.6.25"
+version = "0.6.29"
default-features = false
+
[dev-dependencies.lazy_static]
version = "1"
@@ -92,19 +100,44 @@ default-features = false
[dev-dependencies.rand]
version = "0.8.3"
-features = ["getrandom", "small_rng"]
+features = [
+ "getrandom",
+ "small_rng",
+]
default-features = false
[features]
-default = ["std", "perf", "unicode", "regex-syntax/default"]
+default = [
+ "std",
+ "perf",
+ "unicode",
+ "regex-syntax/default",
+]
pattern = []
-perf = ["perf-cache", "perf-dfa", "perf-inline", "perf-literal"]
+perf = [
+ "perf-cache",
+ "perf-dfa",
+ "perf-inline",
+ "perf-literal",
+]
perf-cache = []
perf-dfa = []
perf-inline = []
-perf-literal = ["aho-corasick", "memchr"]
+perf-literal = [
+ "aho-corasick",
+ "memchr",
+]
std = []
-unicode = ["unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment", "regex-syntax/unicode"]
+unicode = [
+ "unicode-age",
+ "unicode-bool",
+ "unicode-case",
+ "unicode-gencat",
+ "unicode-perl",
+ "unicode-script",
+ "unicode-segment",
+ "regex-syntax/unicode",
+]
unicode-age = ["regex-syntax/unicode-age"]
unicode-bool = ["regex-syntax/unicode-bool"]
unicode-case = ["regex-syntax/unicode-case"]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 468230b..4c5bd1c 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
[package]
name = "regex"
-version = "1.5.4" #:version
+version = "1.7.3" #:version
authors = ["The Rust Project Developers"]
license = "MIT OR Apache-2.0"
readme = "README.md"
@@ -117,7 +117,7 @@ optional = true
# For parsing regular expressions.
[dependencies.regex-syntax]
path = "regex-syntax"
-version = "0.6.25"
+version = "0.6.29"
default-features = false
[dev-dependencies]
diff --git a/METADATA b/METADATA
index 6611aac..f8d1a17 100644
--- a/METADATA
+++ b/METADATA
@@ -1,3 +1,7 @@
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update rust/crates/regex
+# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md
+
name: "regex"
description: "An implementation of regular expressions for Rust. This implementation uses finite automata and guarantees linear time matching on all inputs."
third_party {
@@ -7,13 +11,13 @@ third_party {
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/regex/regex-1.5.4.crate"
+ value: "https://static.crates.io/crates/regex/regex-1.7.3.crate"
}
- version: "1.5.4"
+ version: "1.7.3"
license_type: NOTICE
last_upgrade_date {
- year: 2021
- month: 5
- day: 19
+ year: 2023
+ month: 4
+ day: 3
}
}
diff --git a/README.md b/README.md
index 86d6996..861417d 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Much of the syntax and implementation is inspired
by [RE2](https://github.com/google/re2).
[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions)
-[![](https://meritbadge.herokuapp.com/regex)](https://crates.io/crates/regex)
+[![Crates.io](https://img.shields.io/crates/v/regex.svg)](https://crates.io/crates/regex)
[![Rust](https://img.shields.io/badge/rust-1.41.1%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
### Documentation
@@ -23,12 +23,8 @@ can be found on the
### Usage
-Add this to your `Cargo.toml`:
-
-```toml
-[dependencies]
-regex = "1.5"
-```
+To bring this crate into your repository, either add `regex` to your
+`Cargo.toml`, or run `cargo add regex`.
Here's a simple example that matches a date in YYYY-MM-DD format and prints the
year, month and day:
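The updated usage text assumes `regex` is already declared as a dependency (via `Cargo.toml` or `cargo add regex`). A minimal sketch of the kind of date matching the README goes on to describe, written here for illustration rather than copied from the README:

```rust
use regex::Regex;

fn main() {
    // Capture year, month and day from a YYYY-MM-DD date.
    let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
    let caps = re.captures("2023-04-03").unwrap();
    println!("year: {}, month: {}, day: {}", &caps[1], &caps[2], &caps[3]);
}
```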
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 6064d70..c99af76 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -5,6 +5,9 @@
"path": "external/rust/crates/base64"
},
{
+ "path": "external/rust/crates/clap/2.33.3"
+ },
+ {
"path": "external/rust/crates/libsqlite3-sys"
},
{
@@ -18,16 +21,22 @@
},
{
"path": "external/rust/crates/unicode-xid"
- }
- ],
- "presubmit": [
+ },
+ {
+ "path": "packages/modules/Virtualization/virtualizationmanager"
+ },
{
- "name": "keystore2_test"
+ "path": "system/keymint/hal"
},
{
- "name": "legacykeystore_test"
+ "path": "system/security/keystore2"
},
{
+ "path": "system/security/keystore2/legacykeystore"
+ }
+ ],
+ "presubmit": [
+ {
"name": "regex_test_src_lib"
},
{
@@ -56,19 +65,10 @@
},
{
"name": "regex_test_tests_test_nfa_utf8bytes"
- },
- {
- "name": "virtualizationservice_device_test"
}
],
"presubmit-rust": [
{
- "name": "keystore2_test"
- },
- {
- "name": "legacykeystore_test"
- },
- {
"name": "regex_test_src_lib"
},
{
@@ -97,9 +97,6 @@
},
{
"name": "regex_test_tests_test_nfa_utf8bytes"
- },
- {
- "name": "virtualizationservice_device_test"
}
]
}
diff --git a/cargo2android.json b/cargo2android.json
index 0e54308..bef74ca 100644
--- a/cargo2android.json
+++ b/cargo2android.json
@@ -7,5 +7,6 @@
"dependencies": true,
"device": true,
"run": true,
- "tests": true
+ "tests": true,
+ "vendor-available": true
}
diff --git a/src/backtrack.rs b/src/backtrack.rs
index a3d25d6..4d83856 100644
--- a/src/backtrack.rs
+++ b/src/backtrack.rs
@@ -93,13 +93,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
let mut cache = cache.borrow_mut();
let cache = &mut cache.backtrack;
let start = input.at(start);
- let mut b = Bounded {
- prog: prog,
- input: input,
- matches: matches,
- slots: slots,
- m: cache,
- };
+ let mut b = Bounded { prog, input, matches, slots, m: cache };
b.exec_(start, end)
}
@@ -220,14 +214,14 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
// job is popped and the old capture index is restored.
self.m.jobs.push(Job::SaveRestore {
slot: inst.slot,
- old_pos: old_pos,
+ old_pos,
});
self.slots[inst.slot] = Some(at.pos());
}
ip = inst.goto;
}
Split(ref inst) => {
- self.m.jobs.push(Job::Inst { ip: inst.goto2, at: at });
+ self.m.jobs.push(Job::Inst { ip: inst.goto2, at });
ip = inst.goto1;
}
EmptyLook(ref inst) => {
diff --git a/src/compile.rs b/src/compile.rs
index 9a2ed5e..90ca250 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -38,6 +38,16 @@ pub struct Compiler {
suffix_cache: SuffixCache,
utf8_seqs: Option<Utf8Sequences>,
byte_classes: ByteClassSet,
+ // This keeps track of extra bytes allocated while compiling the regex
+ // program. Currently, this corresponds to two things. First is the heap
+ // memory allocated by Unicode character classes ('InstRanges'). Second is
+ // a "fake" amount of memory used by empty sub-expressions, so that enough
+ // empty sub-expressions will ultimately trigger the compiler to bail
+ // because of a size limit restriction. (That empty sub-expressions don't
+ // add to heap memory usage is more-or-less an implementation detail.) In
+ // the second case, if we don't bail, then an excessively large repetition
+ // on an empty sub-expression can result in the compiler using a very large
+ // amount of CPU time.
extra_inst_bytes: usize,
}
@@ -139,7 +149,8 @@ impl Compiler {
self.compiled.start = dotstar_patch.entry;
}
self.compiled.captures = vec![None];
- let patch = self.c_capture(0, expr)?.unwrap_or(self.next_inst());
+ let patch =
+ self.c_capture(0, expr)?.unwrap_or_else(|| self.next_inst());
if self.compiled.needs_dotstar() {
self.fill(dotstar_patch.hole, patch.entry);
} else {
@@ -175,7 +186,7 @@ impl Compiler {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
let Patch { hole, entry } =
- self.c_capture(0, expr)?.unwrap_or(self.next_inst());
+ self.c_capture(0, expr)?.unwrap_or_else(|| self.next_inst());
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
self.push_compiled(Inst::Match(i));
@@ -183,7 +194,7 @@ impl Compiler {
}
let i = exprs.len() - 1;
let Patch { hole, entry } =
- self.c_capture(0, &exprs[i])?.unwrap_or(self.next_inst());
+ self.c_capture(0, &exprs[i])?.unwrap_or_else(|| self.next_inst());
self.fill(prev_hole, entry);
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
@@ -260,7 +271,7 @@ impl Compiler {
self.check_size()?;
match *expr.kind() {
- Empty => Ok(None),
+ Empty => self.c_empty(),
Literal(hir::Literal::Unicode(c)) => self.c_char(c),
Literal(hir::Literal::Byte(b)) => {
assert!(self.compiled.uses_bytes());
@@ -378,6 +389,19 @@ impl Compiler {
}
}
+ fn c_empty(&mut self) -> ResultOrEmpty {
+ // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
+ // See: CVE-2022-24713
+ //
+ // Since 'empty' sub-expressions don't increase the size of
+ // the actual compiled object, we "fake" an increase in its
+ // size so that our 'check_size_limit' routine will eventually
+ // stop compilation if there are too many empty sub-expressions
+ // (e.g., via a large repetition).
+ self.extra_inst_bytes += std::mem::size_of::<Inst>();
+ Ok(None)
+ }
+
fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty {
if self.num_exprs > 1 || self.compiled.is_dfa {
// Don't ever compile Save instructions for regex sets because
@@ -387,11 +411,11 @@ impl Compiler {
} else {
let entry = self.insts.len();
let hole = self.push_hole(InstHole::Save { slot: first_slot });
- let patch = self.c(expr)?.unwrap_or(self.next_inst());
+ let patch = self.c(expr)?.unwrap_or_else(|| self.next_inst());
self.fill(hole, patch.entry);
self.fill_to_next(patch.hole);
let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 });
- Ok(Some(Patch { hole: hole, entry: entry }))
+ Ok(Some(Patch { hole, entry }))
}
}
@@ -425,7 +449,7 @@ impl Compiler {
self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
}
} else {
- let hole = self.push_hole(InstHole::Char { c: c });
+ let hole = self.push_hole(InstHole::Char { c });
Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
}
}
@@ -435,7 +459,7 @@ impl Compiler {
assert!(!ranges.is_empty());
if self.compiled.uses_bytes() {
- Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?))
+ Ok(Some(CompileClass { c: self, ranges }.compile()?))
} else {
let ranges: Vec<(char, char)> =
ranges.iter().map(|r| (r.start(), r.end())).collect();
@@ -444,9 +468,9 @@ impl Compiler {
} else {
self.extra_inst_bytes +=
ranges.len() * (size_of::<char>() * 2);
- self.push_hole(InstHole::Ranges { ranges: ranges })
+ self.push_hole(InstHole::Ranges { ranges })
};
- Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
+ Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
}
}
@@ -485,8 +509,8 @@ impl Compiler {
}
fn c_empty_look(&mut self, look: EmptyLook) -> ResultOrEmpty {
- let hole = self.push_hole(InstHole::EmptyLook { look: look });
- Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
+ let hole = self.push_hole(InstHole::EmptyLook { look });
+ Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
}
fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty
@@ -496,7 +520,7 @@ impl Compiler {
let mut exprs = exprs.into_iter();
let Patch { mut hole, entry } = loop {
match exprs.next() {
- None => return Ok(None),
+ None => return self.c_empty(),
Some(e) => {
if let Some(p) = self.c(e)? {
break p;
@@ -510,7 +534,7 @@ impl Compiler {
hole = p.hole;
}
}
- Ok(Some(Patch { hole: hole, entry: entry }))
+ Ok(Some(Patch { hole, entry }))
}
fn c_alternate(&mut self, exprs: &[Hir]) -> ResultOrEmpty {
@@ -653,7 +677,7 @@ impl Compiler {
// None).
let patch_concat = self
.c_concat(iter::repeat(expr).take(min))?
- .unwrap_or(self.next_inst());
+ .unwrap_or_else(|| self.next_inst());
if let Some(patch_rep) = self.c_repeat_zero_or_more(expr, greedy)? {
self.fill(patch_concat.hole, patch_rep.entry);
Ok(Some(Patch { hole: patch_rep.hole, entry: patch_concat.entry }))
@@ -677,7 +701,7 @@ impl Compiler {
}
// Same reasoning as in c_repeat_range_min_or_more (we know that min <
// max at this point).
- let patch_concat = patch_concat.unwrap_or(self.next_inst());
+ let patch_concat = patch_concat.unwrap_or_else(|| self.next_inst());
let initial_entry = patch_concat.entry;
// It is much simpler to compile, e.g., `a{2,5}` as:
//
@@ -856,14 +880,14 @@ impl MaybeInst {
}
MaybeInst::Split1(goto1) => {
MaybeInst::Compiled(Inst::Split(InstSplit {
- goto1: goto1,
+ goto1,
goto2: goto,
}))
}
MaybeInst::Split2(goto2) => {
MaybeInst::Compiled(Inst::Split(InstSplit {
goto1: goto,
- goto2: goto2,
+ goto2,
}))
}
_ => unreachable!(
@@ -877,9 +901,7 @@ impl MaybeInst {
fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) {
let filled = match *self {
- MaybeInst::Split => {
- Inst::Split(InstSplit { goto1: goto1, goto2: goto2 })
- }
+ MaybeInst::Split => Inst::Split(InstSplit { goto1, goto2 }),
_ => unreachable!(
"must be called on Split instruction, \
instead it was called on: {:?}",
@@ -937,19 +959,17 @@ enum InstHole {
impl InstHole {
fn fill(&self, goto: InstPtr) -> Inst {
match *self {
- InstHole::Save { slot } => {
- Inst::Save(InstSave { goto: goto, slot: slot })
- }
+ InstHole::Save { slot } => Inst::Save(InstSave { goto, slot }),
InstHole::EmptyLook { look } => {
- Inst::EmptyLook(InstEmptyLook { goto: goto, look: look })
+ Inst::EmptyLook(InstEmptyLook { goto, look })
}
- InstHole::Char { c } => Inst::Char(InstChar { goto: goto, c: c }),
+ InstHole::Char { c } => Inst::Char(InstChar { goto, c }),
InstHole::Ranges { ref ranges } => Inst::Ranges(InstRanges {
- goto: goto,
+ goto,
ranges: ranges.clone().into_boxed_slice(),
}),
InstHole::Bytes { start, end } => {
- Inst::Bytes(InstBytes { goto: goto, start: start, end: end })
+ Inst::Bytes(InstBytes { goto, start, end })
}
}
}
@@ -1019,7 +1039,7 @@ impl<'a, 'b> CompileClass<'a, 'b> {
let mut last_hole = Hole::None;
for byte_range in seq {
let key = SuffixCacheKey {
- from_inst: from_inst,
+ from_inst,
start: byte_range.start,
end: byte_range.end,
};
@@ -1109,7 +1129,7 @@ impl SuffixCache {
}
}
*pos = self.dense.len();
- self.dense.push(SuffixCacheEntry { key: key, pc: pc });
+ self.dense.push(SuffixCacheEntry { key, pc });
None
}
@@ -1120,8 +1140,8 @@ impl SuffixCache {
fn hash(&self, suffix: &SuffixCacheKey) -> usize {
// Basic FNV-1a hash as described:
// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
- const FNV_PRIME: u64 = 1099511628211;
- let mut h = 14695981039346656037;
+ const FNV_PRIME: u64 = 1_099_511_628_211;
+ let mut h = 14_695_981_039_346_656_037;
h = (h ^ (suffix.from_inst as u64)).wrapping_mul(FNV_PRIME);
h = (h ^ (suffix.start as u64)).wrapping_mul(FNV_PRIME);
h = (h ^ (suffix.end as u64)).wrapping_mul(FNV_PRIME);
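The `extra_inst_bytes` bookkeeping and the new `c_empty` above feed the compiler's size-limit check, so patterns built almost entirely from empty sub-expressions are now rejected instead of burning compile time. A minimal sketch of the user-visible effect, assuming the default (or an explicitly configured) size limit:

```rust
use regex::RegexBuilder;

fn main() {
    // Each empty sub-expression is charged a fake instruction's worth of
    // memory, so this pathological repetition trips the size limit.
    assert!(RegexBuilder::new("(?:){4294967295}").build().is_err());

    // Ordinary patterns still compile comfortably under an explicit limit.
    let re = RegexBuilder::new(r"a{2,5}")
        .size_limit(1 << 20)
        .build()
        .unwrap();
    assert!(re.is_match("aaa"));
}
```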
diff --git a/src/dfa.rs b/src/dfa.rs
index 4b60f4d..dc99521 100644
--- a/src/dfa.rs
+++ b/src/dfa.rs
@@ -31,7 +31,7 @@ considerably more complex than one might expect out of a DFA. A number of
tricks are employed to make it fast. Tread carefully.
N.B. While this implementation is heavily commented, Russ Cox's series of
-articles on regexes is strongly recommended: https://swtch.com/~rsc/regexp/
+articles on regexes is strongly recommended: <https://swtch.com/~rsc/regexp/>
(As is the DFA implementation in RE2, which heavily influenced this
implementation.)
*/
@@ -454,10 +454,10 @@ impl<'a> Fsm<'a> {
let mut cache = cache.borrow_mut();
let cache = &mut cache.dfa;
let mut dfa = Fsm {
- prog: prog,
+ prog,
start: 0, // filled in below
- at: at,
- quit_after_match: quit_after_match,
+ at,
+ quit_after_match,
last_match_si: STATE_UNKNOWN,
last_cache_flush: at,
cache: &mut cache.inner,
@@ -484,10 +484,10 @@ impl<'a> Fsm<'a> {
let mut cache = cache.borrow_mut();
let cache = &mut cache.dfa_reverse;
let mut dfa = Fsm {
- prog: prog,
+ prog,
start: 0, // filled in below
- at: at,
- quit_after_match: quit_after_match,
+ at,
+ quit_after_match,
last_match_si: STATE_UNKNOWN,
last_cache_flush: at,
cache: &mut cache.inner,
@@ -515,9 +515,9 @@ impl<'a> Fsm<'a> {
let mut cache = cache.borrow_mut();
let cache = &mut cache.dfa;
let mut dfa = Fsm {
- prog: prog,
+ prog,
start: 0, // filled in below
- at: at,
+ at,
quit_after_match: false,
last_match_si: STATE_UNKNOWN,
last_cache_flush: at,
@@ -1353,7 +1353,6 @@ impl<'a> Fsm<'a> {
match self.cache.trans.next(si, self.byte_class(b)) {
STATE_UNKNOWN => self.exec_byte(qcur, qnext, si, b),
STATE_QUIT => None,
- STATE_DEAD => Some(STATE_DEAD),
nsi => Some(nsi),
}
}
@@ -1387,7 +1386,6 @@ impl<'a> Fsm<'a> {
};
match self.cache.start_states[flagi] {
STATE_UNKNOWN => {}
- STATE_DEAD => return Some(STATE_DEAD),
si => return Some(si),
}
q.clear();
@@ -1608,11 +1606,7 @@ struct StateMap {
impl StateMap {
fn new(num_byte_classes: usize) -> StateMap {
- StateMap {
- map: HashMap::new(),
- states: vec![],
- num_byte_classes: num_byte_classes,
- }
+ StateMap { map: HashMap::new(), states: vec![], num_byte_classes }
}
fn len(&self) -> usize {
@@ -1648,7 +1642,7 @@ impl Transitions {
/// The number of byte classes corresponds to the stride. Every state will
/// have `num_byte_classes` slots for transitions.
fn new(num_byte_classes: usize) -> Transitions {
- Transitions { table: vec![], num_byte_classes: num_byte_classes }
+ Transitions { table: vec![], num_byte_classes }
}
/// Returns the total number of states currently in this table.
@@ -1698,27 +1692,27 @@ impl Transitions {
impl StateFlags {
fn is_match(&self) -> bool {
- self.0 & 0b0000000_1 > 0
+ self.0 & 0b0000_0001 > 0
}
fn set_match(&mut self) {
- self.0 |= 0b0000000_1;
+ self.0 |= 0b0000_0001;
}
fn is_word(&self) -> bool {
- self.0 & 0b000000_1_0 > 0
+ self.0 & 0b0000_0010 > 0
}
fn set_word(&mut self) {
- self.0 |= 0b000000_1_0;
+ self.0 |= 0b0000_0010;
}
fn has_empty(&self) -> bool {
- self.0 & 0b00000_1_00 > 0
+ self.0 & 0b0000_0100 > 0
}
fn set_empty(&mut self) {
- self.0 |= 0b00000_1_00;
+ self.0 |= 0b0000_0100;
}
}
diff --git a/src/exec.rs b/src/exec.rs
index d5fad1c..b9abcdc 100644
--- a/src/exec.rs
+++ b/src/exec.rs
@@ -288,10 +288,10 @@ impl ExecBuilder {
exprs.push(expr);
}
Ok(Parsed {
- exprs: exprs,
+ exprs,
prefixes: prefixes.unwrap_or_else(Literals::empty),
suffixes: suffixes.unwrap_or_else(Literals::empty),
- bytes: bytes,
+ bytes,
})
}
@@ -311,7 +311,7 @@ impl ExecBuilder {
match_type: MatchType::Nothing,
});
let pool = ExecReadOnly::new_pool(&ro);
- return Ok(Exec { ro: ro, pool });
+ return Ok(Exec { ro, pool });
}
let parsed = self.parse()?;
let mut nfa = Compiler::new()
@@ -340,12 +340,12 @@ impl ExecBuilder {
let mut ro = ExecReadOnly {
res: self.options.pats,
- nfa: nfa,
- dfa: dfa,
- dfa_reverse: dfa_reverse,
+ nfa,
+ dfa,
+ dfa_reverse,
suffixes: LiteralSearcher::suffixes(parsed.suffixes),
#[cfg(feature = "perf-literal")]
- ac: ac,
+ ac,
match_type: MatchType::Nothing,
};
ro.match_type = ro.choose_match_type(self.match_type);
@@ -459,7 +459,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
self.cache.value(),
true,
&text[start..],
- text.len(),
+ text.len() - start,
) {
dfa::Result::Match(_) => Some(text.len()),
dfa::Result::NoMatch(_) => None,
@@ -511,7 +511,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
self.cache.value(),
true,
&text[start..],
- text.len(),
+ text.len() - start,
) {
dfa::Result::Match(_) => true,
dfa::Result::NoMatch(_) => false,
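The two `text.len() - start` fixes above change how the DFA is invoked when a search begins at a nonzero offset; this is the fix tracked as BUG #969 in the CHANGELOG. A minimal sketch mirroring the regression test added later in this patch, assuming 1.7.3 behavior:

```rust
use regex::Regex;

fn main() {
    // Searching from offset 4 previously mishandled the end bound; with the
    // fix, the end-anchored pattern reports a match ending at byte 6.
    let re = Regex::new(r"c.*d\z").unwrap();
    assert_eq!(Some(6), re.shortest_match_at("ababcd", 4));
    assert!(re.is_match_at("ababcd", 4));
}
```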
diff --git a/src/expand.rs b/src/expand.rs
index fd9c2d0..67b5149 100644
--- a/src/expand.rs
+++ b/src/expand.rs
@@ -127,7 +127,7 @@ impl From<usize> for Ref<'static> {
/// If no such valid reference could be found, None is returned.
fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
let mut i = 0;
- let rep: &[u8] = replacement.as_ref();
+ let rep: &[u8] = replacement;
if rep.len() <= 1 || rep[0] != b'$' {
return None;
}
@@ -136,7 +136,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
return find_cap_ref_braced(rep, i + 1);
}
let mut cap_end = i;
- while rep.get(cap_end).map_or(false, is_valid_cap_letter) {
+ while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) {
cap_end += 1;
}
if cap_end == i {
@@ -183,8 +183,8 @@ fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> {
}
/// Returns true if and only if the given byte is allowed in a capture name.
-fn is_valid_cap_letter(b: &u8) -> bool {
- match *b {
+fn is_valid_cap_letter(b: u8) -> bool {
+ match b {
b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true,
_ => false,
}
diff --git a/src/input.rs b/src/input.rs
index 5d50ee3..df6c3e0 100644
--- a/src/input.rs
+++ b/src/input.rs
@@ -160,7 +160,7 @@ impl<'t> Input for CharInput<'t> {
InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 }
} else {
let c = decode_utf8(&self[i..]).map(|(c, _)| c).into();
- InputAt { pos: i, c: c, byte: None, len: c.len_utf8() }
+ InputAt { pos: i, c, byte: None, len: c.len_utf8() }
}
}
@@ -231,7 +231,7 @@ pub struct ByteInput<'t> {
impl<'t> ByteInput<'t> {
/// Return a new byte-based input reader for the given string.
pub fn new(text: &'t [u8], only_utf8: bool) -> ByteInput<'t> {
- ByteInput { text: text, only_utf8: only_utf8 }
+ ByteInput { text, only_utf8 }
}
}
diff --git a/src/lib.rs b/src/lib.rs
index 7f2dec8..6b95739 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -353,6 +353,9 @@ $ the end of text (or end-of-line with multi-line mode)
\B not a Unicode word boundary
</pre>
+The empty regex is valid and matches the empty string. For example, the empty
+regex matches `abc` at positions `0`, `1`, `2` and `3`.
+
## Grouping and flags
<pre class="rust">
@@ -628,7 +631,6 @@ pub use crate::re_builder::unicode::*;
#[cfg(feature = "std")]
pub use crate::re_set::unicode::*;
#[cfg(feature = "std")]
-#[cfg(feature = "std")]
pub use crate::re_unicode::{
escape, CaptureLocations, CaptureMatches, CaptureNames, Captures,
Locations, Match, Matches, NoExpand, Regex, Replacer, ReplacerRef, Split,
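The sentence added to the module docs above can be demonstrated directly; a small sketch of my own, not part of the crate docs:

```rust
use regex::Regex;

fn main() {
    // The empty pattern is valid and matches the empty string at every
    // position of the haystack, including the end.
    let re = Regex::new("").unwrap();
    let starts: Vec<usize> = re.find_iter("abc").map(|m| m.start()).collect();
    assert_eq!(vec![0, 1, 2, 3], starts);
}
```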
diff --git a/src/literal/imp.rs b/src/literal/imp.rs
index 82f050a..90b2f11 100644
--- a/src/literal/imp.rs
+++ b/src/literal/imp.rs
@@ -57,10 +57,10 @@ impl LiteralSearcher {
fn new(lits: Literals, matcher: Matcher) -> Self {
let complete = lits.all_complete();
LiteralSearcher {
- complete: complete,
+ complete,
lcp: Memmem::new(lits.longest_common_prefix()),
lcs: Memmem::new(lits.longest_common_suffix()),
- matcher: matcher,
+ matcher,
}
}
diff --git a/src/pattern.rs b/src/pattern.rs
index b4ffd8e..00549e5 100644
--- a/src/pattern.rs
+++ b/src/pattern.rs
@@ -15,7 +15,7 @@ impl<'r, 't> Pattern<'t> for &'r Regex {
fn into_searcher(self, haystack: &'t str) -> RegexSearcher<'r, 't> {
RegexSearcher {
- haystack: haystack,
+ haystack,
it: self.find_iter(haystack),
last_step_end: 0,
next_match: None,
diff --git a/src/pikevm.rs b/src/pikevm.rs
index 9a14240..8c9eac2 100644
--- a/src/pikevm.rs
+++ b/src/pikevm.rs
@@ -100,7 +100,7 @@ impl<'r, I: Input> Fsm<'r, I> {
cache.clist.resize(prog.len(), prog.captures.len());
cache.nlist.resize(prog.len(), prog.captures.len());
let at = input.at(start);
- Fsm { prog: prog, stack: &mut cache.stack, input: input }.exec_(
+ Fsm { prog, stack: &mut cache.stack, input }.exec_(
&mut cache.clist,
&mut cache.nlist,
matches,
diff --git a/src/prog.rs b/src/prog.rs
index 475a811..c211f71 100644
--- a/src/prog.rs
+++ b/src/prog.rs
@@ -233,7 +233,7 @@ impl fmt::Debug for Program {
if pc == self.start {
write!(f, " (start)")?;
}
- write!(f, "\n")?;
+ writeln!(f)?;
}
Ok(())
}
diff --git a/src/re_bytes.rs b/src/re_bytes.rs
index ae55d6d..07e9f98 100644
--- a/src/re_bytes.rs
+++ b/src/re_bytes.rs
@@ -53,7 +53,7 @@ impl<'t> Match<'t> {
/// Creates a new match from the given haystack and byte offsets.
#[inline]
fn new(haystack: &'t [u8], start: usize, end: usize) -> Match<'t> {
- Match { text: haystack, start: start, end: end }
+ Match { text: haystack, start, end }
}
}
@@ -255,7 +255,7 @@ impl Regex {
pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> {
let mut locs = self.capture_locations();
self.captures_read_at(&mut locs, text, 0).map(move |_| Captures {
- text: text,
+ text,
locs: locs.0,
named_groups: self.0.capture_name_idx().clone(),
})
@@ -496,12 +496,12 @@ impl Regex {
let mut new = Vec::with_capacity(text.len());
let mut last_match = 0;
for (i, m) in it {
- if limit > 0 && i >= limit {
- break;
- }
new.extend_from_slice(&text[last_match..m.start()]);
new.extend_from_slice(&rep);
last_match = m.end();
+ if limit > 0 && i >= limit - 1 {
+ break;
+ }
}
new.extend_from_slice(&text[last_match..]);
return Cow::Owned(new);
@@ -516,14 +516,14 @@ impl Regex {
let mut new = Vec::with_capacity(text.len());
let mut last_match = 0;
for (i, cap) in it {
- if limit > 0 && i >= limit {
- break;
- }
// unwrap on 0 is OK because captures only reports matches
let m = cap.get(0).unwrap();
new.extend_from_slice(&text[last_match..m.start()]);
rep.replace_append(&cap, &mut new);
last_match = m.end();
+ if limit > 0 && i >= limit - 1 {
+ break;
+ }
}
new.extend_from_slice(&text[last_match..]);
Cow::Owned(new)
@@ -578,7 +578,7 @@ impl Regex {
/// context into consideration. For example, the `\A` anchor can only
/// match when `start == 0`.
pub fn is_match_at(&self, text: &[u8], start: usize) -> bool {
- self.shortest_match_at(text, start).is_some()
+ self.0.searcher().is_match_at(text, start)
}
/// Returns the same as find, but starts the search at the given
@@ -723,7 +723,7 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
fn next(&mut self) -> Option<Captures<'t>> {
self.0.next().map(|locs| Captures {
text: self.0.text(),
- locs: locs,
+ locs,
named_groups: self.0.regex().capture_name_idx().clone(),
})
}
@@ -877,7 +877,7 @@ impl CaptureLocations {
self.0.pos(i)
}
- /// Returns the total number of capturing groups.
+ /// Returns the total number of capture groups (even if they didn't match).
///
/// This is always at least `1` since every regex has at least `1`
/// capturing group that corresponds to the entire match.
@@ -979,7 +979,7 @@ impl<'t> Captures<'t> {
expand_bytes(self, replacement, dst)
}
- /// Returns the number of captured groups.
+ /// Returns the total number of capture groups (even if they didn't match).
///
/// This is always at least `1`, since every regex has at least one capture
/// group that corresponds to the full match.
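The reordered limit check in `replacen` above keeps the documented contract (at most `limit` replacements, with `0` meaning unlimited) while skipping the extra search that used to run only to discover the limit had been reached; this is the PERF #930 optimization from the CHANGELOG. A small sketch of that contract using the bytes API, mirroring the new test in `tests/replace.rs`:

```rust
use regex::bytes::Regex;

fn main() {
    let re = Regex::new(r"[0-9]").unwrap();
    // Replace at most two digits; passing 0 instead would replace them all.
    let replaced = re.replacen(b"age: 1234", 2, &b"Z"[..]);
    assert_eq!(&b"age: ZZ34"[..], &*replaced);
}
```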
diff --git a/src/re_set.rs b/src/re_set.rs
index 73d5953..a6d886d 100644
--- a/src/re_set.rs
+++ b/src/re_set.rs
@@ -59,13 +59,45 @@ $(#[$doc_regexset_example])*
/// 1. Does any regex in the set match?
/// 2. If so, which regexes in the set match?
///
-/// As with the main `Regex` type, it is cheaper to ask (1) instead of (2)
-/// since the matching engines can stop after the first match is found.
+/// As with the main [`Regex`][crate::Regex] type, it is cheaper to ask (1)
+/// instead of (2) since the matching engines can stop after the first match
+/// is found.
///
-/// Other features like finding the location of successive matches or their
-/// sub-captures aren't supported. If you need this functionality, the
-/// recommended approach is to compile each regex in the set independently and
-/// selectively match them based on which regexes in the set matched.
+/// You cannot directly extract [`Match`][crate::Match] or
+/// [`Captures`][crate::Captures] objects from a regex set. If you need these
+/// operations, the recommended approach is to compile each pattern in the set
+/// independently and scan the exact same input a second time with those
+/// independently compiled patterns:
+///
+/// ```rust
+/// use regex::{Regex, RegexSet};
+///
+/// let patterns = ["foo", "bar"];
+/// // Both patterns will match different ranges of this string.
+/// let text = "barfoo";
+///
+/// // Compile a set matching any of our patterns.
+/// let set = RegexSet::new(&patterns).unwrap();
+/// // Compile each pattern independently.
+/// let regexes: Vec<_> = set.patterns().iter()
+/// .map(|pat| Regex::new(pat).unwrap())
+/// .collect();
+///
+/// // Match against the whole set first and identify the individual
+/// // matching patterns.
+/// let matches: Vec<&str> = set.matches(text).into_iter()
+/// // Dereference the match index to get the corresponding
+/// // compiled pattern.
+/// .map(|match_idx| &regexes[match_idx])
+/// // To get match locations or any other info, we then have to search
+/// // the exact same text again, using our separately-compiled pattern.
+/// .map(|pat| pat.find(text).unwrap().as_str())
+/// .collect();
+///
+/// // Matches arrive in the order the constituent patterns were declared,
+/// // not the order they appear in the input.
+/// assert_eq!(vec!["foo", "bar"], matches);
+/// ```
///
/// # Performance
///
diff --git a/src/re_trait.rs b/src/re_trait.rs
index 680aa54..d0c717d 100644
--- a/src/re_trait.rs
+++ b/src/re_trait.rs
@@ -74,8 +74,19 @@ impl<'c> Iterator for SubCapturesPosIter<'c> {
self.idx += 1;
x
}
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let len = self.locs.len() - self.idx;
+ (len, Some(len))
+ }
+
+ fn count(self) -> usize {
+ self.len()
+ }
}
+impl<'c> ExactSizeIterator for SubCapturesPosIter<'c> {}
+
impl<'c> FusedIterator for SubCapturesPosIter<'c> {}
/// `RegularExpression` describes types that can implement regex searching.
@@ -139,7 +150,7 @@ pub trait RegularExpression: Sized + fmt::Debug {
/// Returns an iterator over all non-overlapping successive leftmost-first
/// matches.
fn find_iter(self, text: &Self::Text) -> Matches<'_, Self> {
- Matches { re: self, text: text, last_end: 0, last_match: None }
+ Matches { re: self, text, last_end: 0, last_match: None }
}
/// Returns an iterator over all non-overlapping successive leftmost-first
diff --git a/src/re_unicode.rs b/src/re_unicode.rs
index 142c78f..197510e 100644
--- a/src/re_unicode.rs
+++ b/src/re_unicode.rs
@@ -61,7 +61,7 @@ impl<'t> Match<'t> {
/// Creates a new match from the given haystack and byte offsets.
#[inline]
fn new(haystack: &'t str, start: usize, end: usize) -> Match<'t> {
- Match { text: haystack, start: start, end: end }
+ Match { text: haystack, start, end }
}
}
@@ -129,7 +129,7 @@ impl<'t> From<Match<'t>> for Range<usize> {
/// assert!(haystack.contains(&re));
/// assert_eq!(haystack.find(&re), Some(1));
/// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(),
-/// vec![(1, 4), (5, 8)]);
+/// vec![(1, "111"), (5, "222")]);
/// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]);
/// ```
#[derive(Clone)]
@@ -311,7 +311,7 @@ impl Regex {
pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
let mut locs = self.capture_locations();
self.captures_read_at(&mut locs, text, 0).map(move |_| Captures {
- text: text,
+ text,
locs: locs.0,
named_groups: self.0.capture_name_idx().clone(),
})
@@ -538,7 +538,7 @@ impl Regex {
mut rep: R,
) -> Cow<'t, str> {
// If we know that the replacement doesn't have any capture expansions,
- // then we can fast path. The fast path can make a tremendous
+ // then we can use the fast path. The fast path can make a tremendous
// difference:
//
// 1) We use `find_iter` instead of `captures_iter`. Not asking for
@@ -554,12 +554,12 @@ impl Regex {
let mut new = String::with_capacity(text.len());
let mut last_match = 0;
for (i, m) in it {
- if limit > 0 && i >= limit {
- break;
- }
new.push_str(&text[last_match..m.start()]);
new.push_str(&rep);
last_match = m.end();
+ if limit > 0 && i >= limit - 1 {
+ break;
+ }
}
new.push_str(&text[last_match..]);
return Cow::Owned(new);
@@ -574,14 +574,14 @@ impl Regex {
let mut new = String::with_capacity(text.len());
let mut last_match = 0;
for (i, cap) in it {
- if limit > 0 && i >= limit {
- break;
- }
// unwrap on 0 is OK because captures only reports matches
let m = cap.get(0).unwrap();
new.push_str(&text[last_match..m.start()]);
rep.replace_append(&cap, &mut new);
last_match = m.end();
+ if limit > 0 && i >= limit - 1 {
+ break;
+ }
}
new.push_str(&text[last_match..]);
Cow::Owned(new)
@@ -636,7 +636,7 @@ impl Regex {
/// context into consideration. For example, the `\A` anchor can only
/// match when `start == 0`.
pub fn is_match_at(&self, text: &str, start: usize) -> bool {
- self.shortest_match_at(text, start).is_some()
+ self.0.searcher_str().is_match_at(text, start)
}
/// Returns the same as find, but starts the search at the given
@@ -887,7 +887,7 @@ impl CaptureLocations {
self.0.pos(i)
}
- /// Returns the total number of capturing groups.
+ /// Returns the total number of capture groups (even if they didn't match).
///
/// This is always at least `1` since every regex has at least `1`
/// capturing group that corresponds to the entire match.
@@ -989,7 +989,7 @@ impl<'t> Captures<'t> {
expand_str(self, replacement, dst)
}
- /// Returns the number of captured groups.
+ /// Returns the total number of capture groups (even if they didn't match).
///
/// This is always at least `1`, since every regex has at least one capture
/// group that corresponds to the full match.
@@ -1092,8 +1092,18 @@ impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
.next()
.map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e)))
}
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.it.size_hint()
+ }
+
+ fn count(self) -> usize {
+ self.it.count()
+ }
}
+impl<'c, 't> ExactSizeIterator for SubCaptureMatches<'c, 't> {}
+
impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {}
/// An iterator that yields all non-overlapping capture groups matching a
@@ -1114,7 +1124,7 @@ impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
fn next(&mut self) -> Option<Captures<'t>> {
self.0.next().map(|locs| Captures {
text: self.0.text(),
- locs: locs,
+ locs,
named_groups: self.0.regex().capture_name_idx().clone(),
})
}
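With the `size_hint`/`count` methods and the `ExactSizeIterator` impl added above (FEATURE #857 in the CHANGELOG), the capture-group iterator reports an exact length that covers every group, matched or not. A brief sketch of my own:

```rust
use regex::Regex;

fn main() {
    let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
    let caps = re.captures("2023-04-03").unwrap();
    // Group 0 (the whole match) plus three capture groups.
    assert_eq!(4, caps.len());
    assert_eq!(4, caps.iter().len());
}
```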
diff --git a/src/utf8.rs b/src/utf8.rs
index 6e0608f..2dfd2c0 100644
--- a/src/utf8.rs
+++ b/src/utf8.rs
@@ -108,7 +108,7 @@ pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> {
| ((b2 & !TAG_CONT) as u32) << 6
| ((b3 & !TAG_CONT) as u32);
match cp {
- 0x10000..=0x10FFFF => char::from_u32(cp).map(|cp| (cp, 4)),
+ 0x10000..=0x0010_FFFF => char::from_u32(cp).map(|cp| (cp, 4)),
_ => None,
}
}
diff --git a/tests/regression.rs b/tests/regression.rs
index 44b9083..e8b2525 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -217,3 +217,6 @@ matiter!(
// https://en.wikipedia.org/wiki/Je_(Cyrillic)
ismatch!(empty_group_match, r"()Ј01", "zЈ01", true);
matiter!(empty_group_find, r"()Ј01", "zЈ01", (1, 5));
+
+// See: https://github.com/rust-lang/regex/issues/862
+mat!(non_greedy_question_literal, r"ab??", "ab", Some((0, 1)));
diff --git a/tests/replace.rs b/tests/replace.rs
index 1dc6106..d65be07 100644
--- a/tests/replace.rs
+++ b/tests/replace.rs
@@ -228,3 +228,21 @@ replace!(
bytes!(&std::borrow::Cow::<'_, [u8]>::Owned(vec![b'Z'])),
"age: Z6"
);
+
+#[test]
+fn replacen_no_captures() {
+ let re = regex!(r"[0-9]");
+ assert_eq!(
+ re.replacen(text!("age: 1234"), 2, t!("Z")),
+ text!("age: ZZ34")
+ );
+}
+
+#[test]
+fn replacen_with_captures() {
+ let re = regex!(r"([0-9])");
+ assert_eq!(
+ re.replacen(text!("age: 1234"), 2, t!("${1}Z")),
+ text!("age: 1Z2Z34")
+ );
+}
diff --git a/tests/test_default.rs b/tests/test_default.rs
index d4365fb..19a319a 100644
--- a/tests/test_default.rs
+++ b/tests/test_default.rs
@@ -150,3 +150,83 @@ fn regex_is_reasonably_small() {
assert_eq!(16, size_of::<bytes::Regex>());
assert_eq!(16, size_of::<bytes::RegexSet>());
}
+
+// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
+// See: CVE-2022-24713
+//
+// We test that our regex compiler will correctly return a "too big" error when
+// we try to use a very large repetition on an *empty* sub-expression.
+//
+// At the time this test was written, the regex compiler does not represent
+// empty sub-expressions with any bytecode instructions. In effect, it's an
+// "optimization" to leave them out, since they would otherwise correspond
+// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
+// epsilon transition in the NFA graph). Therefore, an empty sub-expression
+// represents an interesting case for the compiler's size limits. Since it
+// doesn't actually contribute any additional memory to the compiled regex
+// instructions, the size limit machinery never detects it. Instead, it just
+// dumbly tries to compile the empty sub-expression N times, where N is the
+// repetition size.
+//
+// When N is very large, this will cause the compiler to essentially spin and
+// do nothing for a decently large amount of time. It causes the regex to take
+// quite a bit of time to compile, despite the concrete syntax of the regex
+// being quite small.
+//
+// The degree to which this is actually a problem is somewhat of a judgment
+// call. Some regexes simply take a long time to compile. But in general, you
+// should be able to reasonably control this by setting lower or higher size
+// limits on the compiled object size. But this mitigation doesn't work at all
+// for this case.
+//
+// This particular test is somewhat narrow. It merely checks that regex
+// compilation will, at some point, return a "too big" error. Before the
+// fix landed, this test would eventually fail because the regex would be
+// successfully compiled (after enough time elapsed). So while this test
+// doesn't check that we exit in a reasonable amount of time, it does at least
+// check that we are properly returning an error at some point.
+#[test]
+fn big_empty_regex_fails() {
+ use regex::Regex;
+
+ let result = Regex::new("(?:){4294967295}");
+ assert!(result.is_err());
+}
+
+// Below is a "billion laughs" variant of the previous test case.
+#[test]
+fn big_empty_reps_chain_regex_fails() {
+ use regex::Regex;
+
+ let result = Regex::new("(?:){64}{64}{64}{64}{64}{64}");
+ assert!(result.is_err());
+}
+
+// Below is another situation where a zero-length sub-expression can be
+// introduced.
+#[test]
+fn big_zero_reps_regex_fails() {
+ use regex::Regex;
+
+ let result = Regex::new(r"x{0}{4294967295}");
+ assert!(result.is_err());
+}
+
+// Testing another case for completeness.
+#[test]
+fn empty_alt_regex_fails() {
+ use regex::Regex;
+
+ let result = Regex::new(r"(?:|){4294967295}");
+ assert!(result.is_err());
+}
+
+// Regression test for: https://github.com/rust-lang/regex/issues/969
+#[test]
+fn regression_i969() {
+ use regex::Regex;
+
+ let re = Regex::new(r"c.*d\z").unwrap();
+ assert_eq!(Some(6), re.shortest_match_at("ababcd", 4));
+ assert_eq!(Some(6), re.find_at("ababcd", 4).map(|m| m.end()));
+}
diff --git a/tests/unicode.rs b/tests/unicode.rs
index 9f1cd0c..9b32286 100644
--- a/tests/unicode.rs
+++ b/tests/unicode.rs
@@ -232,3 +232,20 @@ mat!(uni_class_sb2, r"\p{sb=lower}", "\u{0469}", Some((0, 2)));
mat!(uni_class_sb3, r"\p{sb=Close}", "\u{FF60}", Some((0, 3)));
mat!(uni_class_sb4, r"\p{sb=Close}", "\u{1F677}", Some((0, 4)));
mat!(uni_class_sb5, r"\p{sb=SContinue}", "\u{FF64}", Some((0, 3)));
+
+// Test 'Vithkuqi' support, which was added in Unicode 14.
+// See: https://github.com/rust-lang/regex/issues/877
+mat!(
+ uni_vithkuqi_literal_upper,
+ r"(?i)^\u{10570}$",
+ "\u{10570}",
+ Some((0, 4))
+);
+mat!(
+ uni_vithkuqi_literal_lower,
+ r"(?i)^\u{10570}$",
+ "\u{10597}",
+ Some((0, 4))
+);
+mat!(uni_vithkuqi_word_upper, r"^\w$", "\u{10570}", Some((0, 4)));
+mat!(uni_vithkuqi_word_lower, r"^\w$", "\u{10597}", Some((0, 4)));