1 files changed, 307 insertions, 7 deletions
diff --git a/src/tests/memchr/mod.rs b/src/tests/memchr/mod.rs
index 79f94ab..0564ad4 100644
--- a/src/tests/memchr/mod.rs
+++ b/src/tests/memchr/mod.rs
@@ -1,7 +1,307 @@
-#[cfg(all(feature = "std", not(miri)))]
-mod iter;
-#[cfg(all(feature = "std", not(miri)))]
-mod memchr;
-mod simple;
-#[cfg(all(feature = "std", not(miri)))]
-mod testdata;
+use alloc::{
+    string::{String, ToString},
+    vec,
+    vec::Vec,
+};
+
+use crate::ext::Byte;
+
+pub(crate) mod naive;
+#[macro_use]
+pub(crate) mod prop;
+
+const SEEDS: &'static [Seed] = &[
+    Seed { haystack: "a", needles: &[b'a'], positions: &[0] },
+    Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] },
+    Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] },
+    Seed { haystack: "", needles: &[b'a'], positions: &[] },
+    Seed { haystack: "z", needles: &[b'a'], positions: &[] },
+    Seed { haystack: "zz", needles: &[b'a'], positions: &[] },
+    Seed { haystack: "zza", needles: &[b'a'], positions: &[2] },
+    Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] },
+    Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] },
+    Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] },
+    Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] },
+    Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] },
+    Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] },
+    Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] },
+    Seed {
+        haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
+        needles: &[b'a'],
+        positions: &[32],
+    },
+    // two needles (applied to memchr2 + memchr3)
+    Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
+    Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
+    Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] },
+    Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] },
+    Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] },
+    Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] },
+    Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] },
+    // three needles (applied to memchr3)
+    Seed {
+        haystack: "xyz",
+        needles: &[b'x', b'y', b'z'],
+        positions: &[0, 1, 2],
+    },
+    Seed {
+        haystack: "zxy",
+        needles: &[b'x', b'y', b'z'],
+        positions: &[0, 1, 2],
+    },
+    Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] },
+    Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] },
+    Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] },
+];
+
+/// Runs a host of substring search tests.
+///
+/// This has support for "partial" substring search implementations only work
+/// for a subset of needles/haystacks. For example, the "packed pair" substring
+/// search implementation only works for haystacks of some minimum length based
+/// of the pair of bytes selected and the size of the vector used.
+pub(crate) struct Runner {
+    needle_len: usize,
+}
+
+impl Runner {
+    /// Create a new test runner for forward and reverse byte search
+    /// implementations.
+    ///
+    /// The `needle_len` given must be at most `3` and at least `1`. It
+    /// corresponds to the number of needle bytes to search for.
+    pub(crate) fn new(needle_len: usize) -> Runner {
+        assert!(needle_len >= 1, "needle_len must be at least 1");
+        assert!(needle_len <= 3, "needle_len must be at most 3");
+        Runner { needle_len }
+    }
+
+    /// Run all tests. This panics on the first failure.
+    ///
+    /// If the implementation being tested returns `None` for a particular
+    /// haystack/needle combination, then that test is skipped.
+    pub(crate) fn forward_iter<F>(self, mut test: F)
+    where
+        F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
+    {
+        for seed in SEEDS.iter() {
+            if seed.needles.len() > self.needle_len {
+                continue;
+            }
+            for t in seed.generate() {
+                let results = match test(t.haystack.as_bytes(), &t.needles) {
+                    None => continue,
+                    Some(results) => results,
+                };
+                assert_eq!(
+                    t.expected,
+                    results,
+                    "needles: {:?}, haystack: {:?}",
+                    t.needles
+                        .iter()
+                        .map(|&b| b.to_char())
+                        .collect::<Vec<char>>(),
+                    t.haystack,
+                );
+            }
+        }
+    }
+
+    /// Run all tests in the reverse direction. This panics on the first
+    /// failure.
+    ///
+    /// If the implementation being tested returns `None` for a particular
+    /// haystack/needle combination, then that test is skipped.
+    pub(crate) fn reverse_iter<F>(self, mut test: F)
+    where
+        F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
+    {
+        for seed in SEEDS.iter() {
+            if seed.needles.len() > self.needle_len {
+                continue;
+            }
+            for t in seed.generate() {
+                let mut results = match test(t.haystack.as_bytes(), &t.needles)
+                {
+                    None => continue,
+                    Some(results) => results,
+                };
+                results.reverse();
+                assert_eq!(
+                    t.expected,
+                    results,
+                    "needles: {:?}, haystack: {:?}",
+                    t.needles
+                        .iter()
+                        .map(|&b| b.to_char())
+                        .collect::<Vec<char>>(),
+                    t.haystack,
+                );
+            }
+        }
+    }
+
+    /// Run all tests as counting tests. This panics on the first failure.
+    ///
+    /// That is, this only checks that the number of matches is correct and
+    /// not whether the offsets of each match are.
+    pub(crate) fn count_iter<F>(self, mut test: F)
+    where
+        F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static,
+    {
+        for seed in SEEDS.iter() {
+            if seed.needles.len() > self.needle_len {
+                continue;
+            }
+            for t in seed.generate() {
+                let got = match test(t.haystack.as_bytes(), &t.needles) {
+                    None => continue,
+                    Some(got) => got,
+                };
+                assert_eq!(
+                    t.expected.len(),
+                    got,
+                    "needles: {:?}, haystack: {:?}",
+                    t.needles
+                        .iter()
+                        .map(|&b| b.to_char())
+                        .collect::<Vec<char>>(),
+                    t.haystack,
+                );
+            }
+        }
+    }
+
+    /// Like `Runner::forward`, but for a function that returns only the next
+    /// match and not all matches.
+    ///
+    /// If the function returns `None`, then it is skipped.
+    pub(crate) fn forward_oneshot<F>(self, mut test: F)
+    where
+        F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
+    {
+        self.forward_iter(move |haystack, needles| {
+            let mut start = 0;
+            let mut results = vec![];
+            while let Some(i) = test(&haystack[start..], needles)? {
+                results.push(start + i);
+                start += i + 1;
+            }
+            Some(results)
+        })
+    }
+
+    /// Like `Runner::reverse`, but for a function that returns only the last
+    /// match and not all matches.
+    ///
+    /// If the function returns `None`, then it is skipped.
+    pub(crate) fn reverse_oneshot<F>(self, mut test: F)
+    where
+        F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
+    {
+        self.reverse_iter(move |haystack, needles| {
+            let mut end = haystack.len();
+            let mut results = vec![];
+            while let Some(i) = test(&haystack[..end], needles)? {
+                results.push(i);
+                end = i;
+            }
+            Some(results)
+        })
+    }
+}
+
+/// A single test for memr?chr{,2,3}.
+#[derive(Clone, Debug)]
+struct Test {
+    /// The string to search in.
+    haystack: String,
+    /// The needles to look for.
+    needles: Vec<u8>,
+    /// The offsets that are expected to be found for all needles in the
+    /// forward direction.
+    expected: Vec<usize>,
+}
+
+impl Test {
+    fn new(seed: &Seed) -> Test {
+        Test {
+            haystack: seed.haystack.to_string(),
+            needles: seed.needles.to_vec(),
+            expected: seed.positions.to_vec(),
+        }
+    }
+}
+
+/// Data that can be expanded into many memchr tests by padding out the corpus.
+#[derive(Clone, Debug)]
+struct Seed {
+    /// The thing to search. We use `&str` instead of `&[u8]` because they
+    /// are nicer to write in tests, and we don't miss much since memchr
+    /// doesn't care about UTF-8.
+    ///
+    /// Corpora cannot contain either '%' or '#'. We use these bytes when
+    /// expanding test cases into many test cases, and we assume they are not
+    /// used. If they are used, `memchr_tests` will panic.
+    haystack: &'static str,
+    /// The needles to search for. This is intended to be an alternation of
+    /// needles. The number of needles may cause this test to be skipped for
+    /// some memchr variants. For example, a test with 2 needles cannot be used
+    /// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
+    /// However, a test with only 1 needle can be used to test all of `memchr`,
+    /// `memchr2` and `memchr3`. We achieve this by filling in the needles with
+    /// bytes that we never used in the corpus (such as '#').
+    needles: &'static [u8],
+    /// The positions expected to match for all of the needles.
+    positions: &'static [usize],
+}
+
+impl Seed {
+    /// Controls how much we expand the haystack on either side for each test.
+    /// We lower this on Miri because otherwise running the tests would take
+    /// forever.
+    const EXPAND_LEN: usize = {
+        #[cfg(not(miri))]
+        {
+            515
+        }
+        #[cfg(miri)]
+        {
+            6
+        }
+    };
+
+    /// Expand this test into many variations of the same test.
+    ///
+    /// In particular, this will generate more tests with larger corpus sizes.
+    /// The expected positions are updated to maintain the integrity of the
+    /// test.
+    ///
+    /// This is important in testing a memchr implementation, because there are
+    /// often different cases depending on the length of the corpus.
+    ///
+    /// Note that we extend the corpus by adding `%` bytes, which we
+    /// don't otherwise use as a needle.
+    fn generate(&self) -> impl Iterator<Item = Test> {
+        let mut more = vec![];
+
+        // Add bytes to the start of the corpus.
+        for i in 0..Seed::EXPAND_LEN {
+            let mut t = Test::new(self);
+            let mut new: String = core::iter::repeat('%').take(i).collect();
+            new.push_str(&t.haystack);
+            t.haystack = new;
+            t.expected = t.expected.into_iter().map(|p| p + i).collect();
+            more.push(t);
+        }
+        // Add bytes to the end of the corpus.
+        for i in 1..Seed::EXPAND_LEN {
+            let mut t = Test::new(self);
+            let padding: String = core::iter::repeat('%').take(i).collect();
+            t.haystack.push_str(&padding);
+            more.push(t);
+        }
+
+        more.into_iter()
+    }
+}