author      Frederick Mayle <fmayle@google.com>    2024-01-17 16:24:54 -0800
committer   Frederick Mayle <fmayle@google.com>    2024-01-17 16:29:06 -0800
commit      8965c6d7386e6a625e8a7f1d3dfc830b396a5d1b (patch)
tree        deb15b92a910530d5454decbf7c1b4a516d51a37
parent      d8493866da62375e1cb03b1344541397086bcd24 (diff)
download    lz4_flex-8965c6d7386e6a625e8a7f1d3dfc830b396a5d1b.tar.gz
Import 'lz4_flex' crate (upstream)
Request Document: go/android-rust-importing-crates
For CL Reviewers: go/android3p#cl-review
For Build Team: go/ab-third-party-imports
Bug: 319233671
Test: n/a
Change-Id: I9d65f7035d039557e5947767b401f576628e3d06
-rw-r--r--  .cargo_vcs_info.json            6
-rw-r--r--  Cargo.lock                    389
-rw-r--r--  Cargo.toml                    100
-rw-r--r--  Cargo.toml.orig                87
-rw-r--r--  LICENSE                        20
-rw-r--r--  METADATA                       19
-rw-r--r--  MODULE_LICENSE_MIT              0
-rw-r--r--  OWNERS                          3
-rw-r--r--  README.md                     129
-rw-r--r--  src/block/compress.rs         969
-rw-r--r--  src/block/decompress.rs       544
-rw-r--r--  src/block/decompress_safe.rs  399
-rw-r--r--  src/block/hashtable.rs        161
-rw-r--r--  src/block/mod.rs              157
-rw-r--r--  src/fastcpy.rs                145
-rw-r--r--  src/fastcpy_unsafe.rs         165
-rw-r--r--  src/frame/compress.rs         471
-rw-r--r--  src/frame/decompress.rs       449
-rw-r--r--  src/frame/header.rs           412
-rw-r--r--  src/frame/mod.rs              111
-rw-r--r--  src/lib.rs                    109
-rw-r--r--  src/sink.rs                   330
22 files changed, 5175 insertions, 0 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..7ae5865
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,6 @@
+{
+ "git": {
+ "sha1": "4e87de7f57ce8be339a73b385f3789a129b73fb0"
+ },
+ "path_in_vcs": ""
+}
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..0007166
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,389 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bit-set"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
+dependencies = [
+ "bit-vec",
+]
+
+[[package]]
+name = "bit-vec"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "byteorder"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855"
+
+[[package]]
+name = "byteorder"
+version = "1.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
+
+[[package]]
+name = "cc"
+version = "1.0.73"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
+dependencies = [
+ "jobserver",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "fastrand"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499"
+dependencies = [
+ "instant",
+]
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+[[package]]
+name = "getrandom"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "instant"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d"
+
+[[package]]
+name = "jobserver"
+version = "0.1.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "libc"
+version = "0.2.125"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b"
+
+[[package]]
+name = "libm"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
+
+[[package]]
+name = "lz4-compress"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f966533a922a9bba9e95e594c1fdb3b9bf5fdcdb11e37e51ad84cd76e468b91"
+dependencies = [
+ "byteorder 0.5.3",
+ "quick-error 1.2.3",
+]
+
+[[package]]
+name = "lz4_flex"
+version = "0.11.2"
+dependencies = [
+ "lz4-compress",
+ "lzzzz",
+ "more-asserts",
+ "proptest",
+ "serde_json",
+ "snap",
+ "twox-hash",
+]
+
+[[package]]
+name = "lzzzz"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8014d1362004776e6a91e4c15a3aa7830d1b6650a075b51a9969ebb6d6af13bc"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "more-asserts"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fafa6961cabd9c63bcd77a45d7e3b7f3b552b70417831fb0f56db717e72407e"
+
+[[package]]
+name = "num-traits"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
+dependencies = [
+ "autocfg",
+ "libm",
+]
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
+[[package]]
+name = "proptest"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29f1b898011ce9595050a68e60f90bad083ff2987a695a42357134c8381fba70"
+dependencies = [
+ "bit-set",
+ "bitflags",
+ "byteorder 1.4.3",
+ "lazy_static",
+ "num-traits",
+ "quick-error 2.0.1",
+ "rand",
+ "rand_chacha",
+ "rand_xorshift",
+ "regex-syntax",
+ "rusty-fork",
+ "tempfile",
+ "unarray",
+]
+
+[[package]]
+name = "quick-error"
+version = "1.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
+
+[[package]]
+name = "quick-error"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rand_xorshift"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
+dependencies = [
+ "rand_core",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
+
+[[package]]
+name = "remove_dir_all"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "rusty-fork"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
+dependencies = [
+ "fnv",
+ "quick-error 1.2.3",
+ "tempfile",
+ "wait-timeout",
+]
+
+[[package]]
+name = "ryu"
+version = "1.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
+
+[[package]]
+name = "serde"
+version = "1.0.137"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1"
+
+[[package]]
+name = "serde_json"
+version = "1.0.91"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883"
+dependencies = [
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "snap"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831"
+
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
+[[package]]
+name = "tempfile"
+version = "3.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
+dependencies = [
+ "cfg-if",
+ "fastrand",
+ "libc",
+ "redox_syscall",
+ "remove_dir_all",
+ "winapi",
+]
+
+[[package]]
+name = "twox-hash"
+version = "1.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
+dependencies = [
+ "cfg-if",
+ "static_assertions",
+]
+
+[[package]]
+name = "unarray"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
+
+[[package]]
+name = "wait-timeout"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "wasi"
+version = "0.11.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..40fbadd
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,100 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+name = "lz4_flex"
+version = "0.11.2"
+authors = [
+ "Pascal Seitz <pascal.seitz@gmail.com>",
+ "Arthur Silva <arthurprs@gmail.com>",
+ "ticki <Ticki@users.noreply.github.com>",
+]
+include = [
+ "src/*.rs",
+ "src/frame/**/*",
+ "src/block/**/*",
+ "README.md",
+ "LICENSE",
+]
+description = "Fastest LZ4 implementation in Rust, no unsafe by default."
+homepage = "https://github.com/pseitz/lz4_flex"
+readme = "README.md"
+keywords = [
+ "compression",
+ "lz4",
+ "compress",
+ "decompression",
+ "decompress",
+]
+license = "MIT"
+repository = "https://github.com/pseitz/lz4_flex"
+
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = [
+ "--cfg",
+ "docsrs",
+]
+
+[profile.bench]
+opt-level = 3
+lto = true
+codegen-units = 1
+
+[profile.release]
+opt-level = 3
+codegen-units = 1
+panic = "unwind"
+
+[[bench]]
+name = "crit_bench"
+path = "benches/crit_bench.rs"
+harness = false
+
+[dependencies.twox-hash]
+version = "1.6.3"
+optional = true
+default-features = false
+
+[dev-dependencies.lz4-compress]
+version = "0.1.1"
+
+[dev-dependencies.lzzzz]
+version = "1.0.4"
+
+[dev-dependencies.more-asserts]
+version = "0.3.1"
+
+[dev-dependencies.proptest]
+version = "1.0.0"
+
+[dev-dependencies.serde_json]
+version = "1.0.91"
+
+[dev-dependencies.snap]
+version = "1.1.0"
+
+[features]
+default = [
+ "std",
+ "safe-encode",
+ "safe-decode",
+ "frame",
+]
+frame = [
+ "std",
+ "dep:twox-hash",
+]
+nightly = []
+safe-decode = []
+safe-encode = []
+std = []
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..3eea255
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,87 @@
+[package]
+authors = ["Pascal Seitz <pascal.seitz@gmail.com>", "Arthur Silva <arthurprs@gmail.com>", "ticki <Ticki@users.noreply.github.com>"]
+description = "Fastest LZ4 implementation in Rust, no unsafe by default."
+edition = "2021"
+keywords = ["compression", "lz4", "compress", "decompression", "decompress"]
+name = "lz4_flex"
+homepage = "https://github.com/pseitz/lz4_flex"
+repository = "https://github.com/pseitz/lz4_flex"
+readme = "README.md"
+license = "MIT"
+version = "0.11.2"
+include = ["src/*.rs", "src/frame/**/*", "src/block/**/*", "README.md", "LICENSE"]
+
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = ["--cfg", "docsrs"]
+
+[[bench]]
+harness = false
+name = "crit_bench"
+path = "benches/crit_bench.rs"
+
+[dev-dependencies]
+criterion = { git = "https://github.com/PSeitz/criterion.rs/", rev = "cf60ffc"}
+lzzzz = "1.0.4"
+lz4-compress = "0.1.1"
+more-asserts = "0.3.1"
+snap = "1.1.0"
+serde_json = "1.0.91"
+proptest = "1.0.0"
+
+[dev-dependencies.lz-fear]
+git = "https://github.com/main--/rust-lz-fear"
+
+ #Uncomment to make lz4_flex master available as lz4_flex_master
+ #[dev-dependencies.lz4_flex_master]
+ #rev= "a122673" # v10
+ #git = "https://github.com/PSeitz/lz4_flex"
+ #package = "lz4_flex"
+ #default-features=false
+ #features = ["std", "safe-encode", "safe-decode", "frame"]
+
+[features]
+default = ["std", "safe-encode", "safe-decode", "frame"]
+safe-decode = []
+safe-encode = []
+#unchecked-decode = [] # Removes some checks for additional performance. Only enable on trusted input!
+frame = ["std", "dep:twox-hash"]
+std = []
+# use nightly compiler features
+nightly = []
+
+[dependencies]
+twox-hash = { version = "1.6.3", default-features = false, optional = true }
+
+[profile.bench]
+codegen-units = 1
+lto = true
+opt-level = 3
+
+[profile.release]
+codegen-units = 1
+#debug = true
+opt-level = 3
+panic = "unwind"
+
+# [[bench]]
+# harness = false
+# name = "quickbench"
+# path = "benches/quickbench.rs"
+
+# [[bench]]
+# harness = false
+# name = "bench"
+# path = "benches/bench.rs"
+
+# [[bin]]
+# name = "decompress_with_stats"
+# path = "src/test_bins/decompress_with_stats.rs"
+
+# [[bin]]
+# name = "profile_decomp"
+# path = "src/test_bins/profile_decomp.rs"
+
+# [[bin]]
+# name = "profile_comp"
+# path = "src/test_bins/profile_comp.rs"
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0037a72
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,20 @@
+The MIT License (MIT)
+
+Copyright (c) 2020 Pascal Seitz
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..858b3d6
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,19 @@
+name: "lz4_flex"
+description: "Fastest LZ4 implementation in Rust, no unsafe by default."
+third_party {
+ identifier {
+ type: "crates.io"
+ value: "https://crates.io/crates/lz4_flex"
+ }
+ identifier {
+ type: "Archive"
+ value: "https://static.crates.io/crates/lz4_flex/lz4_flex-0.11.2.crate"
+ }
+ version: "0.11.2"
+ license_type: NOTICE
+ last_upgrade_date {
+ year: 2024
+ month: 1
+ day: 17
+ }
+}
diff --git a/MODULE_LICENSE_MIT b/MODULE_LICENSE_MIT
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_MIT
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..41179fc
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,3 @@
+# Bug component: 688011
+include platform/prebuilts/rust:main:/OWNERS
+fmayle@google.com
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7b303a6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,129 @@
+![Rust](https://github.com/PSeitz/lz4_flex/workflows/Rust/badge.svg)
+[![Docs](https://docs.rs/lz4_flex/badge.svg)](https://docs.rs/crate/lz4_flex/)
+[![Crates.io](https://img.shields.io/crates/v/lz4_flex.svg)](https://crates.io/crates/lz4_flex)
+
+# lz4_flex
+
+![lz4_flex_logo](https://raw.githubusercontent.com/PSeitz/lz4_flex/master/logo.jpg)
+
+Fastest LZ4 implementation in Rust. Originally based on [redox-os' lz4 compression](https://crates.io/crates/lz4-compress), but now a complete rewrite.
+The results in the table are from a benchmark in this project (66KB JSON, 10MB dickens) with the block format.
+
+AMD Ryzen 7 5900HX, rustc 1.69.0 (84c898d65 2023-04-16), Manjaro, CPU Boost Disabled, CPU Governor: Performance
+
+66KB JSON
+| Compressor | Compression | Decompression | Ratio |
+|----------------------|-------------|---------------|---------------|
+| lz4_flex unsafe w. unchecked_decode | 1615 MiB/s | 5973 MiB/s | 0.2284 |
+| lz4_flex unsafe | 1615 MiB/s | 5512 MiB/s | 0.2284 |
+| lz4_flex safe | 1272 MiB/s | 4540 MiB/s | 0.2284 |
+| lzzz (lz4 1.9.3) | 1469 MiB/s | 5313 MiB/s | 0.2283 |
+| lz4_fear | 662 MiB/s | 939 MiB/s | 0.2283 |
+| snap | 1452 MiB/s | 1649 MiB/s | 0.2242 |
+
+10MB dickens
+| Compressor | Compression | Decompression | Ratio |
+|----------------------|-------------|---------------|---------------|
+| lz4_flex unsafe w. unchecked_decode | 347 MiB/s | 3168 MiB/s | 0.6372 |
+| lz4_flex unsafe | 347 MiB/s | 2734 MiB/s | 0.6372 |
+| lz4_flex safe | 259 MiB/s | 2338 MiB/s | 0.6372 |
+| lzzz (lz4 1.9.3) | 324 MiB/s | 2759 MiB/s | 0.6372 |
+| lz4_fear | 201 MiB/s | 370 MiB/s | 0.6372 |
+| snap | 286 MiB/s | 679 MiB/s | 0.6276 |
+
+## Features
+- Very good logo
+- LZ4 Block format
+- LZ4 Frame format (thanks @arthurprs)
+- High performance
+- 1.5s clean release build time
+- Feature flags to configure safe/unsafe code usage
+- no-std support with block format (thanks @coolreader18)
+- 32-bit support
+
+## Usage
+Compression and decompression use no unsafe code with the default feature flags "safe-encode" and "safe-decode". If you need more performance you can disable them (e.g. with no-default-features).
+
+Safe:
+```
+lz4_flex = { version = "0.11" }
+```
+
+Performance:
+```
+lz4_flex = { version = "0.11", default-features = false }
+```
+
+### Block Format
+The block format is only valid for smaller data chunks, as a block is de/compressed in memory.
+For larger data use the frame format, which consists of multiple blocks.
+
+```rust
+use lz4_flex::block::{compress_prepend_size, decompress_size_prepended};
+
+fn main(){
+ let input: &[u8] = b"Hello people, what's up?";
+ let compressed = compress_prepend_size(input);
+ let uncompressed = decompress_size_prepended(&compressed).unwrap();
+ assert_eq!(input, uncompressed);
+}
+```
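+
+For larger or streaming data, the frame format can be written and read through `std::io`. A minimal sketch, assuming the default `frame` feature and the crate's `FrameEncoder`/`FrameDecoder` types:
+
+```rust
+use std::io::{Read, Write};
+use lz4_flex::frame::{FrameDecoder, FrameEncoder};
+
+fn main() {
+    let input = b"Hello people, what's up?";
+
+    // Compress into an in-memory buffer using the frame format.
+    let mut encoder = FrameEncoder::new(Vec::new());
+    encoder.write_all(input).unwrap();
+    let compressed = encoder.finish().unwrap();
+
+    // Decompress by reading the frame back through a FrameDecoder.
+    let mut decoder = FrameDecoder::new(&compressed[..]);
+    let mut decompressed = Vec::new();
+    decoder.read_to_end(&mut decompressed).unwrap();
+    assert_eq!(&input[..], &decompressed[..]);
+}
+```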
+
+
+## no_std support
+
+no_std support is currently only for the block format, since the frame format uses `std::io::Write`, which is not available in core.
+
+## Benchmarks
+The benchmark is run with criterion; the test files are in the benches folder.
+
+Currently four implementations are compared: this one, [lz-fear](https://github.com/main--/rust-lz-fear), the [C version via Rust bindings](https://crates.io/crates/lzzzz), and [snappy](https://github.com/burntsushi/rust-snappy).
+The lz4_flex version is tested with the feature flags safe-decode and safe-encode switched on and off.
+
+- lz4_cpp: https://crates.io/crates/lzzzz
+- lz-fear: https://github.com/main--/rust-lz-fear
+- snap: https://github.com/burntsushi/rust-snappy
+
+Tested on AMD Ryzen 7 5900HX, rustc 1.69.0 (84c898d65 2023-04-16), Manjaro, CPU Boost Disabled, CPU 3GHZ
+
+### Results v0.11.0 02-06-2023 (safe-decode and safe-encode off)
+`cargo bench --no-default-features`
+
+![Compress](./compress_bench.svg)
+
+![Decompress](./decompress_bench.svg)
+
+### Results v0.11.0 02-06-2023 (safe-decode and safe-encode on)
+`cargo bench`
+
+![Compress](./compress_bench_safe.svg)
+
+![Decompress](./decompress_bench_safe.svg)
+
+## Miri
+
+[Miri](https://github.com/rust-lang/miri) can be used to find issues related to incorrect unsafe usage:
+
+`MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-disable-stacked-borrows" cargo +nightly miri test --no-default-features --features frame`
+
+## Fuzzer
+This fuzz target generates corrupted data for the decompressor.
+`cargo +nightly fuzz run fuzz_decomp_corrupt_block` and `cargo +nightly fuzz run fuzz_decomp_corrupt_frame`
+
+This fuzz target asserts that a compression and decompression roundtrip returns the original input.
+`cargo +nightly fuzz run fuzz_roundtrip` and `cargo +nightly fuzz run fuzz_roundtrip_frame`
+
+This fuzz target asserts that compression with the C++ implementation and decompression with lz4_flex return the original input.
+`cargo +nightly fuzz run fuzz_roundtrip_cpp_compress`
+
+## Bindings in other languages
+ - Node.js: [lz4-napi](https://github.com/antoniomuso/lz4-napi)
+ - Wasm: [lz4-wasm](https://github.com/PSeitz/lz4-wasm)
+
+## TODO
+- High compression
+
+## Migrate from v0.10 to v0.11.1
+To migrate, just remove the `checked-decode` feature flag if you used it.
+
+
diff --git a/src/block/compress.rs b/src/block/compress.rs
new file mode 100644
index 0000000..c18474a
--- /dev/null
+++ b/src/block/compress.rs
@@ -0,0 +1,969 @@
+//! The compression algorithm.
+//!
+//! We make use of hash tables to find duplicates. This gives a reasonable compression ratio with
+//! high performance. It has fixed memory usage, which, contrary to other approaches, makes it
+//! less memory hungry.
+
+use crate::block::hashtable::HashTable;
+use crate::block::END_OFFSET;
+use crate::block::LZ4_MIN_LENGTH;
+use crate::block::MAX_DISTANCE;
+use crate::block::MFLIMIT;
+use crate::block::MINMATCH;
+#[cfg(not(feature = "safe-encode"))]
+use crate::sink::PtrSink;
+use crate::sink::Sink;
+use crate::sink::SliceSink;
+#[allow(unused_imports)]
+use alloc::vec;
+use alloc::vec::Vec;
+
+#[cfg(feature = "safe-encode")]
+use core::convert::TryInto;
+
+use super::hashtable::HashTable4K;
+use super::hashtable::HashTable4KU16;
+use super::{CompressError, WINDOW_SIZE};
+
+/// Increase step size after 1<<INCREASE_STEPSIZE_BITSHIFT non matches
+const INCREASE_STEPSIZE_BITSHIFT: usize = 5;
+
+/// Read a 4-byte "batch" from some position.
+///
+/// This will read a native-endian 4-byte integer from some position.
+#[inline]
+#[cfg(not(feature = "safe-encode"))]
+pub(super) fn get_batch(input: &[u8], n: usize) -> u32 {
+ unsafe { read_u32_ptr(input.as_ptr().add(n)) }
+}
+
+#[inline]
+#[cfg(feature = "safe-encode")]
+pub(super) fn get_batch(input: &[u8], n: usize) -> u32 {
+ u32::from_ne_bytes(input[n..n + 4].try_into().unwrap())
+}
+
+/// Read a usize-sized "batch" from some position.
+///
+/// This will read a native-endian usize from some position.
+#[inline]
+#[allow(dead_code)]
+#[cfg(not(feature = "safe-encode"))]
+pub(super) fn get_batch_arch(input: &[u8], n: usize) -> usize {
+ unsafe { read_usize_ptr(input.as_ptr().add(n)) }
+}
+
+#[inline]
+#[allow(dead_code)]
+#[cfg(feature = "safe-encode")]
+pub(super) fn get_batch_arch(input: &[u8], n: usize) -> usize {
+ const USIZE_SIZE: usize = core::mem::size_of::<usize>();
+ let arr: &[u8; USIZE_SIZE] = input[n..n + USIZE_SIZE].try_into().unwrap();
+ usize::from_ne_bytes(*arr)
+}
+
+#[inline]
+fn token_from_literal(lit_len: usize) -> u8 {
+ if lit_len < 0xF {
+ // Since we can fit the literals length into it, there is no need for saturation.
+ (lit_len as u8) << 4
+ } else {
+ // We were unable to fit the literals into it, so we saturate to 0xF. We will later
+ // write the extensional value.
+ 0xF0
+ }
+}
+
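+// A token byte packs two 4-bit fields: the high nibble holds the literal length and the low
+// nibble holds the match length minus MINMATCH (the `duplicate_length` argument), each
+// saturated at 0xF. For example, lit_len = 3 and duplicate_length = 7 yield the token 0x37.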
+#[inline]
+fn token_from_literal_and_match_length(lit_len: usize, duplicate_length: usize) -> u8 {
+ let mut token = if lit_len < 0xF {
+ // Since we can fit the literals length into it, there is no need for saturation.
+ (lit_len as u8) << 4
+ } else {
+ // We were unable to fit the literals into it, so we saturate to 0xF. We will later
+ // write the extensional value.
+ 0xF0
+ };
+
+ token |= if duplicate_length < 0xF {
+ // We could fit it in.
+ duplicate_length as u8
+ } else {
+ // We were unable to fit it in, so we default to 0xF, which will later be extended.
+ 0xF
+ };
+
+ token
+}
+
+/// Counts the number of same bytes in two byte streams.
+/// `input` is the complete input.
+/// `cur` is the current position in the input. It will be incremented by the number of matched
+/// bytes.
+/// `source` is either the same as `input` or an external slice.
+/// `candidate` is the candidate position in `source`.
+///
+/// The function ignores the last END_OFFSET bytes in input as those should be literals.
+#[inline]
+#[cfg(feature = "safe-encode")]
+fn count_same_bytes(input: &[u8], cur: &mut usize, source: &[u8], candidate: usize) -> usize {
+ const USIZE_SIZE: usize = core::mem::size_of::<usize>();
+ let cur_slice = &input[*cur..input.len() - END_OFFSET];
+ let cand_slice = &source[candidate..];
+
+ let mut num = 0;
+ for (block1, block2) in cur_slice
+ .chunks_exact(USIZE_SIZE)
+ .zip(cand_slice.chunks_exact(USIZE_SIZE))
+ {
+ let input_block = usize::from_ne_bytes(block1.try_into().unwrap());
+ let match_block = usize::from_ne_bytes(block2.try_into().unwrap());
+
+ if input_block == match_block {
+ num += USIZE_SIZE;
+ } else {
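+            // XOR leaves zero bits where the blocks agree; viewed in little-endian byte order,
+            // trailing_zeros()/8 is then the number of bytes that still match before the first
+            // mismatch.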
+ let diff = input_block ^ match_block;
+ num += (diff.to_le().trailing_zeros() / 8) as usize;
+ *cur += num;
+ return num;
+ }
+ }
+
+    // If we're here we may have 1 to 7 bytes left to check close to the end of the input
+    // or source slices. Since this is a rare occurrence we mark it cold, which gives
+    // ~5% better performance.
+ #[cold]
+ fn count_same_bytes_tail(a: &[u8], b: &[u8], offset: usize) -> usize {
+ a.iter()
+ .zip(b)
+ .skip(offset)
+ .take_while(|(a, b)| a == b)
+ .count()
+ }
+ num += count_same_bytes_tail(cur_slice, cand_slice, num);
+
+ *cur += num;
+ num
+}
+
+/// Counts the number of same bytes in two byte streams.
+/// `input` is the complete input.
+/// `cur` is the current position in the input. It will be incremented by the number of matched
+/// bytes.
+/// `source` is either the same as `input` or an external slice.
+/// `candidate` is the candidate position in `source`.
+///
+/// The function ignores the last END_OFFSET bytes in input as those should be literals.
+#[inline]
+#[cfg(not(feature = "safe-encode"))]
+fn count_same_bytes(input: &[u8], cur: &mut usize, source: &[u8], candidate: usize) -> usize {
+ let max_input_match = input.len().saturating_sub(*cur + END_OFFSET);
+ let max_candidate_match = source.len() - candidate;
+ // Considering both limits calc how far we may match in input.
+ let input_end = *cur + max_input_match.min(max_candidate_match);
+
+ let start = *cur;
+ let mut source_ptr = unsafe { source.as_ptr().add(candidate) };
+
+ // compare 4/8 bytes blocks depending on the arch
+ const STEP_SIZE: usize = core::mem::size_of::<usize>();
+ while *cur + STEP_SIZE <= input_end {
+ let diff = read_usize_ptr(unsafe { input.as_ptr().add(*cur) }) ^ read_usize_ptr(source_ptr);
+
+ if diff == 0 {
+ *cur += STEP_SIZE;
+ unsafe {
+ source_ptr = source_ptr.add(STEP_SIZE);
+ }
+ } else {
+ *cur += (diff.to_le().trailing_zeros() / 8) as usize;
+ return *cur - start;
+ }
+ }
+
+ // compare 4 bytes block
+ #[cfg(target_pointer_width = "64")]
+ {
+ if input_end - *cur >= 4 {
+ let diff = read_u32_ptr(unsafe { input.as_ptr().add(*cur) }) ^ read_u32_ptr(source_ptr);
+
+ if diff == 0 {
+ *cur += 4;
+ unsafe {
+ source_ptr = source_ptr.add(4);
+ }
+ } else {
+ *cur += (diff.to_le().trailing_zeros() / 8) as usize;
+ return *cur - start;
+ }
+ }
+ }
+
+ // compare 2 bytes block
+ if input_end - *cur >= 2
+ && unsafe { read_u16_ptr(input.as_ptr().add(*cur)) == read_u16_ptr(source_ptr) }
+ {
+ *cur += 2;
+ unsafe {
+ source_ptr = source_ptr.add(2);
+ }
+ }
+
+ if *cur < input_end
+ && unsafe { input.as_ptr().add(*cur).read() } == unsafe { source_ptr.read() }
+ {
+ *cur += 1;
+ }
+
+ *cur - start
+}
+
+/// Write an integer to the output.
+///
+/// Each additional byte represents a value from 0 to 255, which is added to the previous value
+/// to produce a total length. When the byte value is 255, another byte must be read and added, and
+/// so on. There can be any number of bytes of value "255" following the token.
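+///
+/// For example, a remaining length of 510 is written as the bytes `[0xFF, 0xFF, 0x00]`.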
+#[inline]
+#[cfg(feature = "safe-encode")]
+fn write_integer(output: &mut impl Sink, mut n: usize) {
+ // Note: Since `n` is usually < 0xFF and writing multiple bytes to the output
+ // requires 2 branches of bound check (due to the possibility of add overflows)
+ // the simple byte at a time implementation below is faster in most cases.
+ while n >= 0xFF {
+ n -= 0xFF;
+ push_byte(output, 0xFF);
+ }
+ push_byte(output, n as u8);
+}
+
+/// Write an integer to the output.
+///
+/// Each additional byte represents a value from 0 to 255, which is added to the previous value
+/// to produce a total length. When the byte value is 255, another byte must be read and added, and
+/// so on. There can be any number of bytes of value "255" following the token.
+#[inline]
+#[cfg(not(feature = "safe-encode"))]
+fn write_integer(output: &mut impl Sink, mut n: usize) {
+ // Write the 0xFF bytes as long as the integer is higher than said value.
+ if n >= 4 * 0xFF {
+        // In this unlikely branch we use a fill instead of a loop,
+ // otherwise rustc may output a large unrolled/vectorized loop.
+ let bulk = n / (4 * 0xFF);
+ n %= 4 * 0xFF;
+ unsafe {
+ core::ptr::write_bytes(output.pos_mut_ptr(), 0xFF, 4 * bulk);
+ output.set_pos(output.pos() + 4 * bulk);
+ }
+ }
+
+ // Handle last 1 to 4 bytes
+ push_u32(output, 0xFFFFFFFF);
+ // Updating output len for the remainder
+ unsafe {
+ output.set_pos(output.pos() - 4 + 1 + n / 255);
+ // Write the remaining byte.
+ *output.pos_mut_ptr().sub(1) = (n % 255) as u8;
+ }
+}
+
+/// Handle the last bytes from the input as literals
+#[cold]
+fn handle_last_literals(output: &mut impl Sink, input: &[u8], start: usize) {
+ let lit_len = input.len() - start;
+
+ let token = token_from_literal(lit_len);
+ push_byte(output, token);
+ if lit_len >= 0xF {
+ write_integer(output, lit_len - 0xF);
+ }
+ // Now, write the actual literals.
+ output.extend_from_slice(&input[start..]);
+}
+
+/// Moves the cursors back as long as the bytes match, to find additional bytes in a duplicate
+#[inline]
+#[cfg(feature = "safe-encode")]
+fn backtrack_match(
+ input: &[u8],
+ cur: &mut usize,
+ literal_start: usize,
+ source: &[u8],
+ candidate: &mut usize,
+) {
+    // Note: Even if the iterator version of this loop has fewer branches inside the loop, it has
+    // more branches before the loop. That in practice seems to make it slower than the while
+    // version below. TODO: It should be possible to remove all bounds checks, since we are
+    // walking backwards.
+ while *candidate > 0 && *cur > literal_start && input[*cur - 1] == source[*candidate - 1] {
+ *cur -= 1;
+ *candidate -= 1;
+ }
+}
+
+/// Moves the cursors back as long as the bytes match, to find additional bytes in a duplicate
+#[inline]
+#[cfg(not(feature = "safe-encode"))]
+fn backtrack_match(
+ input: &[u8],
+ cur: &mut usize,
+ literal_start: usize,
+ source: &[u8],
+ candidate: &mut usize,
+) {
+ while unsafe {
+ *candidate > 0
+ && *cur > literal_start
+ && input.get_unchecked(*cur - 1) == source.get_unchecked(*candidate - 1)
+ } {
+ *cur -= 1;
+ *candidate -= 1;
+ }
+}
+
+/// Compress all bytes of `input[input_pos..]` into `output`.
+///
+/// Bytes in `input[..input_pos]` are treated as a preamble and can be used for lookback.
+/// This part is known as the compressor "prefix".
+/// Bytes in `ext_dict` logically precede the bytes in `input` and can also be used for lookback.
+///
+/// `input_stream_offset` is the logical position of the first byte of `input`. This allows same
+/// `dict` to be used for many calls to `compress_internal` as we can "readdress" the first byte of
+/// `input` to be something other than 0.
+///
+/// `dict` is the dictionary of previously encoded sequences.
+///
+/// This is used to find duplicates in the stream so they are not written multiple times.
+///
+/// Every four bytes are hashed, and in the resulting slot their position in the input buffer
+/// is placed in the dict. This way we can easily look up candidates for back references.
+///
+/// Returns the number of bytes written (compressed) into `output`.
+///
+/// # Const parameters
+/// `USE_DICT`: When false, disables usage of `ext_dict` (the call will panic if a non-empty slice
+/// is passed). In other words, this generates more optimized code when an external dictionary
+/// isn't used.
+///
+/// A similar const argument could be used to disable the Prefix mode (eg. USE_PREFIX),
+/// which would impose `input_pos == 0 && input_stream_offset == 0`. Experiments didn't
+/// show significant improvement though.
+// Intentionally avoid inlining.
+// Empirical tests revealed it to be rarely better but often significantly detrimental.
+#[inline(never)]
+pub(crate) fn compress_internal<T: HashTable, const USE_DICT: bool, S: Sink>(
+ input: &[u8],
+ input_pos: usize,
+ output: &mut S,
+ dict: &mut T,
+ ext_dict: &[u8],
+ input_stream_offset: usize,
+) -> Result<usize, CompressError> {
+ assert!(input_pos <= input.len());
+ if USE_DICT {
+ assert!(ext_dict.len() <= super::WINDOW_SIZE);
+ assert!(ext_dict.len() <= input_stream_offset);
+ // Check for overflow hazard when using ext_dict
+ assert!(input_stream_offset
+ .checked_add(input.len())
+ .and_then(|i| i.checked_add(ext_dict.len()))
+ .map_or(false, |i| i <= isize::MAX as usize));
+ } else {
+ assert!(ext_dict.is_empty());
+ }
+ if output.capacity() - output.pos() < get_maximum_output_size(input.len() - input_pos) {
+ return Err(CompressError::OutputTooSmall);
+ }
+
+ let output_start_pos = output.pos();
+ if input.len() - input_pos < LZ4_MIN_LENGTH {
+ handle_last_literals(output, input, input_pos);
+ return Ok(output.pos() - output_start_pos);
+ }
+
+ let ext_dict_stream_offset = input_stream_offset - ext_dict.len();
+ let end_pos_check = input.len() - MFLIMIT;
+ let mut literal_start = input_pos;
+ let mut cur = input_pos;
+
+ if cur == 0 && input_stream_offset == 0 {
+ // According to the spec we can't start with a match,
+ // except when referencing another block.
+ let hash = T::get_hash_at(input, 0);
+ dict.put_at(hash, 0);
+ cur = 1;
+ }
+
+ loop {
+ // Read the next block into two sections, the literals and the duplicates.
+ let mut step_size;
+ let mut candidate;
+ let mut candidate_source;
+ let mut offset;
+ let mut non_match_count = 1 << INCREASE_STEPSIZE_BITSHIFT;
+ // The number of bytes before our cursor, where the duplicate starts.
+ let mut next_cur = cur;
+
+ // In this loop we search for duplicates via the hashtable. 4bytes or 8bytes are hashed and
+ // compared.
+ loop {
+ step_size = non_match_count >> INCREASE_STEPSIZE_BITSHIFT;
+ non_match_count += 1;
+
+ cur = next_cur;
+ next_cur += step_size;
+
+ // Same as cur + MFLIMIT > input.len()
+ if cur > end_pos_check {
+ handle_last_literals(output, input, literal_start);
+ return Ok(output.pos() - output_start_pos);
+ }
+ // Find a candidate in the dictionary with the hash of the current four bytes.
+ // Unchecked is safe as long as the values from the hash function don't exceed the size
+ // of the table. This is ensured by right shifting the hash values
+ // (`dict_bitshift`) to fit them in the table
+
+ // [Bounds Check]: Can be elided due to `end_pos_check` above
+ let hash = T::get_hash_at(input, cur);
+ candidate = dict.get_at(hash);
+ dict.put_at(hash, cur + input_stream_offset);
+
+ // Sanity check: Matches can't be ahead of `cur`.
+ debug_assert!(candidate <= input_stream_offset + cur);
+
+            // Two requirements on the candidate exist:
+            // - We should not return a position which is merely a hash collision, so that the
+            //   candidate actually matches what we search for.
+            // - We can address up to a 16-bit offset, hence we are only able to address the
+            //   candidate if its offset is less than or equal to 0xFFFF.
+ if input_stream_offset + cur - candidate > MAX_DISTANCE {
+ continue;
+ }
+
+ if candidate >= input_stream_offset {
+ // match within input
+ offset = (input_stream_offset + cur - candidate) as u16;
+ candidate -= input_stream_offset;
+ candidate_source = input;
+ } else if USE_DICT {
+ // Sanity check, which may fail if we lost history beyond MAX_DISTANCE
+ debug_assert!(
+ candidate >= ext_dict_stream_offset,
+ "Lost history in ext dict mode"
+ );
+ // match within ext dict
+ offset = (input_stream_offset + cur - candidate) as u16;
+ candidate -= ext_dict_stream_offset;
+ candidate_source = ext_dict;
+ } else {
+ // Match is not reachable anymore
+ // eg. compressing an independent block frame w/o clearing
+ // the matches tables, only increasing input_stream_offset.
+ // Sanity check
+ debug_assert!(input_pos == 0, "Lost history in prefix mode");
+ continue;
+ }
+            // [Bounds Check]: Candidate is coming from the Hashmap. It can't be out of bounds,
+            // but that is impossible for the compiler to prove, so the bounds checks remain.
+ let cand_bytes: u32 = get_batch(candidate_source, candidate);
+ // [Bounds Check]: Should be able to be elided due to `end_pos_check`.
+ let curr_bytes: u32 = get_batch(input, cur);
+
+ if cand_bytes == curr_bytes {
+ break;
+ }
+ }
+
+ // Extend the match backwards if we can
+ backtrack_match(
+ input,
+ &mut cur,
+ literal_start,
+ candidate_source,
+ &mut candidate,
+ );
+
+ // The length (in bytes) of the literals section.
+ let lit_len = cur - literal_start;
+
+ // Generate the higher half of the token.
+ cur += MINMATCH;
+ candidate += MINMATCH;
+ let duplicate_length = count_same_bytes(input, &mut cur, candidate_source, candidate);
+
+ // Note: The `- 2` offset was copied from the reference implementation, it could be
+ // arbitrary.
+ let hash = T::get_hash_at(input, cur - 2);
+ dict.put_at(hash, cur - 2 + input_stream_offset);
+
+ let token = token_from_literal_and_match_length(lit_len, duplicate_length);
+
+ // Push the token to the output stream.
+ push_byte(output, token);
+ // If we were unable to fit the literals length into the token, write the extensional
+ // part.
+ if lit_len >= 0xF {
+ write_integer(output, lit_len - 0xF);
+ }
+
+ // Now, write the actual literals.
+ //
+        // The unsafe version copies blocks of 8 bytes, and therefore may copy up to 7 bytes more
+        // than needed. This is safe, because the last 12 bytes (MF_LIMIT) are handled in
+        // handle_last_literals.
+ copy_literals_wild(output, input, literal_start, lit_len);
+ // write the offset in little endian.
+ push_u16(output, offset);
+
+ // If we were unable to fit the duplicates length into the token, write the
+ // extensional part.
+ if duplicate_length >= 0xF {
+ write_integer(output, duplicate_length - 0xF);
+ }
+ literal_start = cur;
+ }
+}
+
+#[inline]
+#[cfg(feature = "safe-encode")]
+fn push_byte(output: &mut impl Sink, el: u8) {
+ output.push(el);
+}
+
+#[inline]
+#[cfg(not(feature = "safe-encode"))]
+fn push_byte(output: &mut impl Sink, el: u8) {
+ unsafe {
+ core::ptr::write(output.pos_mut_ptr(), el);
+ output.set_pos(output.pos() + 1);
+ }
+}
+
+#[inline]
+#[cfg(feature = "safe-encode")]
+fn push_u16(output: &mut impl Sink, el: u16) {
+ output.extend_from_slice(&el.to_le_bytes());
+}
+
+#[inline]
+#[cfg(not(feature = "safe-encode"))]
+fn push_u16(output: &mut impl Sink, el: u16) {
+ unsafe {
+ core::ptr::copy_nonoverlapping(el.to_le_bytes().as_ptr(), output.pos_mut_ptr(), 2);
+ output.set_pos(output.pos() + 2);
+ }
+}
+
+#[inline]
+#[cfg(not(feature = "safe-encode"))]
+fn push_u32(output: &mut impl Sink, el: u32) {
+ unsafe {
+ core::ptr::copy_nonoverlapping(el.to_le_bytes().as_ptr(), output.pos_mut_ptr(), 4);
+ output.set_pos(output.pos() + 4);
+ }
+}
+
+#[inline(always)] // (always) necessary otherwise compiler fails to inline it
+#[cfg(feature = "safe-encode")]
+fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) {
+ output.extend_from_slice_wild(&input[input_start..input_start + len], len)
+}
+
+#[inline]
+#[cfg(not(feature = "safe-encode"))]
+fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) {
+ debug_assert!(input_start + len / 8 * 8 + ((len % 8) != 0) as usize * 8 <= input.len());
+ debug_assert!(output.pos() + len / 8 * 8 + ((len % 8) != 0) as usize * 8 <= output.capacity());
+ unsafe {
+ // Note: This used to be a wild copy loop of 8 bytes, but the compiler consistently
+        // transformed it into a call to memcpy, which hurts performance significantly for
+ // small copies, which are common.
+ let start_ptr = input.as_ptr().add(input_start);
+ match len {
+ 0..=8 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 8),
+ 9..=16 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 16),
+ 17..=24 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 24),
+ _ => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), len),
+ }
+ output.set_pos(output.pos() + len);
+ }
+}
+
+/// Compress all bytes of `input` into `output`.
+/// The method chooses an appropriate hashtable to lookup duplicates.
+/// output should be preallocated with a size of
+/// `get_maximum_output_size`.
+///
+/// Returns the number of bytes written (compressed) into `output`.
+
+#[inline]
+pub(crate) fn compress_into_sink_with_dict<const USE_DICT: bool>(
+ input: &[u8],
+ output: &mut impl Sink,
+ mut dict_data: &[u8],
+) -> Result<usize, CompressError> {
+ if dict_data.len() + input.len() < u16::MAX as usize {
+ let mut dict = HashTable4KU16::new();
+ init_dict(&mut dict, &mut dict_data);
+ compress_internal::<_, USE_DICT, _>(input, 0, output, &mut dict, dict_data, dict_data.len())
+ } else {
+ let mut dict = HashTable4K::new();
+ init_dict(&mut dict, &mut dict_data);
+ compress_internal::<_, USE_DICT, _>(input, 0, output, &mut dict, dict_data, dict_data.len())
+ }
+}
+
+#[inline]
+fn init_dict<T: HashTable>(dict: &mut T, dict_data: &mut &[u8]) {
+ if dict_data.len() > WINDOW_SIZE {
+ *dict_data = &dict_data[dict_data.len() - WINDOW_SIZE..];
+ }
+ let mut i = 0usize;
+ while i + core::mem::size_of::<usize>() <= dict_data.len() {
+ let hash = T::get_hash_at(dict_data, i);
+ dict.put_at(hash, i);
+ // Note: The 3 byte step was copied from the reference implementation, it could be
+ // arbitrary.
+ i += 3;
+ }
+}
+
+/// Returns the maximum output size of the compressed data.
+/// Can be used to preallocate capacity on the output vector
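+///
+/// For example: `let mut out = vec![0u8; get_maximum_output_size(input.len())];` followed by
+/// `compress_into(input, &mut out)`.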
+#[inline]
+pub fn get_maximum_output_size(input_len: usize) -> usize {
+ 16 + 4 + (input_len as f64 * 1.1) as usize
+}
+
+/// Compress all bytes of `input` into `output`.
+/// The method chooses an appropriate hashtable to lookup duplicates.
+/// output should be preallocated with a size of
+/// `get_maximum_output_size`.
+///
+/// Returns the number of bytes written (compressed) into `output`.
+#[inline]
+pub fn compress_into(input: &[u8], output: &mut [u8]) -> Result<usize, CompressError> {
+ compress_into_sink_with_dict::<false>(input, &mut SliceSink::new(output, 0), b"")
+}
+
+/// Compress all bytes of `input` into `output`.
+/// The method chooses an appropriate hashtable to lookup duplicates.
+/// output should be preallocated with a size of
+/// `get_maximum_output_size`.
+///
+/// Returns the number of bytes written (compressed) into `output`.
+#[inline]
+pub fn compress_into_with_dict(
+ input: &[u8],
+ output: &mut [u8],
+ dict_data: &[u8],
+) -> Result<usize, CompressError> {
+ compress_into_sink_with_dict::<true>(input, &mut SliceSink::new(output, 0), dict_data)
+}
+
+#[inline]
+fn compress_into_vec_with_dict<const USE_DICT: bool>(
+ input: &[u8],
+ prepend_size: bool,
+ mut dict_data: &[u8],
+) -> Vec<u8> {
+ let prepend_size_num_bytes = if prepend_size { 4 } else { 0 };
+ let max_compressed_size = get_maximum_output_size(input.len()) + prepend_size_num_bytes;
+ if dict_data.len() <= 3 {
+ dict_data = b"";
+ }
+ #[cfg(feature = "safe-encode")]
+ let mut compressed = {
+ let mut compressed: Vec<u8> = vec![0u8; max_compressed_size];
+ let out = if prepend_size {
+ compressed[..4].copy_from_slice(&(input.len() as u32).to_le_bytes());
+ &mut compressed[4..]
+ } else {
+ &mut compressed
+ };
+ let compressed_len =
+ compress_into_sink_with_dict::<USE_DICT>(input, &mut SliceSink::new(out, 0), dict_data)
+ .unwrap();
+
+ compressed.truncate(prepend_size_num_bytes + compressed_len);
+ compressed
+ };
+ #[cfg(not(feature = "safe-encode"))]
+ let mut compressed = {
+ let mut vec = Vec::with_capacity(max_compressed_size);
+ let start_pos = if prepend_size {
+ vec.extend_from_slice(&(input.len() as u32).to_le_bytes());
+ 4
+ } else {
+ 0
+ };
+ let compressed_len = compress_into_sink_with_dict::<USE_DICT>(
+ input,
+ &mut PtrSink::from_vec(&mut vec, start_pos),
+ dict_data,
+ )
+ .unwrap();
+ unsafe {
+ vec.set_len(prepend_size_num_bytes + compressed_len);
+ }
+ vec
+ };
+
+ compressed.shrink_to_fit();
+ compressed
+}
+
+/// Compress all bytes of `input` into `output`. The uncompressed size will be prepended as a little
+/// endian u32. Can be used in conjunction with `decompress_size_prepended`
+#[inline]
+pub fn compress_prepend_size(input: &[u8]) -> Vec<u8> {
+ compress_into_vec_with_dict::<false>(input, true, b"")
+}
+
+/// Compress all bytes of `input`.
+#[inline]
+pub fn compress(input: &[u8]) -> Vec<u8> {
+ compress_into_vec_with_dict::<false>(input, false, b"")
+}
+
+/// Compress all bytes of `input` with an external dictionary.
+#[inline]
+pub fn compress_with_dict(input: &[u8], ext_dict: &[u8]) -> Vec<u8> {
+ compress_into_vec_with_dict::<true>(input, false, ext_dict)
+}
+
+/// Compress all bytes of `input` into `output`. The uncompressed size will be prepended as a little
+/// endian u32. Can be used in conjunction with `decompress_size_prepended_with_dict`
+#[inline]
+pub fn compress_prepend_size_with_dict(input: &[u8], ext_dict: &[u8]) -> Vec<u8> {
+ compress_into_vec_with_dict::<true>(input, true, ext_dict)
+}
+
+#[inline]
+#[cfg(not(feature = "safe-encode"))]
+fn read_u16_ptr(input: *const u8) -> u16 {
+ let mut num: u16 = 0;
+ unsafe {
+ core::ptr::copy_nonoverlapping(input, &mut num as *mut u16 as *mut u8, 2);
+ }
+ num
+}
+
+#[inline]
+#[cfg(not(feature = "safe-encode"))]
+fn read_u32_ptr(input: *const u8) -> u32 {
+ let mut num: u32 = 0;
+ unsafe {
+ core::ptr::copy_nonoverlapping(input, &mut num as *mut u32 as *mut u8, 4);
+ }
+ num
+}
+
+#[inline]
+#[cfg(not(feature = "safe-encode"))]
+fn read_usize_ptr(input: *const u8) -> usize {
+ let mut num: usize = 0;
+ unsafe {
+ core::ptr::copy_nonoverlapping(
+ input,
+ &mut num as *mut usize as *mut u8,
+ core::mem::size_of::<usize>(),
+ );
+ }
+ num
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_count_same_bytes() {
+ // 8byte aligned block, zeros and ones are added because the end/offset
+ let first: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ];
+ let second: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ ];
+ assert_eq!(count_same_bytes(first, &mut 0, second, 0), 16);
+
+ // 4byte aligned block
+ let first: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,
+ ];
+ let second: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1,
+ ];
+ assert_eq!(count_same_bytes(first, &mut 0, second, 0), 20);
+
+ // 2byte aligned block
+ let first: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0,
+ ];
+ let second: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1,
+ ];
+ assert_eq!(count_same_bytes(first, &mut 0, second, 0), 22);
+
+ // 1byte aligned block
+ let first: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ ];
+ let second: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1,
+ ];
+ assert_eq!(count_same_bytes(first, &mut 0, second, 0), 23);
+
+ // 1byte aligned block - last byte different
+ let first: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ ];
+ let second: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 6, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1,
+ ];
+ assert_eq!(count_same_bytes(first, &mut 0, second, 0), 22);
+
+ // 1byte aligned block
+ let first: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 9, 5, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ ];
+ let second: &[u8] = &[
+ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 6, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1,
+ ];
+ assert_eq!(count_same_bytes(first, &mut 0, second, 0), 21);
+
+ for diff_idx in 8..100 {
+ let first: Vec<u8> = (0u8..255).cycle().take(100 + 12).collect();
+ let mut second = first.clone();
+ second[diff_idx] = 255;
+ for start in 0..=diff_idx {
+ let same_bytes = count_same_bytes(&first, &mut start.clone(), &second, start);
+ assert_eq!(same_bytes, diff_idx - start);
+ }
+ }
+ }
+
+ #[test]
+ fn test_bug() {
+ let input: &[u8] = &[
+ 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
+ ];
+ let _out = compress(input);
+ }
+
+ #[test]
+ fn test_dict() {
+ let input: &[u8] = &[
+ 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
+ ];
+ let dict = input;
+ let compressed = compress_with_dict(input, dict);
+ assert_lt!(compressed.len(), compress(input).len());
+
+ assert!(compressed.len() < compress(input).len());
+ let mut uncompressed = vec![0u8; input.len()];
+ let uncomp_size = crate::block::decompress::decompress_into_with_dict(
+ &compressed,
+ &mut uncompressed,
+ dict,
+ )
+ .unwrap();
+ uncompressed.truncate(uncomp_size);
+ assert_eq!(input, uncompressed);
+ }
+
+ #[test]
+ fn test_dict_no_panic() {
+ let input: &[u8] = &[
+ 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
+ ];
+ let dict = &[10, 12, 14];
+ let _compressed = compress_with_dict(input, dict);
+ }
+
+ #[test]
+ fn test_dict_match_crossing() {
+ let input: &[u8] = &[
+ 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
+ ];
+ let dict = input;
+ let compressed = compress_with_dict(input, dict);
+ assert_lt!(compressed.len(), compress(input).len());
+
+ let mut uncompressed = vec![0u8; input.len() * 2];
+ // copy first half of the input into output
+ let dict_cutoff = dict.len() / 2;
+ let output_start = dict.len() - dict_cutoff;
+ uncompressed[..output_start].copy_from_slice(&dict[dict_cutoff..]);
+ let uncomp_len = {
+ let mut sink = SliceSink::new(&mut uncompressed[..], output_start);
+ crate::block::decompress::decompress_internal::<true, _>(
+ &compressed,
+ &mut sink,
+ &dict[..dict_cutoff],
+ )
+ .unwrap()
+ };
+ assert_eq!(input.len(), uncomp_len);
+ assert_eq!(
+ input,
+ &uncompressed[output_start..output_start + uncomp_len]
+ );
+ }
+
+ #[test]
+ fn test_conformant_last_block() {
+ // From the spec:
+ // The last match must start at least 12 bytes before the end of block.
+ // The last match is part of the penultimate sequence. It is followed by the last sequence,
+ // which contains only literals. Note that, as a consequence, an independent block <
+ // 13 bytes cannot be compressed, because the match must copy "something",
+ // so it needs at least one prior byte.
+ // When a block can reference data from another block, it can start immediately with a match
+ // and no literal, so a block of 12 bytes can be compressed.
+ let aaas: &[u8] = b"aaaaaaaaaaaaaaa";
+
+ // uncompressible
+ let out = compress(&aaas[..12]);
+ assert_gt!(out.len(), 12);
+ // compressible
+ let out = compress(&aaas[..13]);
+ assert_le!(out.len(), 13);
+ let out = compress(&aaas[..14]);
+ assert_le!(out.len(), 14);
+ let out = compress(&aaas[..15]);
+ assert_le!(out.len(), 15);
+
+ // dict uncompressible
+ let out = compress_with_dict(&aaas[..11], aaas);
+ assert_gt!(out.len(), 11);
+ // compressible
+ let out = compress_with_dict(&aaas[..12], aaas);
+        // According to the spec this _could_ compress, but it doesn't in this lib
+ // as it aborts compression for any input len < LZ4_MIN_LENGTH
+ assert_gt!(out.len(), 12);
+ let out = compress_with_dict(&aaas[..13], aaas);
+ assert_le!(out.len(), 13);
+ let out = compress_with_dict(&aaas[..14], aaas);
+ assert_le!(out.len(), 14);
+ let out = compress_with_dict(&aaas[..15], aaas);
+ assert_le!(out.len(), 15);
+ }
+
+ #[test]
+ fn test_dict_size() {
+ let dict = vec![b'a'; 1024 * 1024];
+ let input = &b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"[..];
+ let compressed = compress_prepend_size_with_dict(input, &dict);
+ let decompressed =
+ crate::block::decompress_size_prepended_with_dict(&compressed, &dict).unwrap();
+ assert_eq!(decompressed, input);
+ }
+}
diff --git a/src/block/decompress.rs b/src/block/decompress.rs
new file mode 100644
index 0000000..62065a9
--- /dev/null
+++ b/src/block/decompress.rs
@@ -0,0 +1,544 @@
+//! The block decompression algorithm.
+use crate::block::{DecompressError, MINMATCH};
+use crate::fastcpy_unsafe;
+use crate::sink::SliceSink;
+use crate::sink::{PtrSink, Sink};
+use alloc::vec::Vec;
+
+/// Copies data to output_ptr by self-referential copy from start and match_length
+#[inline]
+unsafe fn duplicate(
+ output_ptr: &mut *mut u8,
+ output_end: *mut u8,
+ start: *const u8,
+ match_length: usize,
+) {
+ // We cannot simply use memcpy or `extend_from_slice`, because these do not allow
+ // self-referential copies: http://ticki.github.io/img/lz4_runs_encoding_diagram.svg
+
+    // Consider that `wild_copy_from_src_16` can copy up to `16 - 1` extra bytes.
+    // Defer to `duplicate_overlapping` in case of an overlapping match
+    // OR if the wild copy would copy beyond the end of the output.
+ if (output_ptr.offset_from(start) as usize) < match_length + 16 - 1
+ || (output_end.offset_from(*output_ptr) as usize) < match_length + 16 - 1
+ {
+ duplicate_overlapping(output_ptr, start, match_length);
+ } else {
+ debug_assert!(
+ output_ptr.add(match_length / 16 * 16 + ((match_length % 16) != 0) as usize * 16)
+ <= output_end
+ );
+ wild_copy_from_src_16(start, *output_ptr, match_length);
+ *output_ptr = output_ptr.add(match_length);
+ }
+}
+
+#[inline]
+fn wild_copy_from_src_16(mut source: *const u8, mut dst_ptr: *mut u8, num_items: usize) {
+    // Note: if the compiler auto-vectorizes this it'll hurt performance!
+    // It doesn't for a 16-byte step size, but it does for 8 bytes.
+ unsafe {
+ let dst_ptr_end = dst_ptr.add(num_items);
+ loop {
+ core::ptr::copy_nonoverlapping(source, dst_ptr, 16);
+ source = source.add(16);
+ dst_ptr = dst_ptr.add(16);
+ if dst_ptr >= dst_ptr_end {
+ break;
+ }
+ }
+ }
+}
+
+/// Copy function, if the data start + match_length overlaps into output_ptr
+#[inline]
+#[cfg_attr(nightly, optimize(size))] // to avoid loop unrolling
+unsafe fn duplicate_overlapping(
+ output_ptr: &mut *mut u8,
+ mut start: *const u8,
+ match_length: usize,
+) {
+ // There is an edge case when output_ptr == start, which causes the decoder to potentially
+ // expose up to match_length bytes of uninitialized data in the decompression buffer.
+ // To prevent that we write a dummy zero to output, which will zero out output in such cases.
+ // This is the same strategy used by the reference C implementation https://github.com/lz4/lz4/pull/772
+ output_ptr.write(0u8);
+ let dst_ptr_end = output_ptr.add(match_length);
+
+ while output_ptr.add(1) < dst_ptr_end {
+        // Note that this loop unrolling is done by hand, so that the compiler doesn't do it in an
+        // awful way.
+        // Without it the compiler will unroll/auto-vectorize the copy with a lot of branches.
+ // This is not what we want, as large overlapping copies are not that common.
+ core::ptr::copy(start, *output_ptr, 1);
+ start = start.add(1);
+ *output_ptr = output_ptr.add(1);
+
+ core::ptr::copy(start, *output_ptr, 1);
+ start = start.add(1);
+ *output_ptr = output_ptr.add(1);
+ }
+
+ if *output_ptr < dst_ptr_end {
+ core::ptr::copy(start, *output_ptr, 1);
+ *output_ptr = output_ptr.add(1);
+ }
+}
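+// Illustrative note (added, not from the original source): when `start` points one byte behind
+// `output_ptr` (offset 1), every single-byte copy above reads the byte that was just written, so
+// a match of length N expands into N repetitions of that byte -- the classic LZ4 run-length case
+// that only an overlapping, byte-by-byte copy handles correctly.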
+
+#[inline]
+unsafe fn copy_from_dict(
+ output_base: *mut u8,
+ output_ptr: &mut *mut u8,
+ ext_dict: &[u8],
+ offset: usize,
+ match_length: usize,
+) -> usize {
+ // If we're here we know offset > output pos, so we have at least 1 byte to copy from dict
+ debug_assert!(output_ptr.offset_from(output_base) >= 0);
+ debug_assert!(offset > output_ptr.offset_from(output_base) as usize);
+    // If unchecked-decode is not enabled we also know that the offset falls within ext_dict
+ debug_assert!(ext_dict.len() + output_ptr.offset_from(output_base) as usize >= offset);
+
+ let dict_offset = ext_dict.len() + output_ptr.offset_from(output_base) as usize - offset;
+ // Can't copy past ext_dict len, the match may cross dict and output
+ let dict_match_length = match_length.min(ext_dict.len() - dict_offset);
+ // TODO test fastcpy_unsafe
+ core::ptr::copy_nonoverlapping(
+ ext_dict.as_ptr().add(dict_offset),
+ *output_ptr,
+ dict_match_length,
+ );
+ *output_ptr = output_ptr.add(dict_match_length);
+ dict_match_length
+}
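+// Worked example (added for clarity): with ext_dict.len() = 10, an output position of 3 and
+// offset = 8, the match reaches 5 bytes back into the dictionary and dict_offset = 10 + 3 - 8 = 5,
+// i.e. the copy starts 5 bytes before the end of ext_dict.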
+
+/// Read an integer.
+///
+/// In LZ4, we encode small integers in a way that we can have an arbitrary number of bytes. In
+/// particular, we add the bytes repeatedly until we hit a non-0xFF byte. When we do, we add
+/// this byte to our sum and terminate the loop.
+///
+/// # Example
+///
+/// ```notest
+/// 255, 255, 255, 4, 2, 3, 4, 6, 7
+/// ```
+///
+/// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 are ignored, because
+/// 4 is the first non-0xFF byte.
+#[inline]
+fn read_integer_ptr(
+ input_ptr: &mut *const u8,
+ _input_ptr_end: *const u8,
+) -> Result<u32, DecompressError> {
+ // We start at zero and count upwards.
+ let mut n: u32 = 0;
+ // If this byte takes value 255 (the maximum value it can take), another byte is read
+ // and added to the sum. This repeats until a byte lower than 255 is read.
+ loop {
+ // We add the next byte until we get a byte which we add to the counting variable.
+
+ #[cfg(not(feature = "unchecked-decode"))]
+ {
+ if *input_ptr >= _input_ptr_end {
+ return Err(DecompressError::ExpectedAnotherByte);
+ }
+ }
+ let extra = unsafe { input_ptr.read() };
+ *input_ptr = unsafe { input_ptr.add(1) };
+ n += extra as u32;
+
+ // We continue if we got 255, break otherwise.
+ if extra != 0xFF {
+ break;
+ }
+ }
+
+ // 255, 255, 255, 8
+ // 111, 111, 111, 101
+
+ Ok(n)
+}
+
+/// Read a little-endian 16-bit integer from the input stream.
+#[inline]
+fn read_u16_ptr(input_ptr: &mut *const u8) -> u16 {
+ let mut num: u16 = 0;
+ unsafe {
+ core::ptr::copy_nonoverlapping(*input_ptr, &mut num as *mut u16 as *mut u8, 2);
+ *input_ptr = input_ptr.add(2);
+ }
+
+ u16::from_le(num)
+}
+
+const FIT_TOKEN_MASK_LITERAL: u8 = 0b00001111;
+const FIT_TOKEN_MASK_MATCH: u8 = 0b11110000;
+
+#[test]
+fn check_token() {
+ assert!(!does_token_fit(15));
+ assert!(does_token_fit(14));
+ assert!(does_token_fit(114));
+ assert!(!does_token_fit(0b11110000));
+ assert!(does_token_fit(0b10110000));
+}
+
+/// The token consists of two parts, the literal length (upper 4 bits) and the match_length (lower
+/// 4 bits). If the literal length and match_length are both below 15, we don't need to read
+/// additional data, so all the metadata fits in a single u8 token.
+#[inline]
+fn does_token_fit(token: u8) -> bool {
+ !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL
+ || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH)
+}
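+// For example (illustrative, added note): token 0x23 encodes a literal length of 2 (upper nibble)
+// and a match-length nibble of 3 (lower nibble, i.e. match length 3 + MINMATCH); both nibbles are
+// below 15, so no extension bytes follow and `does_token_fit(0x23)` is true.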
+
+/// Decompress all bytes of `input` into `output`.
+///
+/// Returns the number of bytes written (decompressed) into `output`.
+#[inline]
+pub(crate) fn decompress_internal<const USE_DICT: bool, S: Sink>(
+ input: &[u8],
+ output: &mut S,
+ ext_dict: &[u8],
+) -> Result<usize, DecompressError> {
+ // Prevent segfault for empty input
+ if input.is_empty() {
+ return Err(DecompressError::ExpectedAnotherByte);
+ }
+
+ let ext_dict = if USE_DICT {
+ ext_dict
+ } else {
+ // ensure optimizer knows ext_dict length is 0 if !USE_DICT
+ debug_assert!(ext_dict.is_empty());
+ &[]
+ };
+ let output_base = unsafe { output.base_mut_ptr() };
+ let output_end = unsafe { output_base.add(output.capacity()) };
+ let output_start_pos_ptr = unsafe { output.base_mut_ptr().add(output.pos()) as *mut u8 };
+ let mut output_ptr = output_start_pos_ptr;
+
+ let mut input_ptr = input.as_ptr();
+ let input_ptr_end = unsafe { input.as_ptr().add(input.len()) };
+    let safe_distance_from_end = (16 /* literal copy */ + 2 /* u16 match offset */ + 1 /* The next token to read (we can skip the check) */).min(input.len());
+ let input_ptr_safe = unsafe { input_ptr_end.sub(safe_distance_from_end) };
+
+ let safe_output_ptr = unsafe {
+ let mut output_num_safe_bytes = output
+ .capacity()
+ .saturating_sub(16 /* literal copy */ + 18 /* match copy */);
+ if USE_DICT {
+ // In the dictionary case the output pointer is moved by the match length in the dictionary.
+ // This may be up to 17 bytes without exiting the loop. So we need to ensure that we have
+            // at least an additional 17 bytes of space left in the output buffer in the fast loop.
+ output_num_safe_bytes = output_num_safe_bytes.saturating_sub(17);
+ };
+
+ output_base.add(output_num_safe_bytes)
+ };
+
+ // Exhaust the decoder by reading and decompressing all blocks until the remaining buffer is
+ // empty.
+ loop {
+        // Read the token. The token is the first byte in a block. It is divided into two 4-bit
+        // "fields", a higher and a lower, representing the literals' length and the back
+        // reference's length, respectively.
+ let token = unsafe { input_ptr.read() };
+ input_ptr = unsafe { input_ptr.add(1) };
+
+        // Check for the hot loop.
+        // In most cases the metadata fits in a single 1-byte token (statistically) and we are at
+        // a safe distance from the end. This enables some optimized handling.
+ //
+        // Ideally we want to check for a safe output position like: output_ptr <= safe_output_ptr; but
+        // that doesn't work when safe_output_ptr == output_ptr due to insufficient
+        // capacity. So we use `<` instead of `<=`, which covers that case.
+ if does_token_fit(token)
+ && (input_ptr as usize) <= input_ptr_safe as usize
+ && output_ptr < safe_output_ptr
+ {
+ let literal_length = (token >> 4) as usize;
+ let mut match_length = MINMATCH + (token & 0xF) as usize;
+
+ // output_ptr <= safe_output_ptr should guarantee we have enough space in output
+ debug_assert!(
+ unsafe { output_ptr.add(literal_length + match_length) } <= output_end,
+ "{literal_length} + {match_length} {} wont fit ",
+ literal_length + match_length
+ );
+
+ // Copy the literal
+        // The literal is at most 14 bytes here, and the is_safe_distance check ensures
+        // that we are far enough from the end that we can safely copy 16 bytes
+ unsafe {
+ core::ptr::copy_nonoverlapping(input_ptr, output_ptr, 16);
+ input_ptr = input_ptr.add(literal_length);
+ output_ptr = output_ptr.add(literal_length);
+ }
+
+ // input_ptr <= input_ptr_safe should guarantee we have enough space in input
+ debug_assert!(input_ptr_end as usize - input_ptr as usize >= 2);
+ let offset = read_u16_ptr(&mut input_ptr) as usize;
+
+ let output_len = unsafe { output_ptr.offset_from(output_base) as usize };
+ let offset = offset.min(output_len + ext_dict.len());
+
+ // Check if part of the match is in the external dict
+ if USE_DICT && offset > output_len {
+ let copied = unsafe {
+ copy_from_dict(output_base, &mut output_ptr, ext_dict, offset, match_length)
+ };
+ if copied == match_length {
+ continue;
+ }
+ // match crosses ext_dict and output
+ match_length -= copied;
+ }
+
+ // Calculate the start of this duplicate segment. At this point offset was already
+ // checked to be in bounds and the external dictionary copy, if any, was
+ // already copied and subtracted from match_length.
+ let start_ptr = unsafe { output_ptr.sub(offset) };
+ debug_assert!(start_ptr >= output_base);
+ debug_assert!(start_ptr < output_end);
+ debug_assert!(unsafe { output_end.offset_from(start_ptr) as usize } >= match_length);
+
+ // In this branch we know that match_length is at most 18 (14 + MINMATCH).
+ // But the blocks can overlap, so make sure they are at least 18 bytes apart
+ // to enable an optimized copy of 18 bytes.
+ if offset >= match_length {
+ unsafe {
+                    // _copy_, not copy_nonoverlapping, as it may overlap.
+                    // Compiles to the same assembly on x86_64.
+ core::ptr::copy(start_ptr, output_ptr, 18);
+ output_ptr = output_ptr.add(match_length);
+ }
+ } else {
+ unsafe {
+ duplicate_overlapping(&mut output_ptr, start_ptr, match_length);
+ }
+ }
+
+ continue;
+ }
+
+ // Now, we read the literals section.
+ // Literal Section
+        // If the initial value is 15, another byte will be read and added to it.
+ let mut literal_length = (token >> 4) as usize;
+ if literal_length != 0 {
+ if literal_length == 15 {
+                // The literal length took the maximal value, indicating that there are more
+                // than 15 literal bytes. We read the extra integer.
+ literal_length += read_integer_ptr(&mut input_ptr, input_ptr_end)? as usize;
+ }
+
+ #[cfg(not(feature = "unchecked-decode"))]
+ {
+ // Check if literal is out of bounds for the input, and if there is enough space on
+ // the output
+ if literal_length > input_ptr_end as usize - input_ptr as usize {
+ return Err(DecompressError::LiteralOutOfBounds);
+ }
+ if literal_length > unsafe { output_end.offset_from(output_ptr) as usize } {
+ return Err(DecompressError::OutputTooSmall {
+ expected: unsafe { output_ptr.offset_from(output_base) as usize }
+ + literal_length,
+ actual: output.capacity(),
+ });
+ }
+ }
+ unsafe {
+ fastcpy_unsafe::slice_copy(input_ptr, output_ptr, literal_length);
+ output_ptr = output_ptr.add(literal_length);
+ input_ptr = input_ptr.add(literal_length);
+ }
+ }
+
+        // If the input stream is emptied, we break out of the loop. This is only the case
+        // at the end of the stream, since the block is intact otherwise.
+ if input_ptr >= input_ptr_end {
+ break;
+ }
+
+ // Read duplicate section
+ #[cfg(not(feature = "unchecked-decode"))]
+ {
+ if (input_ptr_end as usize) - (input_ptr as usize) < 2 {
+ return Err(DecompressError::ExpectedAnotherByte);
+ }
+ }
+ let offset = read_u16_ptr(&mut input_ptr) as usize;
+        // Obtain the initial match length. The match length is the length of the duplicate segment
+        // which will later be copied from data previously decompressed into the output buffer. The
+        // initial length is derived from the second part of the token (the lower nibble), which we
+        // read earlier. Since having a match length of less than 4 would mean a negative compression
+        // ratio, we start at 4 (MINMATCH).
+
+ // The initial match length can maximally be 19 (MINMATCH + 15). As with the literal length,
+ // this indicates that there are more bytes to read.
+ let mut match_length = MINMATCH + (token & 0xF) as usize;
+ if match_length == MINMATCH + 15 {
+            // The match length took the maximal value, indicating that there are more bytes. We
+            // read the extra integer.
+ match_length += read_integer_ptr(&mut input_ptr, input_ptr_end)? as usize;
+ }
+
+        // We now copy from the already decompressed buffer. This allows us to store duplicates
+        // by simply referencing the earlier location.
+ let output_len = unsafe { output_ptr.offset_from(output_base) as usize };
+
+        // We'll do a bounds check unless unchecked-decode is enabled.
+ #[cfg(not(feature = "unchecked-decode"))]
+ {
+ if offset > output_len + ext_dict.len() {
+ return Err(DecompressError::OffsetOutOfBounds);
+ }
+ if match_length > unsafe { output_end.offset_from(output_ptr) as usize } {
+ return Err(DecompressError::OutputTooSmall {
+ expected: output_len + match_length,
+ actual: output.capacity(),
+ });
+ }
+ }
+
+ if USE_DICT && offset > output_len {
+ let copied = unsafe {
+ copy_from_dict(output_base, &mut output_ptr, ext_dict, offset, match_length)
+ };
+ if copied == match_length {
+ #[cfg(not(feature = "unchecked-decode"))]
+ {
+ if input_ptr >= input_ptr_end {
+ return Err(DecompressError::ExpectedAnotherByte);
+ }
+ }
+
+ continue;
+ }
+ // match crosses ext_dict and output
+ match_length -= copied;
+ }
+
+ // Calculate the start of this duplicate segment. At this point offset was already checked
+ // to be in bounds and the external dictionary copy, if any, was already copied and
+ // subtracted from match_length.
+ let start_ptr = unsafe { output_ptr.sub(offset) };
+ debug_assert!(start_ptr >= output_base);
+ debug_assert!(start_ptr < output_end);
+ debug_assert!(unsafe { output_end.offset_from(start_ptr) as usize } >= match_length);
+ unsafe {
+ duplicate(&mut output_ptr, output_end, start_ptr, match_length);
+ }
+ #[cfg(not(feature = "unchecked-decode"))]
+ {
+ if input_ptr >= input_ptr_end {
+ return Err(DecompressError::ExpectedAnotherByte);
+ }
+ }
+ }
+ unsafe {
+ output.set_pos(output_ptr.offset_from(output_base) as usize);
+ Ok(output_ptr.offset_from(output_start_pos_ptr) as usize)
+ }
+}
+
+/// Decompress all bytes of `input` into `output`.
+/// `output` should be preallocated with the size of the uncompressed data.
+#[inline]
+pub fn decompress_into(input: &[u8], output: &mut [u8]) -> Result<usize, DecompressError> {
+ decompress_internal::<false, _>(input, &mut SliceSink::new(output, 0), b"")
+}
+
+/// Decompress all bytes of `input` into `output`.
+///
+/// Returns the number of bytes written (decompressed) into `output`.
+#[inline]
+pub fn decompress_into_with_dict(
+ input: &[u8],
+ output: &mut [u8],
+ ext_dict: &[u8],
+) -> Result<usize, DecompressError> {
+ decompress_internal::<true, _>(input, &mut SliceSink::new(output, 0), ext_dict)
+}
+
+/// Decompress all bytes of `input` into a new vec.
+/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the uncompressed size.
+///
+/// # Panics
+/// May panic if the parameter `min_uncompressed_size` is smaller than the
+/// uncompressed data.
+
+#[inline]
+pub fn decompress_with_dict(
+ input: &[u8],
+ min_uncompressed_size: usize,
+ ext_dict: &[u8],
+) -> Result<Vec<u8>, DecompressError> {
+ // Allocate a vector to contain the decompressed stream.
+ let mut vec = Vec::with_capacity(min_uncompressed_size);
+ let decomp_len =
+ decompress_internal::<true, _>(input, &mut PtrSink::from_vec(&mut vec, 0), ext_dict)?;
+ unsafe {
+ vec.set_len(decomp_len);
+ }
+ Ok(vec)
+}
+
+/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
+/// little endian. Can be used in conjunction with `compress_prepend_size`
+#[inline]
+pub fn decompress_size_prepended(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
+ let (uncompressed_size, input) = super::uncompressed_size(input)?;
+ decompress(input, uncompressed_size)
+}
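+// Illustrative round trip (an added sketch, not part of the original source), using the public
+// block-level helpers exported by the crate:
+//
+//     let compressed = lz4_flex::block::compress_prepend_size(b"example data");
+//     let restored = lz4_flex::block::decompress_size_prepended(&compressed).unwrap();
+//     assert_eq!(restored, b"example data");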
+
+/// Decompress all bytes of `input` into a new vec.
+/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the uncompressed size.
+///
+/// # Panics
+/// May panic if the parameter `min_uncompressed_size` is smaller than the
+/// uncompressed data.
+#[inline]
+pub fn decompress(input: &[u8], min_uncompressed_size: usize) -> Result<Vec<u8>, DecompressError> {
+ // Allocate a vector to contain the decompressed stream.
+ let mut vec = Vec::with_capacity(min_uncompressed_size);
+ let decomp_len =
+ decompress_internal::<true, _>(input, &mut PtrSink::from_vec(&mut vec, 0), b"")?;
+ unsafe {
+ vec.set_len(decomp_len);
+ }
+ Ok(vec)
+}
+
+/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
+/// little endian. Can be used in conjunction with `compress_prepend_size_with_dict`
+#[inline]
+pub fn decompress_size_prepended_with_dict(
+ input: &[u8],
+ ext_dict: &[u8],
+) -> Result<Vec<u8>, DecompressError> {
+ let (uncompressed_size, input) = super::uncompressed_size(input)?;
+ decompress_with_dict(input, uncompressed_size, ext_dict)
+}
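+// Illustrative dictionary round trip (an added sketch, not part of the original source); the same
+// dictionary bytes must be supplied on both the compression and decompression side:
+//
+//     let dict = b"0123456789abcdef";
+//     let compressed = lz4_flex::block::compress_prepend_size_with_dict(b"abcdef0123", dict);
+//     let restored =
+//         lz4_flex::block::decompress_size_prepended_with_dict(&compressed, dict).unwrap();
+//     assert_eq!(restored, b"abcdef0123");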
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn all_literal() {
+ assert_eq!(decompress(&[0x30, b'a', b'4', b'9'], 3).unwrap(), b"a49");
+ }
+
+    // this error test is only valid when the unchecked-decode feature is not enabled.
+ #[cfg(not(feature = "unchecked-decode"))]
+ #[test]
+ fn offset_oob() {
+ decompress(&[0x10, b'a', 2, 0], 4).unwrap_err();
+ decompress(&[0x40, b'a', 1, 0], 4).unwrap_err();
+ }
+}
diff --git a/src/block/decompress_safe.rs b/src/block/decompress_safe.rs
new file mode 100644
index 0000000..cebe3cb
--- /dev/null
+++ b/src/block/decompress_safe.rs
@@ -0,0 +1,399 @@
+//! The block decompression algorithm.
+
+use core::convert::TryInto;
+
+use crate::block::DecompressError;
+use crate::block::MINMATCH;
+use crate::sink::Sink;
+use crate::sink::SliceSink;
+use alloc::vec;
+use alloc::vec::Vec;
+
+/// Read an integer.
+///
+/// In LZ4, we encode small integers in a way that we can have an arbitrary number of bytes. In
+/// particular, we add the bytes repeatedly until we hit a non-0xFF byte. When we do, we add
+/// this byte to our sum and terminate the loop.
+///
+/// # Example
+///
+/// ```notest
+/// 255, 255, 255, 4, 2, 3, 4, 6, 7
+/// ```
+///
+/// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 are ignored, because
+/// 4 is the first non-0xFF byte.
+#[inline]
+fn read_integer(input: &[u8], input_pos: &mut usize) -> Result<u32, DecompressError> {
+ // We start at zero and count upwards.
+ let mut n: u32 = 0;
+ // If this byte takes value 255 (the maximum value it can take), another byte is read
+ // and added to the sum. This repeats until a byte lower than 255 is read.
+ loop {
+ // We add the next byte until we get a byte which we add to the counting variable.
+ let extra: u8 = *input
+ .get(*input_pos)
+ .ok_or(DecompressError::ExpectedAnotherByte)?;
+ *input_pos += 1;
+ n += extra as u32;
+
+ // We continue if we got 255, break otherwise.
+ if extra != 0xFF {
+ break;
+ }
+ }
+
+ // 255, 255, 255, 8
+ // 111, 111, 111, 101
+
+ Ok(n)
+}
+
+/// Read a little-endian 16-bit integer from the input stream.
+#[inline]
+fn read_u16(input: &[u8], input_pos: &mut usize) -> Result<u16, DecompressError> {
+ let dst = input
+ .get(*input_pos..*input_pos + 2)
+ .ok_or(DecompressError::ExpectedAnotherByte)?;
+ *input_pos += 2;
+ Ok(u16::from_le_bytes(dst.try_into().unwrap()))
+}
+
+const FIT_TOKEN_MASK_LITERAL: u8 = 0b00001111;
+const FIT_TOKEN_MASK_MATCH: u8 = 0b11110000;
+
+#[test]
+fn check_token() {
+ assert!(!does_token_fit(15));
+ assert!(does_token_fit(14));
+ assert!(does_token_fit(114));
+ assert!(!does_token_fit(0b11110000));
+ assert!(does_token_fit(0b10110000));
+}
+
+/// The token consists of two parts, the literal length (upper 4 bits) and the match_length (lower
+/// 4 bits). If the literal length and match_length are both below 15, we don't need to read
+/// additional data, so all the metadata fits in the token.
+#[inline]
+fn does_token_fit(token: u8) -> bool {
+ !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL
+ || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH)
+}
+
+/// Decompress all bytes of `input` into `output`.
+///
+/// Returns the number of bytes written (decompressed) into `output`.
+#[inline(always)] // (always) necessary to get the best performance in non LTO builds
+pub(crate) fn decompress_internal<const USE_DICT: bool, S: Sink>(
+ input: &[u8],
+ output: &mut S,
+ ext_dict: &[u8],
+) -> Result<usize, DecompressError> {
+ let mut input_pos = 0;
+ let initial_output_pos = output.pos();
+
+ let safe_input_pos = input
+ .len()
+ .saturating_sub(16 /* literal copy */ + 2 /* u16 match offset */);
+ let mut safe_output_pos = output
+ .capacity()
+ .saturating_sub(16 /* literal copy */ + 18 /* match copy */);
+
+ if USE_DICT {
+ // In the dictionary case the output pointer is moved by the match length in the dictionary.
+ // This may be up to 17 bytes without exiting the loop. So we need to ensure that we have
+        // at least an additional 17 bytes of space left in the output buffer in the fast loop.
+ safe_output_pos = safe_output_pos.saturating_sub(17);
+ };
+
+ // Exhaust the decoder by reading and decompressing all blocks until the remaining buffer is
+ // empty.
+ loop {
+        // Read the token. The token is the first byte in a block. It is divided into two 4-bit
+        // "fields", a higher and a lower, representing the literals' length and the back
+        // reference's length, respectively.
+ let token = *input
+ .get(input_pos)
+ .ok_or(DecompressError::ExpectedAnotherByte)?;
+ input_pos += 1;
+
+        // Check for the hot loop.
+        // In most cases the metadata fits in a single 1-byte token (statistically) and we are at
+        // a safe distance from the end. This enables some optimized handling.
+ //
+ // Ideally we want to check for safe output pos like: output.pos() <= safe_output_pos; But
+        // that doesn't work when the safe_output_pos is 0 due to saturating_sub. So we use
+ // `<` instead of `<=`, which covers that case.
+ if does_token_fit(token) && input_pos <= safe_input_pos && output.pos() < safe_output_pos {
+ let literal_length = (token >> 4) as usize;
+
+            // casting to [u8; 16] doesn't seem to make a difference vs &[u8] (same assembly)
+ let input: &[u8; 16] = input[input_pos..input_pos + 16].try_into().unwrap();
+
+ // Copy the literal
+            // The literal is at most 14 bytes, and the is_safe_distance check ensures
+            // that we are far enough from the end that we can safely copy 16 bytes
+ output.extend_from_slice_wild(input, literal_length);
+ input_pos += literal_length;
+
+ // clone as we don't want to mutate
+ let offset = read_u16(input, &mut literal_length.clone())? as usize;
+ input_pos += 2;
+
+ let mut match_length = MINMATCH + (token & 0xF) as usize;
+
+ if USE_DICT && offset > output.pos() {
+ let copied = copy_from_dict(output, ext_dict, offset, match_length)?;
+ if copied == match_length {
+ continue;
+ }
+ // match crosses ext_dict and output, offset is still correct as output pos
+ // increased
+ match_length -= copied;
+ }
+
+ // In this branch we know that match_length is at most 18 (14 + MINMATCH).
+ // But the blocks can overlap, so make sure they are at least 18 bytes apart
+ // to enable an optimized copy of 18 bytes.
+ let start = output.pos().saturating_sub(offset);
+ if offset >= match_length {
+ output.extend_from_within(start, 18, match_length);
+ } else {
+ output.extend_from_within_overlapping(start, match_length)
+ }
+
+ continue;
+ }
+
+ // Now, we read the literals section.
+ // Literal Section
+        // If the initial value is 15, another byte will be read and added to it.
+ let mut literal_length = (token >> 4) as usize;
+ if literal_length != 0 {
+ if literal_length == 15 {
+                // The literal length took the maximal value, indicating that there are more
+                // than 15 literal bytes. We read the extra integer.
+ literal_length += read_integer(input, &mut input_pos)? as usize;
+ }
+
+ if literal_length > input.len() - input_pos {
+ return Err(DecompressError::LiteralOutOfBounds);
+ }
+ #[cfg(not(feature = "unchecked-decode"))]
+ if literal_length > output.capacity() - output.pos() {
+ return Err(DecompressError::OutputTooSmall {
+ expected: output.pos() + literal_length,
+ actual: output.capacity(),
+ });
+ }
+ output.extend_from_slice(&input[input_pos..input_pos + literal_length]);
+ input_pos += literal_length;
+ }
+
+        // If the input stream is emptied, we break out of the loop. This is only the case
+        // at the end of the stream, since the block is intact otherwise.
+ if input_pos >= input.len() {
+ break;
+ }
+
+ let offset = read_u16(input, &mut input_pos)? as usize;
+        // Obtain the initial match length. The match length is the length of the duplicate segment
+        // which will later be copied from data previously decompressed into the output buffer. The
+        // initial length is derived from the second part of the token (the lower nibble), which we
+        // read earlier. Since having a match length of less than 4 would mean a negative compression
+        // ratio, we start at 4 (MINMATCH).
+
+ // The initial match length can maximally be 19. As with the literal length, this indicates
+ // that there are more bytes to read.
+ let mut match_length = MINMATCH + (token & 0xF) as usize;
+ if match_length == MINMATCH + 15 {
+            // The match length took the maximal value, indicating that there are more bytes. We
+            // read the extra integer.
+ match_length += read_integer(input, &mut input_pos)? as usize;
+ }
+
+ #[cfg(not(feature = "unchecked-decode"))]
+ if output.pos() + match_length > output.capacity() {
+ return Err(DecompressError::OutputTooSmall {
+ expected: output.pos() + match_length,
+ actual: output.capacity(),
+ });
+ }
+ if USE_DICT && offset > output.pos() {
+ let copied = copy_from_dict(output, ext_dict, offset, match_length)?;
+ if copied == match_length {
+ continue;
+ }
+ // match crosses ext_dict and output, offset is still correct as output_len was
+ // increased
+ match_length -= copied;
+ }
+        // We now copy from the already decompressed buffer. This allows us to store duplicates
+        // by simply referencing the earlier location.
+ duplicate_slice(output, offset, match_length)?;
+ }
+ Ok(output.pos() - initial_output_pos)
+}
+
+#[inline]
+fn copy_from_dict(
+ output: &mut impl Sink,
+ ext_dict: &[u8],
+ offset: usize,
+ match_length: usize,
+) -> Result<usize, DecompressError> {
+ // If we're here we know offset > output.pos
+ debug_assert!(offset > output.pos());
+ let (dict_offset, did_overflow) = ext_dict.len().overflowing_sub(offset - output.pos());
+ if did_overflow {
+ return Err(DecompressError::OffsetOutOfBounds);
+ }
+ // Can't copy past ext_dict len, the match may cross dict and output
+ let dict_match_length = match_length.min(ext_dict.len() - dict_offset);
+ let ext_match = &ext_dict[dict_offset..dict_offset + dict_match_length];
+ output.extend_from_slice(ext_match);
+ Ok(dict_match_length)
+}
+
+/// Extends output by self-referential copies
+#[inline(always)] // (always) necessary otherwise compiler fails to inline it
+fn duplicate_slice(
+ output: &mut impl Sink,
+ offset: usize,
+ match_length: usize,
+) -> Result<(), DecompressError> {
+ // This function assumes output will fit match_length, it might panic otherwise.
+ if match_length > offset {
+ duplicate_overlapping_slice(output, offset, match_length)?;
+ } else {
+ let (start, did_overflow) = output.pos().overflowing_sub(offset);
+ if did_overflow {
+ return Err(DecompressError::OffsetOutOfBounds);
+ }
+
+ match match_length {
+ 0..=32 if output.pos() + 32 <= output.capacity() => {
+ output.extend_from_within(start, 32, match_length)
+ }
+ 33..=64 if output.pos() + 64 <= output.capacity() => {
+ output.extend_from_within(start, 64, match_length)
+ }
+ _ => output.extend_from_within(start, match_length, match_length),
+ }
+ }
+ Ok(())
+}
+
+/// self-referential copy for the case data start (end of output - offset) + match_length overlaps
+/// into output
+#[inline]
+fn duplicate_overlapping_slice(
+ sink: &mut impl Sink,
+ offset: usize,
+ match_length: usize,
+) -> Result<(), DecompressError> {
+ // This function assumes output will fit match_length, it might panic otherwise.
+ let (start, did_overflow) = sink.pos().overflowing_sub(offset);
+ if did_overflow {
+ return Err(DecompressError::OffsetOutOfBounds);
+ }
+ if offset == 1 {
+ let val = sink.byte_at(start);
+ sink.extend_with_fill(val, match_length);
+ } else {
+ sink.extend_from_within_overlapping(start, match_length);
+ }
+ Ok(())
+}
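+// For example (illustrative, added note): with offset 1 the single referenced byte simply repeats
+// for the whole match, so `extend_with_fill` turns it into a fill instead of the byte-by-byte
+// overlapping copy.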
+
+/// Decompress all bytes of `input` into `output`.
+/// `output` should be preallocated with the size of the uncompressed data.
+#[inline]
+pub fn decompress_into(input: &[u8], output: &mut [u8]) -> Result<usize, DecompressError> {
+ decompress_internal::<false, _>(input, &mut SliceSink::new(output, 0), b"")
+}
+
+/// Decompress all bytes of `input` into `output`.
+///
+/// Returns the number of bytes written (decompressed) into `output`.
+#[inline]
+pub fn decompress_into_with_dict(
+ input: &[u8],
+ output: &mut [u8],
+ ext_dict: &[u8],
+) -> Result<usize, DecompressError> {
+ decompress_internal::<true, _>(input, &mut SliceSink::new(output, 0), ext_dict)
+}
+
+/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
+/// little endian. Can be used in conjunction with `compress_prepend_size`
+#[inline]
+pub fn decompress_size_prepended(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
+ let (uncompressed_size, input) = super::uncompressed_size(input)?;
+ decompress(input, uncompressed_size)
+}
+
+/// Decompress all bytes of `input` into a new vec.
+/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the uncompressed size.
+///
+/// # Panics
+/// May panic if the parameter `min_uncompressed_size` is smaller than the
+/// uncompressed data.
+#[inline]
+pub fn decompress(input: &[u8], min_uncompressed_size: usize) -> Result<Vec<u8>, DecompressError> {
+ let mut decompressed: Vec<u8> = vec![0; min_uncompressed_size];
+ let decomp_len =
+ decompress_internal::<false, _>(input, &mut SliceSink::new(&mut decompressed, 0), b"")?;
+ decompressed.truncate(decomp_len);
+ Ok(decompressed)
+}
+
+/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
+/// little endian. Can be used in conjunction with `compress_prepend_size_with_dict`
+#[inline]
+pub fn decompress_size_prepended_with_dict(
+ input: &[u8],
+ ext_dict: &[u8],
+) -> Result<Vec<u8>, DecompressError> {
+ let (uncompressed_size, input) = super::uncompressed_size(input)?;
+ decompress_with_dict(input, uncompressed_size, ext_dict)
+}
+
+/// Decompress all bytes of `input` into a new vec.
+/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the uncompressed size.
+///
+/// # Panics
+/// May panic if the parameter `min_uncompressed_size` is smaller than the
+/// uncompressed data.
+#[inline]
+pub fn decompress_with_dict(
+ input: &[u8],
+ min_uncompressed_size: usize,
+ ext_dict: &[u8],
+) -> Result<Vec<u8>, DecompressError> {
+ let mut decompressed: Vec<u8> = vec![0; min_uncompressed_size];
+ let decomp_len =
+ decompress_internal::<true, _>(input, &mut SliceSink::new(&mut decompressed, 0), ext_dict)?;
+ decompressed.truncate(decomp_len);
+ Ok(decompressed)
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn all_literal() {
+ assert_eq!(decompress(&[0x30, b'a', b'4', b'9'], 3).unwrap(), b"a49");
+ }
+
+ // this error test is only valid in safe-decode.
+ #[cfg(feature = "safe-decode")]
+ #[test]
+ fn offset_oob() {
+ decompress(&[0x10, b'a', 2, 0], 4).unwrap_err();
+ decompress(&[0x40, b'a', 1, 0], 4).unwrap_err();
+ }
+}
diff --git a/src/block/hashtable.rs b/src/block/hashtable.rs
new file mode 100644
index 0000000..7c29db7
--- /dev/null
+++ b/src/block/hashtable.rs
@@ -0,0 +1,161 @@
+use alloc::boxed::Box;
+use core::convert::TryInto;
+
+/// The HashTable trait used by the compression to map hashed bytes to their position.
+/// `val` can be at most the size of the input in bytes.
+///
+/// `pos` can have a maximum value of u16::MAX (65535).
+/// If the hashtable is smaller it needs to reduce the pos to fit its space, e.g. by right
+/// shifting.
+///
+/// Duplication dictionary size:
+/// every four bytes is assigned an entry. When this number is lower, fewer entries exist, and
+/// thus collisions are more likely, hurting the compression ratio.
+
+/// Hashes and right shifts to a maximum value of 16 bits (65535).
+/// The right shift is done in order not to exceed the hashtable's capacity.
+#[inline]
+fn hash(sequence: u32) -> u32 {
+ (sequence.wrapping_mul(2654435761_u32)) >> 16
+}
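+// Added note: 2654435761 (0x9E3779B1) is a widely used multiplicative-hashing prime, roughly
+// 2^32 divided by the golden ratio; the best-mixed bits end up in the upper half, which is why
+// the result is taken from the top 16 bits via the right shift.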
+
+/// Hashes and right shifts to a maximum value of 16 bits (65535).
+/// The right shift is done in order not to exceed the hashtable's capacity.
+#[cfg(target_pointer_width = "64")]
+#[inline]
+fn hash5(sequence: usize) -> u32 {
+ let primebytes = if cfg!(target_endian = "little") {
+ 889523592379_usize
+ } else {
+ 11400714785074694791_usize
+ };
+ (((sequence << 24).wrapping_mul(primebytes)) >> 48) as u32
+}
+
+pub trait HashTable {
+ fn get_at(&self, pos: usize) -> usize;
+ fn put_at(&mut self, pos: usize, val: usize);
+ fn clear(&mut self);
+ #[inline]
+ #[cfg(target_pointer_width = "64")]
+ fn get_hash_at(input: &[u8], pos: usize) -> usize {
+ hash5(super::compress::get_batch_arch(input, pos)) as usize
+ }
+ #[inline]
+ #[cfg(target_pointer_width = "32")]
+ fn get_hash_at(input: &[u8], pos: usize) -> usize {
+ hash(super::compress::get_batch(input, pos)) as usize
+ }
+}
+
+const HASHTABLE_SIZE_4K: usize = 4 * 1024;
+const HASHTABLE_BIT_SHIFT_4K: usize = 4;
+
+#[derive(Debug)]
+#[repr(align(64))]
+pub struct HashTable4KU16 {
+ dict: Box<[u16; HASHTABLE_SIZE_4K]>,
+}
+impl HashTable4KU16 {
+ #[inline]
+ pub fn new() -> Self {
+ // This generates more efficient assembly in contrast to Box::new(slice), because of an
+        // optimized call to alloc_zeroed, vs. alloc + memset
+ // try_into is optimized away
+ let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
+ .into_boxed_slice()
+ .try_into()
+ .unwrap();
+ Self { dict }
+ }
+}
+impl HashTable for HashTable4KU16 {
+ #[inline]
+ fn get_at(&self, hash: usize) -> usize {
+ self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize
+ }
+ #[inline]
+ fn put_at(&mut self, hash: usize, val: usize) {
+ self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u16;
+ }
+ #[inline]
+ fn clear(&mut self) {
+ self.dict.fill(0);
+ }
+ #[inline]
+ fn get_hash_at(input: &[u8], pos: usize) -> usize {
+ hash(super::get_batch(input, pos)) as usize
+ }
+}
+
+#[derive(Debug)]
+pub struct HashTable4K {
+ dict: Box<[u32; HASHTABLE_SIZE_4K]>,
+}
+impl HashTable4K {
+ #[inline]
+ pub fn new() -> Self {
+ let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
+ .into_boxed_slice()
+ .try_into()
+ .unwrap();
+ Self { dict }
+ }
+
+ #[cold]
+ #[allow(dead_code)]
+ pub fn reposition(&mut self, offset: u32) {
+ for i in self.dict.iter_mut() {
+ *i = i.saturating_sub(offset);
+ }
+ }
+}
+impl HashTable for HashTable4K {
+ #[inline]
+ fn get_at(&self, hash: usize) -> usize {
+ self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize
+ }
+ #[inline]
+ fn put_at(&mut self, hash: usize, val: usize) {
+ self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u32;
+ }
+ #[inline]
+ fn clear(&mut self) {
+ self.dict.fill(0);
+ }
+}
+
+const HASHTABLE_SIZE_8K: usize = 8 * 1024;
+const HASH_TABLE_BIT_SHIFT_8K: usize = 3;
+
+#[derive(Debug)]
+pub struct HashTable8K {
+ dict: Box<[u32; HASHTABLE_SIZE_8K]>,
+}
+#[allow(dead_code)]
+impl HashTable8K {
+ #[inline]
+ pub fn new() -> Self {
+ let dict = alloc::vec![0; HASHTABLE_SIZE_8K]
+ .into_boxed_slice()
+ .try_into()
+ .unwrap();
+
+ Self { dict }
+ }
+}
+impl HashTable for HashTable8K {
+ #[inline]
+ fn get_at(&self, hash: usize) -> usize {
+ self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] as usize
+ }
+ #[inline]
+ fn put_at(&mut self, hash: usize, val: usize) {
+ self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] = val as u32;
+ }
+ #[inline]
+ fn clear(&mut self) {
+ self.dict.fill(0);
+ }
+}
diff --git a/src/block/mod.rs b/src/block/mod.rs
new file mode 100644
index 0000000..31c52f4
--- /dev/null
+++ b/src/block/mod.rs
@@ -0,0 +1,157 @@
+//! LZ4 Block Format
+//!
+//! As defined in <https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md>
+//!
+//! Currently, only the block format is supported for no_std.
+//!
+//! # Example: block format roundtrip
+//! ```
+//! use lz4_flex::block::{compress_prepend_size, decompress_size_prepended};
+//! let input: &[u8] = b"Hello people, what's up?";
+//! let compressed = compress_prepend_size(input);
+//! let uncompressed = decompress_size_prepended(&compressed).unwrap();
+//! assert_eq!(input, uncompressed);
+//! ```
+//!
+
+#[cfg_attr(feature = "safe-encode", forbid(unsafe_code))]
+pub(crate) mod compress;
+pub(crate) mod hashtable;
+
+#[cfg(feature = "safe-decode")]
+#[cfg_attr(feature = "safe-decode", forbid(unsafe_code))]
+pub(crate) mod decompress_safe;
+#[cfg(feature = "safe-decode")]
+pub(crate) use decompress_safe as decompress;
+
+#[cfg(not(feature = "safe-decode"))]
+pub(crate) mod decompress;
+
+pub use compress::*;
+pub use decompress::*;
+
+use core::convert::TryInto;
+use core::fmt;
+
+pub(crate) const WINDOW_SIZE: usize = 64 * 1024;
+
+/// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md#end-of-block-restrictions
+/// The last match must start at least 12 bytes before the end of block. The last match is part of
+/// the penultimate sequence. It is followed by the last sequence, which contains only literals.
+///
+/// Note that, as a consequence, an independent block < 13 bytes cannot be compressed, because the
+/// match must copy "something", so it needs at least one prior byte.
+///
+/// When a block can reference data from another block, it can start immediately with a match and no
+/// literal, so a block of 12 bytes can be compressed.
+const MFLIMIT: usize = 12;
+
+/// The last 5 bytes of input are always literals. Therefore, the last sequence contains at least 5
+/// bytes.
+const LAST_LITERALS: usize = 5;
+
+/// Due to the way the compression loop is arranged we may read up to (register_size - 2) bytes from
+/// the current position. So we must end the matches 6 bytes before the end, 1 more than required by
+/// the spec.
+const END_OFFSET: usize = LAST_LITERALS + 1;
+
+/// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md#end-of-block-restrictions
+/// Minimum length of a block
+///
+/// MFLIMIT + 1 for the token.
+const LZ4_MIN_LENGTH: usize = MFLIMIT + 1;
+
+const MAXD_LOG: usize = 16;
+const MAX_DISTANCE: usize = (1 << MAXD_LOG) - 1;
+
+#[allow(dead_code)]
+const MATCH_LENGTH_MASK: u32 = (1_u32 << 4) - 1; // 0b1111 / 15
+
+/// The minimum length of a duplicate
+const MINMATCH: usize = 4;
+
+#[allow(dead_code)]
+const FASTLOOP_SAFE_DISTANCE: usize = 64;
+
+/// Switch for the hashtable size byU16
+#[allow(dead_code)]
+static LZ4_64KLIMIT: usize = (64 * 1024) + (MFLIMIT - 1);
+
+/// An error representing invalid compressed data.
+#[derive(Debug)]
+#[non_exhaustive]
+pub enum DecompressError {
+ /// The provided output is too small
+ OutputTooSmall {
+ /// Minimum expected output size
+ expected: usize,
+ /// Actual size of output
+ actual: usize,
+ },
+ /// Literal is out of bounds of the input
+ LiteralOutOfBounds,
+ /// Expected another byte, but none found.
+ ExpectedAnotherByte,
+ /// Deduplication offset out of bounds (not in buffer).
+ OffsetOutOfBounds,
+}
+
+#[derive(Debug)]
+#[non_exhaustive]
+/// Errors that can happen during compression.
+pub enum CompressError {
+ /// The provided output is too small.
+ OutputTooSmall,
+}
+
+impl fmt::Display for DecompressError {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ DecompressError::OutputTooSmall { expected, actual } => {
+ write!(
+ f,
+ "provided output is too small for the decompressed data, actual {actual}, expected \
+ {expected}"
+ )
+ }
+ DecompressError::LiteralOutOfBounds => {
+ f.write_str("literal is out of bounds of the input")
+ }
+ DecompressError::ExpectedAnotherByte => {
+ f.write_str("expected another byte, found none")
+ }
+ DecompressError::OffsetOutOfBounds => {
+ f.write_str("the offset to copy is not contained in the decompressed buffer")
+ }
+ }
+ }
+}
+
+impl fmt::Display for CompressError {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ CompressError::OutputTooSmall => f.write_str(
+ "output is too small for the compressed data, use get_maximum_output_size to \
+ reserve enough space",
+ ),
+ }
+ }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for DecompressError {}
+
+#[cfg(feature = "std")]
+impl std::error::Error for CompressError {}
+
+/// This can be used in conjunction with `decompress_size_prepended`.
+/// It will read the first 4 bytes as little-endian encoded length, and return
+/// the rest of the bytes after the length encoding.
+#[inline]
+pub fn uncompressed_size(input: &[u8]) -> Result<(usize, &[u8]), DecompressError> {
+ let size = input.get(..4).ok_or(DecompressError::ExpectedAnotherByte)?;
+ let size: &[u8; 4] = size.try_into().unwrap();
+ let uncompressed_size = u32::from_le_bytes(*size) as usize;
+ let rest = &input[4..];
+ Ok((uncompressed_size, rest))
+}
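+// For example (illustrative, added note): an input beginning with [0x0B, 0x00, 0x00, 0x00, ...]
+// yields an uncompressed size of 11, and the returned slice is everything after those four bytes.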
diff --git a/src/fastcpy.rs b/src/fastcpy.rs
new file mode 100644
index 0000000..8bbd480
--- /dev/null
+++ b/src/fastcpy.rs
@@ -0,0 +1,145 @@
+//! # FastCpy
+//!
+//! The Rust Compiler calls `memcpy` for slices of unknown length.
+//! This crate provides a faster implementation of `memcpy` for slices up to 32 bytes (64 bytes with `avx`).
+//! If you know most of your copy operations are not too big, you can use `fastcpy` to speed up your program.
+//!
+//! `fastcpy` is designed to not contain too much assembly, so the overhead is low.
+//!
+//! As a fallback the standard `memcpy` is called.
+//!
+//! ## Double Copy Trick
+//! `fastcpy` employs a double copy trick to copy slices of length 4-32 bytes (64 bytes with `avx`).
+//! E.g. a slice of length 6 can be copied with two unconditional copy operations.
+//!
+//! /// [1, 2, 3, 4, 5, 6]
+//! /// [1, 2, 3, 4]
+//! /// [3, 4, 5, 6]
+//!
+
+#[inline]
+pub fn slice_copy(src: &[u8], dst: &mut [u8]) {
+ #[inline(never)]
+ #[cold]
+ #[track_caller]
+ fn len_mismatch_fail(dst_len: usize, src_len: usize) -> ! {
+ panic!(
+ "source slice length ({}) does not match destination slice length ({})",
+ src_len, dst_len,
+ );
+ }
+
+ if src.len() != dst.len() {
+ len_mismatch_fail(src.len(), dst.len());
+ }
+ let len = src.len();
+
+ if src.is_empty() {
+ return;
+ }
+
+ if len < 4 {
+ short_copy(src, dst);
+ return;
+ }
+
+ if len < 8 {
+ double_copy_trick::<4>(src, dst);
+ return;
+ }
+
+ if len <= 16 {
+ double_copy_trick::<8>(src, dst);
+ return;
+ }
+
+ if len <= 32 {
+ double_copy_trick::<16>(src, dst);
+ return;
+ }
+
+    // The code will use the vmovdqu instruction to copy 32 bytes at a time.
+ #[cfg(target_feature = "avx")]
+ {
+ if len <= 64 {
+ double_copy_trick::<32>(src, dst);
+ return;
+ }
+ }
+
+ // For larger sizes we use the default, which calls memcpy
+ // memcpy does some virtual memory tricks to copy large chunks of memory.
+ //
+ // The theory should be that the checks above don't cost much relative to the copy call for
+ // larger copies.
+ // The bounds checks in `copy_from_slice` are elided.
+ dst.copy_from_slice(src);
+}
+
+#[inline(always)]
+fn short_copy(src: &[u8], dst: &mut [u8]) {
+ let len = src.len();
+
+ // length 1-3
+ dst[0] = src[0];
+ if len >= 2 {
+ double_copy_trick::<2>(src, dst);
+ }
+}
+
+#[inline(always)]
+/// [1, 2, 3, 4, 5, 6]
+/// [1, 2, 3, 4]
+/// [3, 4, 5, 6]
+fn double_copy_trick<const SIZE: usize>(src: &[u8], dst: &mut [u8]) {
+ dst[0..SIZE].copy_from_slice(&src[0..SIZE]);
+ dst[src.len() - SIZE..].copy_from_slice(&src[src.len() - SIZE..]);
+}
+
+#[cfg(test)]
+mod tests {
+ use super::slice_copy;
+ use alloc::vec::Vec;
+ use proptest::prelude::*;
+ proptest! {
+ #[test]
+ fn test_fast_short_slice_copy(left: Vec<u8>) {
+ let mut right = vec![0u8; left.len()];
+ slice_copy(&left, &mut right);
+ prop_assert_eq!(&left, &right);
+ }
+ }
+
+ #[test]
+ fn test_fast_short_slice_copy_edge_cases() {
+ for len in 0..(512 * 2) {
+ let left = (0..len).map(|i| i as u8).collect::<Vec<_>>();
+ let mut right = vec![0u8; len];
+ slice_copy(&left, &mut right);
+ assert_eq!(left, right);
+ }
+ }
+
+ #[test]
+ fn test_fail2() {
+ let left = vec![
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ];
+ let mut right = vec![0u8; left.len()];
+ slice_copy(&left, &mut right);
+ assert_eq!(left, right);
+ }
+
+ #[test]
+ fn test_fail() {
+ let left = vec![
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ];
+ let mut right = vec![0u8; left.len()];
+ slice_copy(&left, &mut right);
+ assert_eq!(left, right);
+ }
+}
diff --git a/src/fastcpy_unsafe.rs b/src/fastcpy_unsafe.rs
new file mode 100644
index 0000000..fb17280
--- /dev/null
+++ b/src/fastcpy_unsafe.rs
@@ -0,0 +1,165 @@
+//! # FastCpy
+//!
+//! The Rust Compiler calls `memcpy` for slices of unknown length.
+//! This crate provides a faster implementation of `memcpy` for slices up to 32 bytes (64 bytes with `avx`).
+//! If you know most of your copy operations are not too big, you can use `fastcpy` to speed up your program.
+//!
+//! `fastcpy` is designed to not contain too much assembly, so the overhead is low.
+//!
+//! As a fallback the standard `memcpy` is called.
+//!
+//! ## Double Copy Trick
+//! `fastcpy` employs a double copy trick to copy slices of length 4-32 bytes (64 bytes with `avx`).
+//! E.g. a slice of length 6 can be copied with two unconditional copy operations.
+//!
+//! /// [1, 2, 3, 4, 5, 6]
+//! /// [1, 2, 3, 4]
+//! /// [3, 4, 5, 6]
+//!
+
+#[inline]
+pub fn slice_copy(src: *const u8, dst: *mut u8, num_bytes: usize) {
+ if num_bytes < 4 {
+ short_copy(src, dst, num_bytes);
+ return;
+ }
+
+ if num_bytes < 8 {
+ double_copy_trick::<4>(src, dst, num_bytes);
+ return;
+ }
+
+ if num_bytes <= 16 {
+ double_copy_trick::<8>(src, dst, num_bytes);
+ return;
+ }
+
+ //if num_bytes <= 32 {
+ //double_copy_trick::<16>(src, dst, num_bytes);
+ //return;
+ //}
+
+ // /// The code will use the vmovdqu instruction to copy 32 bytes at a time.
+ //#[cfg(target_feature = "avx")]
+ //{
+ //if num_bytes <= 64 {
+ //double_copy_trick::<32>(src, dst, num_bytes);
+ //return;
+ //}
+ //}
+
+ // For larger sizes we use the default, which calls memcpy
+ // memcpy does some virtual memory tricks to copy large chunks of memory.
+ //
+ // The theory should be that the checks above don't cost much relative to the copy call for
+ // larger copies.
+ // The bounds checks in `copy_from_slice` are elided.
+
+ //unsafe { core::ptr::copy_nonoverlapping(src, dst, num_bytes) }
+ wild_copy_from_src::<16>(src, dst, num_bytes)
+}
+
+// Inline never because otherwise we get a call to memcpy -.-
+#[inline]
+fn wild_copy_from_src<const SIZE: usize>(
+ mut source: *const u8,
+ mut dst: *mut u8,
+ num_bytes: usize,
+) {
+ // Note: if the compiler auto-vectorizes this it'll hurt performance!
+ // It's not the case for 16 bytes stepsize, but for 8 bytes.
+ let l_last = unsafe { source.add(num_bytes - SIZE) };
+ let r_last = unsafe { dst.add(num_bytes - SIZE) };
+ let num_bytes = (num_bytes / SIZE) * SIZE;
+
+ unsafe {
+ let dst_ptr_end = dst.add(num_bytes);
+ loop {
+ core::ptr::copy_nonoverlapping(source, dst, SIZE);
+ source = source.add(SIZE);
+ dst = dst.add(SIZE);
+ if dst >= dst_ptr_end {
+ break;
+ }
+ }
+ }
+
+ unsafe {
+ core::ptr::copy_nonoverlapping(l_last, r_last, SIZE);
+ }
+}
+
+#[inline]
+fn short_copy(src: *const u8, dst: *mut u8, len: usize) {
+ unsafe {
+ *dst = *src;
+ }
+ if len >= 2 {
+ double_copy_trick::<2>(src, dst, len);
+ }
+}
+
+#[inline(always)]
+/// [1, 2, 3, 4, 5, 6]
+/// [1, 2, 3, 4]
+/// [3, 4, 5, 6]
+fn double_copy_trick<const SIZE: usize>(src: *const u8, dst: *mut u8, len: usize) {
+ let l_end = unsafe { src.add(len - SIZE) };
+ let r_end = unsafe { dst.add(len - SIZE) };
+
+ unsafe {
+ core::ptr::copy_nonoverlapping(src, dst, SIZE);
+ core::ptr::copy_nonoverlapping(l_end, r_end, SIZE);
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::slice_copy;
+ use alloc::vec::Vec;
+ use proptest::prelude::*;
+ proptest! {
+ #[test]
+ fn test_fast_short_slice_copy(left: Vec<u8>) {
+ if left.is_empty() {
+ return Ok(());
+ }
+ let mut right = vec![0u8; left.len()];
+ slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len());
+ prop_assert_eq!(&left, &right);
+ }
+ }
+
+ #[test]
+ fn test_fast_short_slice_copy_edge_cases() {
+ for len in 1..(512 * 2) {
+ let left = (0..len).map(|i| i as u8).collect::<Vec<_>>();
+ let mut right = vec![0u8; len];
+ slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len());
+ assert_eq!(left, right);
+ }
+ }
+
+ #[test]
+ fn test_fail2() {
+ let left = vec![
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ];
+ let mut right = vec![0u8; left.len()];
+ slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len());
+ assert_eq!(left, right);
+ }
+
+ #[test]
+ fn test_fail() {
+ let left = vec![
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ];
+ let mut right = vec![0u8; left.len()];
+ slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len());
+ assert_eq!(left, right);
+ }
+}
diff --git a/src/frame/compress.rs b/src/frame/compress.rs
new file mode 100644
index 0000000..cee32eb
--- /dev/null
+++ b/src/frame/compress.rs
@@ -0,0 +1,471 @@
+use std::{
+ fmt,
+ hash::Hasher,
+ io::{self, Write},
+};
+use twox_hash::XxHash32;
+
+use crate::{
+ block::{
+ compress::compress_internal,
+ hashtable::{HashTable, HashTable4K},
+ },
+ sink::vec_sink_for_compression,
+};
+
+use super::Error;
+use super::{
+ header::{BlockInfo, BlockMode, FrameInfo, BLOCK_INFO_SIZE, MAX_FRAME_INFO_SIZE},
+ BlockSize,
+};
+use crate::block::WINDOW_SIZE;
+
+/// A writer for compressing a LZ4 stream.
+///
+/// This `FrameEncoder` wraps any other writer that implements `io::Write`.
+/// Bytes written to this writer are compressed using the [LZ4 frame
+/// format](https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md).
+///
+/// Writes are buffered automatically, so there's no need to wrap the given
+/// writer in a `std::io::BufWriter`.
+///
+/// To ensure a well-formed stream, the encoder must be finalized by calling
+/// either the [`finish()`], [`try_finish()`], or [`auto_finish()`] methods.
+///
+/// [`finish()`]: Self::finish
+/// [`try_finish()`]: Self::try_finish
+/// [`auto_finish()`]: Self::auto_finish
+///
+/// # Example 1
+/// Serializing json values into a compressed file.
+///
+/// ```no_run
+/// let compressed_file = std::fs::File::create("datafile").unwrap();
+/// let mut compressor = lz4_flex::frame::FrameEncoder::new(compressed_file);
+/// serde_json::to_writer(&mut compressor, &serde_json::json!({ "an": "object" })).unwrap();
+/// compressor.finish().unwrap();
+/// ```
+///
+/// # Example 2
+/// Serializing multiple json values into a compressed file using linked blocks.
+///
+/// ```no_run
+/// let compressed_file = std::fs::File::create("datafile").unwrap();
+/// let mut frame_info = lz4_flex::frame::FrameInfo::new();
+/// frame_info.block_mode = lz4_flex::frame::BlockMode::Linked;
+/// let mut compressor = lz4_flex::frame::FrameEncoder::with_frame_info(frame_info, compressed_file);
+/// for i in 0..10u64 {
+/// serde_json::to_writer(&mut compressor, &serde_json::json!({ "i": i })).unwrap();
+/// }
+/// compressor.finish().unwrap();
+/// ```
+pub struct FrameEncoder<W: io::Write> {
+ /// Our buffer of uncompressed bytes.
+ src: Vec<u8>,
+ /// Index into src: starting point of bytes not yet compressed
+ src_start: usize,
+    /// Index into src: end point of bytes not yet compressed
+ src_end: usize,
+ /// Index into src: starting point of external dictionary (applicable in Linked block mode)
+ ext_dict_offset: usize,
+ /// Length of external dictionary
+ ext_dict_len: usize,
+    /// Counter of bytes already compressed into the compression_table.
+    /// _Not_ the same as `content_len`, as this is reset roughly every 2GB.
+ src_stream_offset: usize,
+ /// Encoder table
+ compression_table: HashTable4K,
+ /// The underlying writer.
+ w: W,
+ /// Xxhash32 used when content checksum is enabled.
+ content_hasher: XxHash32,
+ /// Number of bytes compressed
+ content_len: u64,
+ /// The compressed bytes buffer. Bytes are compressed from src (usually)
+ /// to dst before being written to w.
+ dst: Vec<u8>,
+ /// Whether we have an open frame in the output.
+ is_frame_open: bool,
+    /// Whether we have a frame closed in the output.
+ data_to_frame_written: bool,
+ /// The frame information to be used in this encoder.
+ frame_info: FrameInfo,
+}
+
+impl<W: io::Write> FrameEncoder<W> {
+ fn init(&mut self) {
+ let max_block_size = self.frame_info.block_size.get_size();
+ let src_size = if self.frame_info.block_mode == BlockMode::Linked {
+            // In linked mode we consume the input (bumping src_start) but leave the
+            // beginning of src to be used as a prefix in subsequent blocks.
+            // That holds until we have at least `max_block_size + WINDOW_SIZE`
+            // bytes in src; then we set up an ext_dict with the last WINDOW_SIZE bytes
+            // and the input goes to the beginning of src again.
+            // Since we always want to be able to write a full block (up to max_block_size)
+            // we need a buffer with at least `max_block_size * 2 + WINDOW_SIZE` bytes.
+ max_block_size * 2 + WINDOW_SIZE
+ } else {
+ max_block_size
+ };
+ // Since this method is called potentially multiple times, don't reserve _additional_
+ // capacity if not required.
+ self.src
+ .reserve(src_size.saturating_sub(self.src.capacity()));
+ self.dst.reserve(
+ crate::block::compress::get_maximum_output_size(max_block_size)
+ .saturating_sub(self.dst.capacity()),
+ );
+ }
+
+ /// Returns a wrapper around `self` that will finish the stream on drop.
+ ///
+ /// # Note
+ /// Errors on drop get silently ignored. If you want to handle errors then use [`finish()`] or
+ /// [`try_finish()`] instead.
+ ///
+ /// [`finish()`]: Self::finish
+ /// [`try_finish()`]: Self::try_finish
+ pub fn auto_finish(self) -> AutoFinishEncoder<W> {
+ AutoFinishEncoder {
+ encoder: Some(self),
+ }
+ }
+
+ /// Creates a new Encoder with the specified FrameInfo.
+ pub fn with_frame_info(frame_info: FrameInfo, wtr: W) -> Self {
+ FrameEncoder {
+ src: Vec::new(),
+ w: wtr,
+ // 16 KB hash table for matches, same as the reference implementation.
+ compression_table: HashTable4K::new(),
+ content_hasher: XxHash32::with_seed(0),
+ content_len: 0,
+ dst: Vec::new(),
+ is_frame_open: false,
+ data_to_frame_written: false,
+ frame_info,
+ src_start: 0,
+ src_end: 0,
+ ext_dict_offset: 0,
+ ext_dict_len: 0,
+ src_stream_offset: 0,
+ }
+ }
+
+ /// Creates a new Encoder with the default settings.
+ pub fn new(wtr: W) -> Self {
+ Self::with_frame_info(Default::default(), wtr)
+ }
+
+ /// The frame information used by this Encoder.
+ pub fn frame_info(&mut self) -> &FrameInfo {
+ &self.frame_info
+ }
+
+ /// Consumes this encoder, flushing the internal buffer and writing the stream terminator.
+ pub fn finish(mut self) -> Result<W, Error> {
+ self.try_finish()?;
+ Ok(self.w)
+ }
+
+ /// Attempts to finish this output stream, flushing the internal buffer and writing the
+ /// stream terminator.
+ pub fn try_finish(&mut self) -> Result<(), Error> {
+ match self.flush() {
+ Ok(()) => {
+ // Empty input special case
+ // https://github.com/ouch-org/ouch/pull/163#discussion_r1108965151
+ if !self.is_frame_open && !self.data_to_frame_written {
+ self.begin_frame(0)?;
+ }
+ self.end_frame()?;
+ self.data_to_frame_written = true;
+ Ok(())
+ }
+ Err(err) => Err(err.into()),
+ }
+ }
+
+ /// Returns the underlying writer _without_ flushing the stream.
+ /// This may leave the output in an unfinished state.
+ pub fn into_inner(self) -> W {
+ self.w
+ }
+
+ /// Gets a reference to the underlying writer in this encoder.
+ pub fn get_ref(&self) -> &W {
+ &self.w
+ }
+
+ /// Gets a mutable reference to the underlying writer in this encoder.
+ ///
+ /// Note that mutating the output/input state of the stream may corrupt
+ /// this encoder, so care must be taken when using this method.
+ pub fn get_mut(&mut self) -> &mut W {
+ &mut self.w
+ }
+
+ /// Closes the frame by writing the end marker.
+ fn end_frame(&mut self) -> Result<(), Error> {
+ debug_assert!(self.is_frame_open);
+ self.is_frame_open = false;
+ if let Some(expected) = self.frame_info.content_size {
+ if expected != self.content_len {
+ return Err(Error::ContentLengthError {
+ expected,
+ actual: self.content_len,
+ });
+ }
+ }
+
+ let mut block_info_buffer = [0u8; BLOCK_INFO_SIZE];
+ BlockInfo::EndMark.write(&mut block_info_buffer[..])?;
+ self.w.write_all(&block_info_buffer[..])?;
+ if self.frame_info.content_checksum {
+ let content_checksum = self.content_hasher.finish() as u32;
+ self.w.write_all(&content_checksum.to_le_bytes())?;
+ }
+
+ Ok(())
+ }
+
+ /// Begins the frame by writing the frame header.
+ /// It also sets up the encoder for compressing blocks for the new frame.
+ fn begin_frame(&mut self, buf_len: usize) -> io::Result<()> {
+ self.is_frame_open = true;
+ if self.frame_info.block_size == BlockSize::Auto {
+ self.frame_info.block_size = BlockSize::from_buf_length(buf_len);
+ }
+ self.init();
+ let mut frame_info_buffer = [0u8; MAX_FRAME_INFO_SIZE];
+ let size = self.frame_info.write(&mut frame_info_buffer)?;
+ self.w.write_all(&frame_info_buffer[..size])?;
+
+ if self.content_len != 0 {
+ // This is the second or later frame for this Encoder,
+ // reset compressor state for the new frame.
+ self.content_len = 0;
+ self.src_stream_offset = 0;
+ self.src.clear();
+ self.src_start = 0;
+ self.src_end = 0;
+ self.ext_dict_len = 0;
+ self.content_hasher = XxHash32::with_seed(0);
+ self.compression_table.clear();
+ }
+ Ok(())
+ }
+
+ /// Consumes the src contents between src_start and src_end,
+ /// which shouldn't exceed the max block size.
+ fn write_block(&mut self) -> io::Result<()> {
+ debug_assert!(self.is_frame_open);
+ let max_block_size = self.frame_info.block_size.get_size();
+ debug_assert!(self.src_end - self.src_start <= max_block_size);
+
+ // Reposition the compression table if we're anywhere near an overflow hazard
+ if self.src_stream_offset + max_block_size + WINDOW_SIZE >= u32::MAX as usize / 2 {
+ self.compression_table
+ .reposition((self.src_stream_offset - self.ext_dict_len) as _);
+ self.src_stream_offset = self.ext_dict_len;
+ }
+
+ // input to the compressor, which may include a prefix when blocks are linked
+ let input = &self.src[..self.src_end];
+ // the contents of the block are between src_start and src_end
+ let src = &input[self.src_start..];
+
+ let dst_required_size = crate::block::compress::get_maximum_output_size(src.len());
+
+ let compress_result = if self.ext_dict_len != 0 {
+ debug_assert_eq!(self.frame_info.block_mode, BlockMode::Linked);
+ compress_internal::<_, true, _>(
+ input,
+ self.src_start,
+ &mut vec_sink_for_compression(&mut self.dst, 0, 0, dst_required_size),
+ &mut self.compression_table,
+ &self.src[self.ext_dict_offset..self.ext_dict_offset + self.ext_dict_len],
+ self.src_stream_offset,
+ )
+ } else {
+ compress_internal::<_, false, _>(
+ input,
+ self.src_start,
+ &mut vec_sink_for_compression(&mut self.dst, 0, 0, dst_required_size),
+ &mut self.compression_table,
+ b"",
+ self.src_stream_offset,
+ )
+ };
+
+ let (block_info, block_data) = match compress_result.map_err(Error::CompressionError)? {
+ comp_len if comp_len < src.len() => {
+ (BlockInfo::Compressed(comp_len as _), &self.dst[..comp_len])
+ }
+ _ => (BlockInfo::Uncompressed(src.len() as _), src),
+ };
+
+ // Write the (un)compressed block to the writer and the block checksum (if applicable).
+ let mut block_info_buffer = [0u8; BLOCK_INFO_SIZE];
+ block_info.write(&mut block_info_buffer[..])?;
+ self.w.write_all(&block_info_buffer[..])?;
+ self.w.write_all(block_data)?;
+ if self.frame_info.block_checksums {
+ let mut block_hasher = XxHash32::with_seed(0);
+ block_hasher.write(block_data);
+ let block_checksum = block_hasher.finish() as u32;
+ self.w.write_all(&block_checksum.to_le_bytes())?;
+ }
+
+ // Content checksum, if applicable
+ if self.frame_info.content_checksum {
+ self.content_hasher.write(src);
+ }
+
+ // Buffer and offsets maintenance
+ self.content_len += src.len() as u64;
+ self.src_start += src.len();
+ debug_assert_eq!(self.src_start, self.src_end);
+ if self.frame_info.block_mode == BlockMode::Linked {
+ // In linked mode we consume the input (bumping src_start) but leave the
+ // beginning of src to be used as a prefix in subsequent blocks.
+ // That is, until we have at least `max_block_size + WINDOW_SIZE`
+ // bytes in src; then we set up an ext_dict with the last WINDOW_SIZE bytes
+ // and the input goes to the beginning of src again.
+ debug_assert_eq!(self.src.capacity(), max_block_size * 2 + WINDOW_SIZE);
+ if self.src_start >= max_block_size + WINDOW_SIZE {
+ // The ext_dict will become the last WINDOW_SIZE bytes
+ self.ext_dict_offset = self.src_end - WINDOW_SIZE;
+ self.ext_dict_len = WINDOW_SIZE;
+ // Input goes in the beginning of the buffer again.
+ self.src_stream_offset += self.src_end;
+ self.src_start = 0;
+ self.src_end = 0;
+ } else if self.src_start + self.ext_dict_len > WINDOW_SIZE {
+ // There's more than WINDOW_SIZE bytes of lookback when adding the prefix and ext_dict.
+ // Since we have a limited buffer we must shrink ext_dict in favor of the prefix,
+ // so that we can fit up to max_block_size bytes between src_start and the ext_dict
+ // start.
+ let delta = self
+ .ext_dict_len
+ .min(self.src_start + self.ext_dict_len - WINDOW_SIZE);
+ self.ext_dict_offset += delta;
+ self.ext_dict_len -= delta;
+ debug_assert!(self.src_start + self.ext_dict_len >= WINDOW_SIZE)
+ }
+ debug_assert!(
+ self.ext_dict_len == 0 || self.src_start + max_block_size <= self.ext_dict_offset
+ );
+ } else {
+ // In independent block mode we consume the entire src buffer
+ // which is sized equal to the frame max_block_size.
+ debug_assert_eq!(self.ext_dict_len, 0);
+ debug_assert_eq!(self.src.capacity(), max_block_size);
+ self.src_start = 0;
+ self.src_end = 0;
+ // Advance stream offset so we don't have to reset the match dict
+ // for the next block.
+ self.src_stream_offset += src.len();
+ }
+ debug_assert!(self.src_start <= self.src_end);
+ debug_assert!(self.src_start + max_block_size <= self.src.capacity());
+ Ok(())
+ }
+}
+
+impl<W: io::Write> io::Write for FrameEncoder<W> {
+ fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> {
+ if !self.is_frame_open && !buf.is_empty() {
+ self.begin_frame(buf.len())?;
+ }
+ let buf_len = buf.len();
+ while !buf.is_empty() {
+ let src_filled = self.src_end - self.src_start;
+ let max_fill_len = self.frame_info.block_size.get_size() - src_filled;
+ if max_fill_len == 0 {
+ // make space by writing next block
+ self.write_block()?;
+ debug_assert_eq!(self.src_end, self.src_start);
+ continue;
+ }
+
+ let fill_len = max_fill_len.min(buf.len());
+ vec_copy_overwriting(&mut self.src, self.src_end, &buf[..fill_len]);
+ buf = &buf[fill_len..];
+ self.src_end += fill_len;
+ }
+ Ok(buf_len)
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ if self.src_start != self.src_end {
+ self.write_block()?;
+ }
+ Ok(())
+ }
+}
+
+/// A wrapper around a [`FrameEncoder<W>`] that finishes the stream on drop.
+///
+/// This can be created by the [`auto_finish()`] method on the [`FrameEncoder<W>`].
+///
+/// # Note
+/// Errors on drop get silently ignored. If you want to handle errors then use [`finish()`] or
+/// [`try_finish()`] instead.
+///
+/// [`finish()`]: FrameEncoder::finish
+/// [`try_finish()`]: FrameEncoder::try_finish
+/// [`auto_finish()`]: FrameEncoder::auto_finish
+pub struct AutoFinishEncoder<W: Write> {
+ // We wrap this in an option to take it during drop.
+ encoder: Option<FrameEncoder<W>>,
+}
+
+impl<W: io::Write> Drop for AutoFinishEncoder<W> {
+ fn drop(&mut self) {
+ if let Some(mut encoder) = self.encoder.take() {
+ let _ = encoder.try_finish();
+ }
+ }
+}
+
+impl<W: Write> Write for AutoFinishEncoder<W> {
+ fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+ self.encoder.as_mut().unwrap().write(buf)
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ self.encoder.as_mut().unwrap().flush()
+ }
+}
+
+impl<W: fmt::Debug + io::Write> fmt::Debug for FrameEncoder<W> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("FrameEncoder")
+ .field("w", &self.w)
+ .field("frame_info", &self.frame_info)
+ .field("is_frame_open", &self.is_frame_open)
+ .field("content_hasher", &self.content_hasher)
+ .field("content_len", &self.content_len)
+ .field("dst", &"[...]")
+ .field("src", &"[...]")
+ .field("src_start", &self.src_start)
+ .field("src_end", &self.src_end)
+ .field("ext_dict_offset", &self.ext_dict_offset)
+ .field("ext_dict_len", &self.ext_dict_len)
+ .field("src_stream_offset", &self.src_stream_offset)
+ .finish()
+ }
+}
+
+/// Copies `src` into `target` starting at the `target_start` index, overwriting existing data if any.
+#[inline]
+fn vec_copy_overwriting(target: &mut Vec<u8>, target_start: usize, src: &[u8]) {
+ debug_assert!(target_start + src.len() <= target.capacity());
+
+ // By combining overwriting (copy_from_slice) and extending (extend_from_slice)
+ // we can fill the ring buffer without initializing it (eg. filling with 0).
+ let overwrite_len = (target.len() - target_start).min(src.len());
+ target[target_start..target_start + overwrite_len].copy_from_slice(&src[..overwrite_len]);
+ target.extend_from_slice(&src[overwrite_len..]);
+}
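+
+#[cfg(test)]
+mod vec_copy_overwriting_tests {
+ use super::vec_copy_overwriting;
+
+ // Illustrative sketch: a single call can overwrite part of the existing
+ // contents and extend past the current length without zero-initializing.
+ #[test]
+ fn overwrite_and_extend() {
+ let mut v = vec![1u8, 2, 3];
+ v.reserve(8);
+ vec_copy_overwriting(&mut v, 1, &[9, 9, 9]);
+ assert_eq!(v, vec![1u8, 9, 9, 9]);
+ }
+}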
diff --git a/src/frame/decompress.rs b/src/frame/decompress.rs
new file mode 100644
index 0000000..2b495e2
--- /dev/null
+++ b/src/frame/decompress.rs
@@ -0,0 +1,449 @@
+use std::{
+ convert::TryInto,
+ fmt,
+ hash::Hasher,
+ io::{self, BufRead, ErrorKind},
+ mem::size_of,
+};
+use twox_hash::XxHash32;
+
+use super::header::{
+ BlockInfo, BlockMode, FrameInfo, LZ4F_LEGACY_MAGIC_NUMBER, MAGIC_NUMBER_SIZE,
+ MAX_FRAME_INFO_SIZE, MIN_FRAME_INFO_SIZE,
+};
+use super::Error;
+use crate::{
+ block::WINDOW_SIZE,
+ sink::{vec_sink_for_decompression, SliceSink},
+};
+
+/// A reader for decompressing the LZ4 frame format
+///
+/// This Decoder wraps any other reader that implements `io::Read`.
+/// Bytes read will be decompressed according to the [LZ4 frame format](
+/// https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md).
+///
+/// # Example 1
+/// Deserializing json values out of a compressed file.
+///
+/// ```no_run
+/// let compressed_input = std::fs::File::open("datafile").unwrap();
+/// let mut decompressed_input = lz4_flex::frame::FrameDecoder::new(compressed_input);
+/// let json: serde_json::Value = serde_json::from_reader(decompressed_input).unwrap();
+/// ```
+///
+/// # Example 2
+/// Deserializing multiple json values out of a compressed file
+///
+/// ```no_run
+/// let compressed_input = std::fs::File::open("datafile").unwrap();
+/// let mut decompressed_input = lz4_flex::frame::FrameDecoder::new(compressed_input);
+/// loop {
+/// match serde_json::from_reader::<_, serde_json::Value>(&mut decompressed_input) {
+/// Ok(json) => { println!("json {:?}", json); }
+/// Err(e) if e.is_eof() => break,
+/// Err(e) => panic!("{}", e),
+/// }
+/// }
+/// ```
+pub struct FrameDecoder<R: io::Read> {
+ /// The underlying reader.
+ r: R,
+ /// The FrameInfo of the frame currently being decoded.
+ /// It starts as `None` and is filled in once the FrameInfo is read from the input.
+ /// It's reset to `None` once the frame EndMark is read from the input.
+ current_frame_info: Option<FrameInfo>,
+ /// Xxhash32 used when content checksum is enabled.
+ content_hasher: XxHash32,
+ /// Total length of decompressed output for the current frame.
+ content_len: u64,
+ /// The compressed bytes buffer, taken from the underlying reader.
+ src: Vec<u8>,
+ /// The decompressed bytes buffer. Bytes are decompressed from src to dst
+ /// before being passed back to the caller.
+ dst: Vec<u8>,
+ /// Index into dst and length: starting point of bytes previously output
+ /// that are still part of the decompressor window.
+ ext_dict_offset: usize,
+ ext_dict_len: usize,
+ /// Index into dst: starting point of bytes not yet read by caller.
+ dst_start: usize,
+ /// Index into dst: ending point of bytes not yet read by caller.
+ dst_end: usize,
+}
+
+impl<R: io::Read> FrameDecoder<R> {
+ /// Creates a new Decoder for the specified reader.
+ pub fn new(rdr: R) -> FrameDecoder<R> {
+ FrameDecoder {
+ r: rdr,
+ src: Default::default(),
+ dst: Default::default(),
+ ext_dict_offset: 0,
+ ext_dict_len: 0,
+ dst_start: 0,
+ dst_end: 0,
+ current_frame_info: None,
+ content_hasher: XxHash32::with_seed(0),
+ content_len: 0,
+ }
+ }
+
+ /// Gets a reference to the underlying reader in this decoder.
+ pub fn get_ref(&self) -> &R {
+ &self.r
+ }
+
+ /// Gets a mutable reference to the underlying reader in this decoder.
+ ///
+ /// Note that mutating the stream may lead to surprising results if
+ /// this decoder continues to be used.
+ pub fn get_mut(&mut self) -> &mut R {
+ &mut self.r
+ }
+
+ /// Consumes the FrameDecoder and returns the underlying reader.
+ pub fn into_inner(self) -> R {
+ self.r
+ }
+
+ fn read_frame_info(&mut self) -> Result<usize, io::Error> {
+ let mut buffer = [0u8; MAX_FRAME_INFO_SIZE];
+
+ match self.r.read(&mut buffer[..MAGIC_NUMBER_SIZE])? {
+ 0 => return Ok(0),
+ MAGIC_NUMBER_SIZE => (),
+ read => self.r.read_exact(&mut buffer[read..MAGIC_NUMBER_SIZE])?,
+ }
+
+ if u32::from_le_bytes(buffer[0..MAGIC_NUMBER_SIZE].try_into().unwrap())
+ != LZ4F_LEGACY_MAGIC_NUMBER
+ {
+ match self
+ .r
+ .read(&mut buffer[MAGIC_NUMBER_SIZE..MIN_FRAME_INFO_SIZE])?
+ {
+ 0 => return Ok(0),
+ MIN_FRAME_INFO_SIZE => (),
+ read => self
+ .r
+ .read_exact(&mut buffer[MAGIC_NUMBER_SIZE + read..MIN_FRAME_INFO_SIZE])?,
+ }
+ }
+ let required = FrameInfo::read_size(&buffer[..MIN_FRAME_INFO_SIZE])?;
+ if required != MIN_FRAME_INFO_SIZE && required != MAGIC_NUMBER_SIZE {
+ self.r
+ .read_exact(&mut buffer[MIN_FRAME_INFO_SIZE..required])?;
+ }
+
+ let frame_info = FrameInfo::read(&buffer[..required])?;
+ if frame_info.dict_id.is_some() {
+ // Unsupported right now so it must be None
+ return Err(Error::DictionaryNotSupported.into());
+ }
+
+ let max_block_size = frame_info.block_size.get_size();
+ let dst_size = if frame_info.block_mode == BlockMode::Linked {
+ // In linked mode we consume the output (bumping dst_start) but leave the
+ // beginning of dst to be used as a prefix in subsequent blocks.
+ // That is, until we have at least `max_block_size + WINDOW_SIZE`
+ // bytes in dst; then we set up an ext_dict with the last WINDOW_SIZE bytes
+ // and the output goes to the beginning of dst again.
+ // Since we always want to be able to write a full block (up to max_block_size)
+ // we need a buffer with at least `max_block_size * 2 + WINDOW_SIZE` bytes.
+ max_block_size * 2 + WINDOW_SIZE
+ } else {
+ max_block_size
+ };
+ self.src.clear();
+ self.dst.clear();
+ self.src.reserve_exact(max_block_size);
+ self.dst.reserve_exact(dst_size);
+ self.current_frame_info = Some(frame_info);
+ self.content_hasher = XxHash32::with_seed(0);
+ self.content_len = 0;
+ self.ext_dict_len = 0;
+ self.dst_start = 0;
+ self.dst_end = 0;
+ Ok(required)
+ }
+
+ #[inline]
+ fn read_checksum(r: &mut R) -> Result<u32, io::Error> {
+ let mut checksum_buffer = [0u8; size_of::<u32>()];
+ r.read_exact(&mut checksum_buffer[..])?;
+ let checksum = u32::from_le_bytes(checksum_buffer);
+ Ok(checksum)
+ }
+
+ #[inline]
+ fn check_block_checksum(data: &[u8], expected_checksum: u32) -> Result<(), io::Error> {
+ let mut block_hasher = XxHash32::with_seed(0);
+ block_hasher.write(data);
+ let calc_checksum = block_hasher.finish() as u32;
+ if calc_checksum != expected_checksum {
+ return Err(Error::BlockChecksumError.into());
+ }
+ Ok(())
+ }
+
+ fn read_block(&mut self) -> io::Result<usize> {
+ debug_assert_eq!(self.dst_start, self.dst_end);
+ let frame_info = self.current_frame_info.as_ref().unwrap();
+
+ // Adjust dst buffer offsets to decompress the next block
+ let max_block_size = frame_info.block_size.get_size();
+ if frame_info.block_mode == BlockMode::Linked {
+ // In linked mode we consume the output (bumping dst_start) but leave the
+ // beginning of dst to be used as a prefix in subsequent blocks.
+ // That is, until we have at least `max_block_size + WINDOW_SIZE`
+ // bytes in dst; then we set up an ext_dict with the last WINDOW_SIZE bytes
+ // and the output goes to the beginning of dst again.
+ debug_assert_eq!(self.dst.capacity(), max_block_size * 2 + WINDOW_SIZE);
+ if self.dst_start + max_block_size > self.dst.capacity() {
+ // Output might not fit in the buffer.
+ // The ext_dict will become the last WINDOW_SIZE bytes
+ debug_assert!(self.dst_start >= max_block_size + WINDOW_SIZE);
+ self.ext_dict_offset = self.dst_start - WINDOW_SIZE;
+ self.ext_dict_len = WINDOW_SIZE;
+ // Output goes in the beginning of the buffer again.
+ self.dst_start = 0;
+ self.dst_end = 0;
+ } else if self.dst_start + self.ext_dict_len > WINDOW_SIZE {
+ // There's more than WINDOW_SIZE bytes of lookback adding the prefix and ext_dict.
+ // Since we have a limited buffer we must shrink ext_dict in favor of the prefix,
+ // so that we can fit up to max_block_size bytes between dst_start and ext_dict
+ // start.
+ let delta = self
+ .ext_dict_len
+ .min(self.dst_start + self.ext_dict_len - WINDOW_SIZE);
+ self.ext_dict_offset += delta;
+ self.ext_dict_len -= delta;
+ debug_assert!(self.dst_start + self.ext_dict_len >= WINDOW_SIZE)
+ }
+ } else {
+ debug_assert_eq!(self.ext_dict_len, 0);
+ debug_assert_eq!(self.dst.capacity(), max_block_size);
+ self.dst_start = 0;
+ self.dst_end = 0;
+ }
+
+ // Read and decompress block
+ let block_info = {
+ let mut buffer = [0u8; 4];
+ if let Err(err) = self.r.read_exact(&mut buffer) {
+ if err.kind() == ErrorKind::UnexpectedEof {
+ return Ok(0);
+ } else {
+ return Err(err);
+ }
+ }
+ BlockInfo::read(&buffer)?
+ };
+ match block_info {
+ BlockInfo::Uncompressed(len) => {
+ let len = len as usize;
+ if len > max_block_size {
+ return Err(Error::BlockTooBig.into());
+ }
+ // TODO: Attempt to avoid initialization of read buffer when
+ // https://github.com/rust-lang/rust/issues/42788 stabilizes
+ self.r.read_exact(vec_resize_and_get_mut(
+ &mut self.dst,
+ self.dst_start,
+ self.dst_start + len,
+ ))?;
+ if frame_info.block_checksums {
+ let expected_checksum = Self::read_checksum(&mut self.r)?;
+ Self::check_block_checksum(
+ &self.dst[self.dst_start..self.dst_start + len],
+ expected_checksum,
+ )?;
+ }
+
+ self.dst_end += len;
+ self.content_len += len as u64;
+ }
+ BlockInfo::Compressed(len) => {
+ let len = len as usize;
+ if len > max_block_size {
+ return Err(Error::BlockTooBig.into());
+ }
+ // TODO: Attempt to avoid initialization of read buffer when
+ // https://github.com/rust-lang/rust/issues/42788 stabilizes
+ self.r
+ .read_exact(vec_resize_and_get_mut(&mut self.src, 0, len))?;
+ if frame_info.block_checksums {
+ let expected_checksum = Self::read_checksum(&mut self.r)?;
+ Self::check_block_checksum(&self.src[..len], expected_checksum)?;
+ }
+
+ let with_dict_mode =
+ frame_info.block_mode == BlockMode::Linked && self.ext_dict_len != 0;
+ let decomp_size = if with_dict_mode {
+ debug_assert!(self.dst_start + max_block_size <= self.ext_dict_offset);
+ let (head, tail) = self.dst.split_at_mut(self.ext_dict_offset);
+ let ext_dict = &tail[..self.ext_dict_len];
+
+ debug_assert!(head.len() - self.dst_start >= max_block_size);
+ crate::block::decompress::decompress_internal::<true, _>(
+ &self.src[..len],
+ &mut SliceSink::new(head, self.dst_start),
+ ext_dict,
+ )
+ } else {
+ // Independent blocks OR linked blocks with only prefix data
+ debug_assert!(self.dst.capacity() - self.dst_start >= max_block_size);
+ crate::block::decompress::decompress_internal::<false, _>(
+ &self.src[..len],
+ &mut vec_sink_for_decompression(
+ &mut self.dst,
+ 0,
+ self.dst_start,
+ self.dst_start + max_block_size,
+ ),
+ b"",
+ )
+ }
+ .map_err(Error::DecompressionError)?;
+
+ self.dst_end += decomp_size;
+ self.content_len += decomp_size as u64;
+ }
+
+ BlockInfo::EndMark => {
+ if let Some(expected) = frame_info.content_size {
+ if self.content_len != expected {
+ return Err(Error::ContentLengthError {
+ expected,
+ actual: self.content_len,
+ }
+ .into());
+ }
+ }
+ if frame_info.content_checksum {
+ let expected_checksum = Self::read_checksum(&mut self.r)?;
+ let calc_checksum = self.content_hasher.finish() as u32;
+ if calc_checksum != expected_checksum {
+ return Err(Error::ContentChecksumError.into());
+ }
+ }
+ self.current_frame_info = None;
+ return Ok(0);
+ }
+ }
+
+ // Content checksum, if applicable
+ if frame_info.content_checksum {
+ self.content_hasher
+ .write(&self.dst[self.dst_start..self.dst_end]);
+ }
+
+ Ok(self.dst_end - self.dst_start)
+ }
+
+ fn read_more(&mut self) -> io::Result<usize> {
+ if self.current_frame_info.is_none() && self.read_frame_info()? == 0 {
+ return Ok(0);
+ }
+ self.read_block()
+ }
+}
+
+impl<R: io::Read> io::Read for FrameDecoder<R> {
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ loop {
+ // Fill read buffer if there's uncompressed data left
+ if self.dst_start < self.dst_end {
+ let read_len = std::cmp::min(self.dst_end - self.dst_start, buf.len());
+ let dst_read_end = self.dst_start + read_len;
+ buf[..read_len].copy_from_slice(&self.dst[self.dst_start..dst_read_end]);
+ self.dst_start = dst_read_end;
+ return Ok(read_len);
+ }
+ if self.read_more()? == 0 {
+ return Ok(0);
+ }
+ }
+ }
+
+ fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
+ let mut written = 0;
+ loop {
+ match self.fill_buf() {
+ Ok(b) if b.is_empty() => return Ok(written),
+ Ok(b) => {
+ let s = std::str::from_utf8(b).map_err(|_| {
+ io::Error::new(
+ io::ErrorKind::InvalidData,
+ "stream did not contain valid UTF-8",
+ )
+ })?;
+ buf.push_str(s);
+ let len = s.len();
+ self.consume(len);
+ written += len;
+ }
+ Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
+ Err(e) => return Err(e),
+ }
+ }
+ }
+
+ fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
+ let mut written = 0;
+ loop {
+ match self.fill_buf() {
+ Ok(b) if b.is_empty() => return Ok(written),
+ Ok(b) => {
+ buf.extend_from_slice(b);
+ let len = b.len();
+ self.consume(len);
+ written += len;
+ }
+ Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
+ Err(e) => return Err(e),
+ }
+ }
+ }
+}
+
+impl<R: io::Read> io::BufRead for FrameDecoder<R> {
+ fn fill_buf(&mut self) -> io::Result<&[u8]> {
+ if self.dst_start == self.dst_end {
+ self.read_more()?;
+ }
+ Ok(&self.dst[self.dst_start..self.dst_end])
+ }
+
+ fn consume(&mut self, amt: usize) {
+ assert!(amt <= self.dst_end - self.dst_start);
+ self.dst_start += amt;
+ }
+}
+
+impl<R: fmt::Debug + io::Read> fmt::Debug for FrameDecoder<R> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("FrameDecoder")
+ .field("r", &self.r)
+ .field("content_hasher", &self.content_hasher)
+ .field("content_len", &self.content_len)
+ .field("src", &"[...]")
+ .field("dst", &"[...]")
+ .field("dst_start", &self.dst_start)
+ .field("dst_end", &self.dst_end)
+ .field("ext_dict_offset", &self.ext_dict_offset)
+ .field("ext_dict_len", &self.ext_dict_len)
+ .field("current_frame_info", &self.current_frame_info)
+ .finish()
+ }
+}
+
+/// Similar to `v.get_mut(start..end)` but will adjust the len if needed.
+#[inline]
+fn vec_resize_and_get_mut(v: &mut Vec<u8>, start: usize, end: usize) -> &mut [u8] {
+ if end > v.len() {
+ v.resize(end, 0)
+ }
+ &mut v[start..end]
+}
diff --git a/src/frame/header.rs b/src/frame/header.rs
new file mode 100644
index 0000000..1513c11
--- /dev/null
+++ b/src/frame/header.rs
@@ -0,0 +1,412 @@
+use twox_hash::XxHash32;
+
+use super::Error;
+use std::{
+ convert::TryInto,
+ fmt::Debug,
+ hash::Hasher,
+ io,
+ io::{Read, Write},
+};
+
+const FLG_RESERVED_MASK: u8 = 0b00000010;
+const FLG_VERSION_MASK: u8 = 0b11000000;
+const FLG_SUPPORTED_VERSION_BITS: u8 = 0b01000000;
+
+const FLG_INDEPENDENT_BLOCKS: u8 = 0b00100000;
+const FLG_BLOCK_CHECKSUMS: u8 = 0b00010000;
+const FLG_CONTENT_SIZE: u8 = 0b00001000;
+const FLG_CONTENT_CHECKSUM: u8 = 0b00000100;
+const FLG_DICTIONARY_ID: u8 = 0b00000001;
+
+const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK;
+const BD_BLOCK_SIZE_MASK: u8 = 0b01110000;
+const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4;
+
+const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 0x80000000;
+
+const LZ4F_MAGIC_NUMBER: u32 = 0x184D2204;
+pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C2102;
+const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D2A50..=0x184D2A5F;
+
+pub(crate) const MAGIC_NUMBER_SIZE: usize = 4;
+pub(crate) const MIN_FRAME_INFO_SIZE: usize = 7;
+pub(crate) const MAX_FRAME_INFO_SIZE: usize = 19;
+pub(crate) const BLOCK_INFO_SIZE: usize = 4;
+
+#[derive(Clone, Copy, PartialEq, Debug)]
+/// Different predefined block sizes to choose from when compressing data.
+#[derive(Default)]
+pub enum BlockSize {
+ /// Will detect the optimal block size based on the size of the first write call.
+ #[default]
+ Auto = 0,
+ /// The default block size.
+ Max64KB = 4,
+ /// 256KB block size.
+ Max256KB = 5,
+ /// 1MB block size.
+ Max1MB = 6,
+ /// 4MB block size.
+ Max4MB = 7,
+ /// 8MB block size.
+ Max8MB = 8,
+}
+
+impl BlockSize {
+ /// Try to find optimal size based on passed buffer length.
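+ /// (Resulting mapping, for illustration: > 256KB -> `Max4MB`, > 64KB -> `Max256KB`,
+ /// otherwise `Max64KB`; `Max1MB` and `Max8MB` are never auto-selected.)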
+ pub(crate) fn from_buf_length(buf_len: usize) -> Self {
+ let mut blocksize = BlockSize::Max4MB;
+
+ for candidate in [BlockSize::Max256KB, BlockSize::Max64KB] {
+ if buf_len > candidate.get_size() {
+ return blocksize;
+ }
+ blocksize = candidate;
+ }
+ BlockSize::Max64KB
+ }
+ pub(crate) fn get_size(&self) -> usize {
+ match self {
+ BlockSize::Auto => unreachable!(),
+ BlockSize::Max64KB => 64 * 1024,
+ BlockSize::Max256KB => 256 * 1024,
+ BlockSize::Max1MB => 1024 * 1024,
+ BlockSize::Max4MB => 4 * 1024 * 1024,
+ BlockSize::Max8MB => 8 * 1024 * 1024,
+ }
+ }
+}
+
+#[derive(Clone, Copy, PartialEq, Debug)]
+/// The two `BlockMode` variants that can be set on a [`FrameInfo`].
+#[derive(Default)]
+pub enum BlockMode {
+ /// Every block is compressed independently. The default.
+ #[default]
+ Independent,
+ /// Blocks can reference data from previous blocks.
+ ///
+ /// Effective when the stream contains small blocks.
+ Linked,
+}
+
+// From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
+//
+// General Structure of LZ4 Frame format
+// -------------------------------------
+//
+// | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum |
+// |:-------:|:-------------:| ----- | ----- | ------- | ----------- |
+// | 4 bytes | 3-15 bytes | | | 4 bytes | 0-4 bytes |
+//
+// Frame Descriptor
+// ----------------
+//
+// | FLG | BD | (Content Size) | (Dictionary ID) | HC |
+// | ------- | ------- |:--------------:|:---------------:| ------- |
+// | 1 byte | 1 byte | 0 - 8 bytes | 0 - 4 bytes | 1 byte |
+//
+// __FLG byte__
+//
+// | BitNb | 7-6 | 5 | 4 | 3 | 2 | 1 | 0 |
+// | ------- |-------|-------|----------|------|----------|----------|------|
+// |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID|
+//
+// __BD byte__
+//
+// | BitNb | 7 | 6-5-4 | 3-2-1-0 |
+// | ------- | -------- | ------------- | -------- |
+// |FieldName|*Reserved*| Block MaxSize |*Reserved*|
+//
+// Data Blocks
+// -----------
+//
+// | Block Size | data | (Block Checksum) |
+// |:----------:| ------ |:----------------:|
+// | 4 bytes | | 0 - 4 bytes |
+//
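+// Worked example (illustrative): an FLG byte of 0b0110_0100 encodes version 01,
+// independent blocks, no block checksums, no content size, content checksum
+// enabled, reserved bit clear, and no dictionary ID.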
+#[derive(Debug, Default, Clone)]
+/// The metadata for de/compressing with lz4 frame format.
+pub struct FrameInfo {
+ /// If set, includes the total uncompressed size of data in the frame.
+ pub content_size: Option<u64>,
+ /// The identifier for the dictionary that must be used to correctly decode data.
+ /// The compressor and the decompressor must use exactly the same dictionary.
+ ///
+ /// Note that this is currently unsupported and for this reason it's not pub.
+ pub(crate) dict_id: Option<u32>,
+ /// The maximum uncompressed size of each data block.
+ pub block_size: BlockSize,
+ /// The block mode.
+ pub block_mode: BlockMode,
+ /// If set, includes a checksum for each data block in the frame.
+ pub block_checksums: bool,
+ /// If set, includes a content checksum to verify that the full frame contents have been
+ /// decoded correctly.
+ pub content_checksum: bool,
+ /// If set, use the legacy frame format
+ pub legacy_frame: bool,
+}
+
+impl FrameInfo {
+ /// Create a new `FrameInfo`.
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Whether to include the total uncompressed size of data in the frame.
+ pub fn content_size(mut self, content_size: Option<u64>) -> Self {
+ self.content_size = content_size;
+ self
+ }
+
+ /// The maximum uncompressed size of each data block.
+ pub fn block_size(mut self, block_size: BlockSize) -> Self {
+ self.block_size = block_size;
+ self
+ }
+
+ /// The block mode.
+ pub fn block_mode(mut self, block_mode: BlockMode) -> Self {
+ self.block_mode = block_mode;
+ self
+ }
+
+ /// If set, includes a checksum for each data block in the frame.
+ pub fn block_checksums(mut self, block_checksums: bool) -> Self {
+ self.block_checksums = block_checksums;
+ self
+ }
+
+ /// If set, includes a content checksum to verify that the full frame contents have been
+ /// decoded correctly.
+ pub fn content_checksum(mut self, content_checksum: bool) -> Self {
+ self.content_checksum = content_checksum;
+ self
+ }
+
+ /// If set, use the legacy frame format.
+ pub fn legacy_frame(mut self, legacy_frame: bool) -> Self {
+ self.legacy_frame = legacy_frame;
+ self
+ }
+
+ pub(crate) fn read_size(input: &[u8]) -> Result<usize, Error> {
+ let mut required = MIN_FRAME_INFO_SIZE;
+ let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap());
+ if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
+ return Ok(MAGIC_NUMBER_SIZE);
+ }
+
+ if input.len() < required {
+ return Ok(required);
+ }
+
+ if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
+ return Ok(8);
+ }
+ if magic_num != LZ4F_MAGIC_NUMBER {
+ return Err(Error::WrongMagicNumber);
+ }
+
+ if input[4] & FLG_CONTENT_SIZE != 0 {
+ required += 8;
+ }
+ if input[4] & FLG_DICTIONARY_ID != 0 {
+ required += 4
+ }
+ Ok(required)
+ }
+
+ pub(crate) fn write_size(&self) -> usize {
+ let mut required = MIN_FRAME_INFO_SIZE;
+ if self.content_size.is_some() {
+ required += 8;
+ }
+ if self.dict_id.is_some() {
+ required += 4;
+ }
+ required
+ }
+
+ pub(crate) fn write(&self, output: &mut [u8]) -> Result<usize, Error> {
+ let write_size = self.write_size();
+ if output.len() < write_size {
+ return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into()));
+ }
+ let mut buffer = [0u8; MAX_FRAME_INFO_SIZE];
+ assert!(write_size <= buffer.len());
+ buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes());
+ buffer[4] = FLG_SUPPORTED_VERSION_BITS;
+ if self.block_checksums {
+ buffer[4] |= FLG_BLOCK_CHECKSUMS;
+ }
+ if self.content_checksum {
+ buffer[4] |= FLG_CONTENT_CHECKSUM;
+ }
+ if self.block_mode == BlockMode::Independent {
+ buffer[4] |= FLG_INDEPENDENT_BLOCKS;
+ }
+ buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT;
+
+ // Optional section
+ let mut offset = 6;
+ if let Some(size) = self.content_size {
+ buffer[4] |= FLG_CONTENT_SIZE;
+ buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes());
+ offset += 8;
+ }
+ if let Some(dict_id) = self.dict_id {
+ buffer[4] |= FLG_DICTIONARY_ID;
+ buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes());
+ offset += 4;
+ }
+
+ // Header checksum
+ let mut hasher = XxHash32::with_seed(0);
+ hasher.write(&buffer[4..offset]);
+ let header_checksum = (hasher.finish() >> 8) as u8;
+ buffer[offset] = header_checksum;
+ offset += 1;
+
+ debug_assert_eq!(offset, write_size);
+ output[..write_size].copy_from_slice(&buffer[..write_size]);
+ Ok(write_size)
+ }
+
+ pub(crate) fn read(mut input: &[u8]) -> Result<FrameInfo, Error> {
+ let original_input = input;
+ // 4 byte Magic
+ let magic_num = {
+ let mut buffer = [0u8; 4];
+ input.read_exact(&mut buffer)?;
+ u32::from_le_bytes(buffer)
+ };
+ if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
+ return Ok(FrameInfo {
+ block_size: BlockSize::Max8MB,
+ legacy_frame: true,
+ ..FrameInfo::default()
+ });
+ }
+ if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
+ let mut buffer = [0u8; 4];
+ input.read_exact(&mut buffer)?;
+ let user_data_len = u32::from_le_bytes(buffer);
+ return Err(Error::SkippableFrame(user_data_len));
+ }
+ if magic_num != LZ4F_MAGIC_NUMBER {
+ return Err(Error::WrongMagicNumber);
+ }
+
+ // fixed size section
+ let [flg_byte, bd_byte] = {
+ let mut buffer = [0u8, 0];
+ input.read_exact(&mut buffer)?;
+ buffer
+ };
+
+ if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS {
+ // version is always 01
+ return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK));
+ }
+
+ if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 {
+ return Err(Error::ReservedBitsSet);
+ }
+
+ let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 {
+ BlockMode::Independent
+ } else {
+ BlockMode::Linked
+ };
+ let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0;
+ let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0;
+
+ let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT {
+ i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)),
+ 4 => BlockSize::Max64KB,
+ 5 => BlockSize::Max256KB,
+ 6 => BlockSize::Max1MB,
+ 7 => BlockSize::Max4MB,
+ _ => unreachable!(),
+ };
+
+ // var len section
+ let mut content_size = None;
+ if flg_byte & FLG_CONTENT_SIZE != 0 {
+ let mut buffer = [0u8; 8];
+ input.read_exact(&mut buffer).unwrap();
+ content_size = Some(u64::from_le_bytes(buffer));
+ }
+
+ let mut dict_id = None;
+ if flg_byte & FLG_DICTIONARY_ID != 0 {
+ let mut buffer = [0u8; 4];
+ input.read_exact(&mut buffer)?;
+ dict_id = Some(u32::from_le_bytes(buffer));
+ }
+
+ // 1 byte header checksum
+ let expected_checksum = {
+ let mut buffer = [0u8; 1];
+ input.read_exact(&mut buffer)?;
+ buffer[0]
+ };
+ let mut hasher = XxHash32::with_seed(0);
+ hasher.write(&original_input[4..original_input.len() - input.len() - 1]);
+ let header_hash = (hasher.finish() >> 8) as u8;
+ if header_hash != expected_checksum {
+ return Err(Error::HeaderChecksumError);
+ }
+
+ Ok(FrameInfo {
+ content_size,
+ dict_id,
+ block_size,
+ block_mode,
+ block_checksums,
+ content_checksum,
+ legacy_frame: false,
+ })
+ }
+}
+
+#[derive(Debug)]
+pub(crate) enum BlockInfo {
+ Compressed(u32),
+ Uncompressed(u32),
+ EndMark,
+}
+
+impl BlockInfo {
+ pub(crate) fn read(mut input: &[u8]) -> Result<Self, Error> {
+ let mut size_buffer = [0u8; 4];
+ input.read_exact(&mut size_buffer)?;
+ let size = u32::from_le_bytes(size_buffer);
+ if size == 0 {
+ Ok(BlockInfo::EndMark)
+ } else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 {
+ Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT))
+ } else {
+ Ok(BlockInfo::Compressed(size))
+ }
+ }
+
+ pub(crate) fn write(&self, mut output: &mut [u8]) -> Result<usize, Error> {
+ let value = match self {
+ BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo),
+ BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len)
+ if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 =>
+ {
+ return Err(Error::InvalidBlockInfo)
+ }
+ BlockInfo::Compressed(len) => *len,
+ BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT,
+ BlockInfo::EndMark => 0,
+ };
+ output.write_all(&value.to_le_bytes())?;
+ Ok(4)
+ }
+}
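+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // Illustrative sketch: a block-info word round-trips through write/read.
+ #[test]
+ fn block_info_roundtrip() {
+ let mut buf = [0u8; BLOCK_INFO_SIZE];
+ BlockInfo::Uncompressed(5).write(&mut buf[..]).unwrap();
+ match BlockInfo::read(&buf).unwrap() {
+ BlockInfo::Uncompressed(len) => assert_eq!(len, 5),
+ other => panic!("unexpected block info: {:?}", other),
+ }
+ }
+}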
diff --git a/src/frame/mod.rs b/src/frame/mod.rs
new file mode 100644
index 0000000..8acfad2
--- /dev/null
+++ b/src/frame/mod.rs
@@ -0,0 +1,111 @@
+//! LZ4 Frame Format
+//!
+//! As defined in <https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md>
+//!
+//! # Example: compress data on `stdin` with frame format
+//! This program reads data from `stdin`, compresses it and emits it to `stdout`.
+//! This example can be found in `examples/compress.rs`:
+//! ```no_run
+//! use std::io;
+//! let stdin = io::stdin();
+//! let stdout = io::stdout();
+//! let mut rdr = stdin.lock();
+//! // Wrap the stdout writer in a LZ4 Frame writer.
+//! let mut wtr = lz4_flex::frame::FrameEncoder::new(stdout.lock());
+//! io::copy(&mut rdr, &mut wtr).expect("I/O operation failed");
+//! wtr.finish().unwrap();
+//! ```
+//!
+
+use std::{fmt, io};
+
+#[cfg_attr(feature = "safe-encode", forbid(unsafe_code))]
+pub(crate) mod compress;
+#[cfg_attr(feature = "safe-decode", forbid(unsafe_code))]
+pub(crate) mod decompress;
+pub(crate) mod header;
+
+pub use compress::{AutoFinishEncoder, FrameEncoder};
+pub use decompress::FrameDecoder;
+pub use header::{BlockMode, BlockSize, FrameInfo};
+
+#[derive(Debug)]
+#[non_exhaustive]
+/// Errors that can occur when de/compressing lz4.
+pub enum Error {
+ /// Compression error.
+ CompressionError(crate::block::CompressError),
+ /// Decompression error.
+ DecompressionError(crate::block::DecompressError),
+ /// An io::Error was encountered.
+ IoError(io::Error),
+ /// Unsupported block size.
+ UnsupportedBlocksize(u8),
+ /// Unsupported frame version.
+ UnsupportedVersion(u8),
+ /// Wrong magic number for the LZ4 frame format.
+ WrongMagicNumber,
+ /// Reserved bits set.
+ ReservedBitsSet,
+ /// Block header is malformed.
+ InvalidBlockInfo,
+ /// Read a block larger than specified in the Frame header.
+ BlockTooBig,
+ /// The Frame header checksum doesn't match.
+ HeaderChecksumError,
+ /// The block checksum doesn't match.
+ BlockChecksumError,
+ /// The content checksum doesn't match.
+ ContentChecksumError,
+ /// Read a skippable frame.
+ /// The caller may read the specified number of bytes from the underlying io::Read.
+ SkippableFrame(u32),
+ /// External dictionaries are not supported.
+ DictionaryNotSupported,
+ /// Content length differs.
+ ContentLengthError {
+ /// Expected content length.
+ expected: u64,
+ /// Actual content length.
+ actual: u64,
+ },
+}
+
+impl From<Error> for io::Error {
+ fn from(e: Error) -> Self {
+ match e {
+ Error::IoError(e) => e,
+ Error::CompressionError(_)
+ | Error::DecompressionError(_)
+ | Error::SkippableFrame(_)
+ | Error::DictionaryNotSupported => io::Error::new(io::ErrorKind::Other, e),
+ Error::WrongMagicNumber
+ | Error::UnsupportedBlocksize(..)
+ | Error::UnsupportedVersion(..)
+ | Error::ReservedBitsSet
+ | Error::InvalidBlockInfo
+ | Error::BlockTooBig
+ | Error::HeaderChecksumError
+ | Error::ContentChecksumError
+ | Error::BlockChecksumError
+ | Error::ContentLengthError { .. } => io::Error::new(io::ErrorKind::InvalidData, e),
+ }
+ }
+}
+
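+// An io::Error created by the conversion above may wrap a frame::Error; unwrap it
+// back to the original Error where possible, otherwise wrap it as Error::IoError.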
+impl From<io::Error> for Error {
+ fn from(e: io::Error) -> Self {
+ match e.get_ref().map(|e| e.downcast_ref::<Error>()) {
+ Some(_) => *e.into_inner().unwrap().downcast::<Error>().unwrap(),
+ None => Error::IoError(e),
+ }
+ }
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
+ write!(f, "{self:?}")
+ }
+}
+
+impl std::error::Error for Error {}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..103cd7e
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,109 @@
+//! Pure Rust, high performance implementation of LZ4 compression.
+//!
+//! A detailed explanation of the algorithm can be found [here](http://ticki.github.io/blog/how-lz4-works/).
+//!
+//! # Overview
+//!
+//! This crate provides two ways to use lz4. The first way is through the
+//! [`frame::FrameDecoder`](frame/struct.FrameDecoder.html)
+//! and
+//! [`frame::FrameEncoder`](frame/struct.FrameEncoder.html)
+//! types, which implement the `std::io::Read` and `std::io::Write` traits with the
+//! lz4 frame format. Unless you have a specific reason to the contrary, you
+//! should only use the lz4 frame format. Specifically, the lz4 frame format
+//! permits streaming compression or decompression.
+//!
+//! The second way is through the
+//! [`decompress_size_prepended`](fn.decompress_size_prepended.html)
+//! and
+//! [`compress_prepend_size`](fn.compress_prepend_size.html)
+//! functions. These functions provide access to the lz4 block format, and
+//! don't support a streaming interface directly. You should only use these types
+//! if you know you specifically need the lz4 block format.
+//!
+//! # Example: compress data on `stdin` with frame format
+//! This program reads data from `stdin`, compresses it and emits it to `stdout`.
+//! This example can be found in `examples/compress.rs`:
+//! ```no_run
+//! use std::io;
+//! let stdin = io::stdin();
+//! let stdout = io::stdout();
+//! let mut rdr = stdin.lock();
+//! // Wrap the stdout writer in a LZ4 Frame writer.
+//! let mut wtr = lz4_flex::frame::FrameEncoder::new(stdout.lock());
+//! io::copy(&mut rdr, &mut wtr).expect("I/O operation failed");
+//! wtr.finish().unwrap();
+//! ```
+//! # Example: decompress data on `stdin` with frame format
+//! This program reads data from `stdin`, decompresses it and emits it to `stdout`.
+//! This example can be found in `examples/decompress.rs`:
+//! ```no_run
+//! use std::io;
+//! let stdin = io::stdin();
+//! let stdout = io::stdout();
+//! // Wrap the stdin reader in a LZ4 FrameDecoder.
+//! let mut rdr = lz4_flex::frame::FrameDecoder::new(stdin.lock());
+//! let mut wtr = stdout.lock();
+//! io::copy(&mut rdr, &mut wtr).expect("I/O operation failed");
+//! ```
+//!
+//! # Example: block format roundtrip
+//! ```
+//! use lz4_flex::block::{compress_prepend_size, decompress_size_prepended};
+//! let input: &[u8] = b"Hello people, what's up?";
+//! let compressed = compress_prepend_size(input);
+//! let uncompressed = decompress_size_prepended(&compressed).unwrap();
+//! assert_eq!(input, uncompressed);
+//! ```
+//!
+//! ## Feature Flags
+//!
+//! - `safe-encode` uses only safe Rust for encoding. _enabled by default_
+//! - `safe-decode` uses only safe Rust for decoding. _enabled by default_
+//! - `frame` support for LZ4 frame format. _implies `std`, enabled by default_
+//! - `std` enables dependency on the standard library. _enabled by default_
+//!
+//! For maximum performance use `no-default-features`.
+//!
+//! For no_std support only the [`block format`](block/index.html) is supported.
+//!
+//!
+#![deny(warnings)]
+#![deny(missing_docs)]
+#![cfg_attr(not(feature = "std"), no_std)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+#![cfg_attr(nightly, feature(optimize_attribute))]
+
+#[cfg_attr(test, macro_use)]
+extern crate alloc;
+
+#[cfg(test)]
+#[macro_use]
+extern crate more_asserts;
+
+pub mod block;
+#[cfg(feature = "frame")]
+#[cfg_attr(docsrs, doc(cfg(feature = "frame")))]
+pub mod frame;
+
+#[allow(dead_code)]
+mod fastcpy;
+#[allow(dead_code)]
+mod fastcpy_unsafe;
+
+#[deprecated(
+ since = "0.11.0",
+ note = "This re-export is deprecated as it can be confused with the frame API and is not suitable for very large data, use block:: instead"
+)]
+pub use block::{compress, compress_into, compress_prepend_size};
+#[deprecated(
+ since = "0.11.0",
+ note = "This re-export is deprecated as it can be confused with the frame API and is not suitable for very large data, use block:: instead"
+)]
+pub use block::{decompress, decompress_into, decompress_size_prepended};
+
+#[cfg_attr(
+ all(feature = "safe-encode", feature = "safe-decode"),
+ forbid(unsafe_code)
+)]
+pub(crate) mod sink;
diff --git a/src/sink.rs b/src/sink.rs
new file mode 100644
index 0000000..a440a2d
--- /dev/null
+++ b/src/sink.rs
@@ -0,0 +1,330 @@
+#[allow(unused_imports)]
+use alloc::vec::Vec;
+
+use crate::fastcpy::slice_copy;
+
+/// Returns a Sink implementation appropriate for outputting up to `required_capacity`
+/// bytes at `vec[offset..offset+required_capacity]`.
+/// The vec is resized as needed (pre-filling with zeroes) and wrapped in a `SliceSink`.
+/// The argument `pos` defines the initial output position in the Sink.
+#[inline]
+#[cfg(feature = "frame")]
+pub fn vec_sink_for_compression(
+ vec: &mut Vec<u8>,
+ offset: usize,
+ pos: usize,
+ required_capacity: usize,
+) -> SliceSink {
+ vec.resize(offset + required_capacity, 0);
+ SliceSink::new(&mut vec[offset..], pos)
+}
+
+/// Returns a Sink implementation appropriate for outputting up to `required_capacity`
+/// bytes at `vec[offset..offset+required_capacity]`.
+/// The vec is resized as needed (pre-filling with zeroes) and wrapped in a `SliceSink`.
+/// The argument `pos` defines the initial output position in the Sink.
+#[cfg(feature = "frame")]
+#[inline]
+pub fn vec_sink_for_decompression(
+ vec: &mut Vec<u8>,
+ offset: usize,
+ pos: usize,
+ required_capacity: usize,
+) -> SliceSink {
+ vec.resize(offset + required_capacity, 0);
+ SliceSink::new(&mut vec[offset..], pos)
+}
+
+pub trait Sink {
+ /// Returns a raw ptr to the first unfilled byte of the Sink. Analogous to `[pos..].as_ptr()`.
+ #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+ unsafe fn pos_mut_ptr(&mut self) -> *mut u8;
+
+ /// Reads the byte at position `pos`.
+ fn byte_at(&mut self, pos: usize) -> u8;
+
+ /// Pushes a byte to the end of the Sink.
+ #[cfg(feature = "safe-encode")]
+ fn push(&mut self, byte: u8);
+
+ #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+ unsafe fn base_mut_ptr(&mut self) -> *mut u8;
+
+ fn pos(&self) -> usize;
+
+ fn capacity(&self) -> usize;
+
+ #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+ unsafe fn set_pos(&mut self, new_pos: usize);
+
+ #[cfg(feature = "safe-decode")]
+ fn extend_with_fill(&mut self, byte: u8, len: usize);
+
+ /// Extends the Sink with `data`.
+ fn extend_from_slice(&mut self, data: &[u8]);
+
+ fn extend_from_slice_wild(&mut self, data: &[u8], copy_len: usize);
+
+ /// Copies `len` bytes starting from `start` to the end of the Sink.
+ /// # Panics
+ /// Panics if `start` >= `pos`.
+ #[cfg(feature = "safe-decode")]
+ fn extend_from_within(&mut self, start: usize, wild_len: usize, copy_len: usize);
+
+ #[cfg(feature = "safe-decode")]
+ fn extend_from_within_overlapping(&mut self, start: usize, num_bytes: usize);
+}
+
+/// SliceSink is used as a target to de/compress data into a preallocated and possibly
+/// uninitialized `&mut [u8]` space.
+///
+/// # Handling of Capacity
+/// Extend methods will panic if there's insufficient capacity left in the Sink.
+///
+/// # Invariants
+/// - Bytes `[..pos()]` are always initialized.
+pub struct SliceSink<'a> {
+ /// The working slice, which may contain uninitialized bytes
+ output: &'a mut [u8],
+ /// Number of bytes in start of `output` guaranteed to be initialized
+ pos: usize,
+}
+
+impl<'a> SliceSink<'a> {
+ /// Creates a `Sink` backed by the given byte slice.
+ /// `pos` defines the initial output position in the Sink.
+ /// # Panics
+ /// Panics if `pos` is out of bounds.
+ #[inline]
+ pub fn new(output: &'a mut [u8], pos: usize) -> Self {
+ // SAFETY: Caller guarantees that all elements of `output[..pos]` are initialized.
+ let _ = &mut output[..pos]; // bounds check pos
+ SliceSink { output, pos }
+ }
+}
+
+impl<'a> Sink for SliceSink<'a> {
+ /// Returns a raw ptr to the first unfilled byte of the Sink. Analogous to `[pos..].as_ptr()`.
+ #[inline]
+ #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+ unsafe fn pos_mut_ptr(&mut self) -> *mut u8 {
+ self.base_mut_ptr().add(self.pos()) as *mut u8
+ }
+
+ /// Returns the byte at `pos`.
+ #[inline]
+ fn byte_at(&mut self, pos: usize) -> u8 {
+ self.output[pos]
+ }
+
+ /// Pushes a byte to the end of the Sink.
+ #[inline]
+ #[cfg(feature = "safe-encode")]
+ fn push(&mut self, byte: u8) {
+ self.output[self.pos] = byte;
+ self.pos += 1;
+ }
+
+ #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+ unsafe fn base_mut_ptr(&mut self) -> *mut u8 {
+ self.output.as_mut_ptr()
+ }
+
+ #[inline]
+ fn pos(&self) -> usize {
+ self.pos
+ }
+
+ #[inline]
+ fn capacity(&self) -> usize {
+ self.output.len()
+ }
+
+ #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+ #[inline]
+ unsafe fn set_pos(&mut self, new_pos: usize) {
+ debug_assert!(new_pos <= self.capacity());
+ self.pos = new_pos;
+ }
+
+ #[inline]
+ #[cfg(feature = "safe-decode")]
+ fn extend_with_fill(&mut self, byte: u8, len: usize) {
+ self.output[self.pos..self.pos + len].fill(byte);
+ self.pos += len;
+ }
+
+ /// Extends the Sink with `data`.
+ #[inline]
+ fn extend_from_slice(&mut self, data: &[u8]) {
+ self.extend_from_slice_wild(data, data.len())
+ }
+
+ #[inline]
+ fn extend_from_slice_wild(&mut self, data: &[u8], copy_len: usize) {
+ assert!(copy_len <= data.len());
+ slice_copy(data, &mut self.output[self.pos..(self.pos) + data.len()]);
+ self.pos += copy_len;
+ }
+
+ /// Copies `len` bytes starting from `start` to the end of the Sink.
+ /// # Panics
+ /// Panics if `start` >= `pos`.
+ #[inline]
+ #[cfg(feature = "safe-decode")]
+ fn extend_from_within(&mut self, start: usize, wild_len: usize, copy_len: usize) {
+ self.output.copy_within(start..start + wild_len, self.pos);
+ self.pos += copy_len;
+ }
+
+ #[inline]
+ #[cfg(feature = "safe-decode")]
+ #[cfg_attr(nightly, optimize(size))] // to avoid loop unrolling
+ fn extend_from_within_overlapping(&mut self, start: usize, num_bytes: usize) {
+ let offset = self.pos - start;
+ for i in start + offset..start + offset + num_bytes {
+ self.output[i] = self.output[i - offset];
+ }
+ self.pos += num_bytes;
+ }
+}
+
+/// PtrSink is used as a target to de/compress data into a preallocated and possibly
+/// uninitialized buffer behind a raw pointer.
+#[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+pub struct PtrSink {
+ /// Pointer to the working buffer, which may contain uninitialized bytes
+ output: *mut u8,
+ /// Number of bytes in start of `output` guaranteed to be initialized
+ pos: usize,
+ /// Number of bytes in output available
+ cap: usize,
+}
+
+#[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+impl PtrSink {
+ /// Creates a `Sink` backed by the given byte slice.
+ /// `pos` defines the initial output position in the Sink.
+ /// # Panics
+ /// Panics if `pos` is out of bounds.
+ #[inline]
+ pub fn from_vec(output: &mut Vec<u8>, pos: usize) -> Self {
+ // SAFETY: Bytes behind pointer may be uninitialized.
+ Self {
+ output: output.as_mut_ptr(),
+ pos,
+ cap: output.capacity(),
+ }
+ }
+}
+
+#[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+impl Sink for PtrSink {
+ /// Returns a raw ptr to the first unfilled byte of the Sink. Analogous to `[pos..].as_ptr()`.
+ #[inline]
+ #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+ unsafe fn pos_mut_ptr(&mut self) -> *mut u8 {
+ self.base_mut_ptr().add(self.pos()) as *mut u8
+ }
+
+ /// Returns the byte at `pos`.
+ #[inline]
+ fn byte_at(&mut self, pos: usize) -> u8 {
+ unsafe { self.output.add(pos).read() }
+ }
+
+ /// Pushes a byte to the end of the Sink.
+ #[inline]
+ #[cfg(feature = "safe-encode")]
+ fn push(&mut self, byte: u8) {
+ unsafe {
+ self.pos_mut_ptr().write(byte);
+ }
+ self.pos += 1;
+ }
+
+ #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+ unsafe fn base_mut_ptr(&mut self) -> *mut u8 {
+ self.output
+ }
+
+ #[inline]
+ fn pos(&self) -> usize {
+ self.pos
+ }
+
+ #[inline]
+ fn capacity(&self) -> usize {
+ self.cap
+ }
+
+ #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
+ #[inline]
+ unsafe fn set_pos(&mut self, new_pos: usize) {
+ debug_assert!(new_pos <= self.capacity());
+ self.pos = new_pos;
+ }
+
+ #[inline]
+ #[cfg(feature = "safe-decode")]
+ fn extend_with_fill(&mut self, _byte: u8, _len: usize) {
+ unreachable!();
+ }
+
+ /// Extends the Sink with `data`.
+ #[inline]
+ fn extend_from_slice(&mut self, data: &[u8]) {
+ self.extend_from_slice_wild(data, data.len())
+ }
+
+ #[inline]
+ fn extend_from_slice_wild(&mut self, data: &[u8], copy_len: usize) {
+ assert!(copy_len <= data.len());
+ unsafe {
+ core::ptr::copy_nonoverlapping(data.as_ptr(), self.pos_mut_ptr(), copy_len);
+ }
+ self.pos += copy_len;
+ }
+
+ /// Copies `len` bytes starting from `start` to the end of the Sink.
+ /// # Panics
+ /// Panics if `start` >= `pos`.
+ #[inline]
+ #[cfg(feature = "safe-decode")]
+ fn extend_from_within(&mut self, _start: usize, _wild_len: usize, _copy_len: usize) {
+ unreachable!();
+ }
+
+ #[inline]
+ #[cfg(feature = "safe-decode")]
+ fn extend_from_within_overlapping(&mut self, _start: usize, _num_bytes: usize) {
+ unreachable!();
+ }
+}
+
+#[cfg(test)]
+mod tests {
+
+ #[test]
+ #[cfg(any(feature = "safe-encode", feature = "safe-decode"))]
+ fn test_sink_slice() {
+ use crate::sink::Sink;
+ use crate::sink::SliceSink;
+ use alloc::vec::Vec;
+ let mut data = Vec::new();
+ data.resize(5, 0);
+ let sink = SliceSink::new(&mut data, 1);
+ assert_eq!(sink.pos(), 1);
+ assert_eq!(sink.capacity(), 5);
+ }
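+
+    #[test]
+    #[cfg(any(feature = "safe-encode", feature = "safe-decode"))]
+    fn test_sink_extend_from_slice() {
+        // Illustrative sketch: extending the sink copies the bytes and advances `pos`.
+        use crate::sink::Sink;
+        use crate::sink::SliceSink;
+        use alloc::vec::Vec;
+        let mut data = Vec::new();
+        data.resize(5, 0);
+        let mut sink = SliceSink::new(&mut data, 0);
+        sink.extend_from_slice(b"abc");
+        assert_eq!(sink.pos(), 3);
+        assert_eq!(&data[..3], b"abc");
+    }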
+}