diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2024-02-02 23:52:52 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2024-02-02 23:52:52 +0000 |
commit | 14c2689d69ac34943f2ea87dd175f87616bd2ab3 (patch) | |
tree | b42294cc4406c822c45ba56c215db6d591a207d1 | |
parent | 07727135e0b5b979ed4cafd54238bb6a09002ccc (diff) | |
parent | 666ceb431cd721b5d90463af6d32270a645b6093 (diff) | |
download | flate2-simpleperf-release.tar.gz |
Snap for 11400057 from 666ceb431cd721b5d90463af6d32270a645b6093 to simpleperf-release simpleperf-release
Change-Id: If0eba1f6d33cb7d6409617321df2b74791b05c3a
56 files changed, 1351 insertions, 978 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index bfa7c9d..fc9abfc 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,6 +1,6 @@ { "git": { - "sha1": "8431d9e0c0fdaea16c4643c723631223802b2c86" + "sha1": "a99b53ec65d3c2e3f703e5e865d5e886a23a83dc" }, "path_in_vcs": "" }
\ No newline at end of file diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml new file mode 100644 index 0000000..ea8cae6 --- /dev/null +++ b/.github/workflows/cifuzz.yml @@ -0,0 +1,26 @@ +name: CIFuzz +on: [pull_request] +jobs: + Fuzzing: + runs-on: ubuntu-latest + steps: + - name: Build Fuzzers + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: 'flate2-rs' + dry-run: false + language: rust + - name: Run Fuzzers + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + oss-fuzz-project-name: 'flate2-rs' + fuzz-seconds: 180 + dry-run: false + language: rust + - name: Upload Crash + uses: actions/upload-artifact@v3 + if: failure() && steps.build.outcome == 'success' + with: + name: artifacts + path: ./out/artifacts diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9cdaa50..ba72877 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,13 +23,13 @@ jobs: os: macos-latest rust: stable - build: windows - os: windows-2019 + os: windows-2022 rust: stable - build: mingw - os: windows-2019 + os: windows-2022 rust: stable-x86_64-gnu steps: - - uses: actions/checkout@master + - uses: actions/checkout@v3 - name: Install Rust (rustup) run: rustup update ${{ matrix.rust }} --no-self-update && rustup default ${{ matrix.rust }} shell: bash @@ -38,18 +38,25 @@ jobs: - run: cargo test - run: cargo test --features zlib - run: cargo test --features zlib --no-default-features + - run: cargo test --features zlib-default --no-default-features - run: cargo test --features zlib-ng-compat --no-default-features if: matrix.build != 'mingw' - run: cargo test --features zlib-ng --no-default-features if: matrix.build != 'mingw' - run: cargo test --features cloudflare_zlib --no-default-features if: matrix.build != 'mingw' + - run: | + if ! 
cargo check --no-default-features 2>&1 | grep "You need to choose"; then + echo "expected message stating a zlib backend must be chosen" + exit 1 + fi + if: matrix.build == 'stable' rustfmt: name: Rustfmt runs-on: ubuntu-latest steps: - - uses: actions/checkout@master + - uses: actions/checkout@v3 - name: Install Rust run: rustup update stable && rustup default stable && rustup component add rustfmt - run: cargo fmt -- --check @@ -61,7 +68,7 @@ jobs: matrix: target: [wasm32-unknown-unknown, wasm32-wasi] steps: - - uses: actions/checkout@master + - uses: actions/checkout@v3 - name: Install Rust run: rustup update stable && rustup default stable && rustup target add ${{ matrix.target }} - run: cargo build --target ${{ matrix.target }} @@ -41,7 +41,7 @@ rust_test { name: "flate2_test_src_lib", crate_name: "flate2", cargo_env_compat: true, - cargo_pkg_version: "1.0.25", + cargo_pkg_version: "1.0.28", srcs: ["src/lib.rs"], test_suites: ["general-tests"], auto_gen_config: true, @@ -50,6 +50,7 @@ rust_test { }, edition: "2018", features: [ + "any_impl", "any_zlib", "libz-sys", "zlib", @@ -66,7 +67,7 @@ rust_test { name: "flate2_test_tests_early-flush", crate_name: "early_flush", cargo_env_compat: true, - cargo_pkg_version: "1.0.25", + cargo_pkg_version: "1.0.28", srcs: ["tests/early-flush.rs"], test_suites: ["general-tests"], auto_gen_config: true, @@ -75,6 +76,7 @@ rust_test { }, edition: "2018", features: [ + "any_impl", "any_zlib", "libz-sys", "zlib", @@ -92,7 +94,7 @@ rust_test { name: "flate2_test_tests_empty-read", crate_name: "empty_read", cargo_env_compat: true, - cargo_pkg_version: "1.0.25", + cargo_pkg_version: "1.0.28", srcs: ["tests/empty-read.rs"], test_suites: ["general-tests"], auto_gen_config: true, @@ -101,6 +103,7 @@ rust_test { }, edition: "2018", features: [ + "any_impl", "any_zlib", "libz-sys", "zlib", @@ -118,7 +121,7 @@ rust_test { name: "flate2_test_tests_gunzip", crate_name: "gunzip", cargo_env_compat: true, - cargo_pkg_version: "1.0.25", + 
cargo_pkg_version: "1.0.28", srcs: ["tests/gunzip.rs"], test_suites: ["general-tests"], auto_gen_config: true, @@ -127,6 +130,7 @@ rust_test { }, edition: "2018", features: [ + "any_impl", "any_zlib", "libz-sys", "zlib", @@ -151,7 +155,7 @@ rust_test { name: "flate2_test_tests_zero-write", crate_name: "zero_write", cargo_env_compat: true, - cargo_pkg_version: "1.0.25", + cargo_pkg_version: "1.0.28", srcs: ["tests/zero-write.rs"], test_suites: ["general-tests"], auto_gen_config: true, @@ -160,6 +164,7 @@ rust_test { }, edition: "2018", features: [ + "any_impl", "any_zlib", "libz-sys", "zlib", @@ -178,10 +183,11 @@ rust_library { host_supported: true, crate_name: "flate2", cargo_env_compat: true, - cargo_pkg_version: "1.0.25", + cargo_pkg_version: "1.0.28", srcs: ["src/lib.rs"], edition: "2018", features: [ + "any_impl", "any_zlib", "libz-sys", "zlib", diff --git a/Android.bp.orig b/Android.bp.orig new file mode 100644 index 0000000..2a307f8 --- /dev/null +++ b/Android.bp.orig @@ -0,0 +1,210 @@ +// This file is generated by cargo_embargo. +// Do not modify this file as changes will be overridden on upgrade. + +package { + default_applicable_licenses: ["external_rust_crates_flate2_license"], +} + +// Added automatically by a large-scale-change that took the approach of +// 'apply every license found to every target'. While this makes sure we respect +// every license restriction, it may not be entirely correct. +// +// e.g. GPL in an MIT project might only apply to the contrib/ directory. +// +// Please consider splitting the single license below into multiple licenses, +// taking care not to lose any license_kind information, and overriding the +// default license using the 'licenses: [...]' property on targets as needed. +// +// For unused files, consider creating a 'fileGroup' with "//visibility:private" +// to attach the license to, and including a comment whether the files may be +// used in the current project. 
+// +// large-scale-change included anything that looked like it might be a license +// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc. +// +// Please consider removing redundant or irrelevant files from 'license_text:'. +// See: http://go/android-license-faq +license { + name: "external_rust_crates_flate2_license", + visibility: [":__subpackages__"], + license_kinds: [ + "SPDX-license-identifier-Apache-2.0", + "SPDX-license-identifier-MIT", + ], + license_text: [ + "LICENSE-APACHE", + "LICENSE-MIT", + ], +} + +rust_test { + name: "flate2_test_src_lib", + host_supported: true, + crate_name: "flate2", + cargo_env_compat: true, + cargo_pkg_version: "1.0.28", + srcs: ["src/lib.rs"], + test_suites: ["general-tests"], + auto_gen_config: true, + test_options: { + unit_test: true, + }, + edition: "2018", + features: [ + "any_impl", + "any_zlib", + "libz-sys", + "zlib", + ], + rustlibs: [ + "libcrc32fast", + "liblibz_sys", + "libquickcheck", + "librand", + ], +} + +rust_test { + name: "flate2_test_tests_early-flush", + host_supported: true, + crate_name: "early_flush", + cargo_env_compat: true, + cargo_pkg_version: "1.0.28", + srcs: ["tests/early-flush.rs"], + test_suites: ["general-tests"], + auto_gen_config: true, + test_options: { + unit_test: true, + }, + edition: "2018", + features: [ + "any_impl", + "any_zlib", + "libz-sys", + "zlib", + ], + rustlibs: [ + "libcrc32fast", + "libflate2", + "liblibz_sys", + "libquickcheck", + "librand", + ], +} + +rust_test { + name: "flate2_test_tests_empty-read", + host_supported: true, + crate_name: "empty_read", + cargo_env_compat: true, + cargo_pkg_version: "1.0.28", + srcs: ["tests/empty-read.rs"], + test_suites: ["general-tests"], + auto_gen_config: true, + test_options: { + unit_test: true, + }, + edition: "2018", + features: [ + "any_impl", + "any_zlib", + "libz-sys", + "zlib", + ], + rustlibs: [ + "libcrc32fast", + "libflate2", + "liblibz_sys", + "libquickcheck", + "librand", + ], +} + +rust_test { + name: 
"flate2_test_tests_gunzip", + host_supported: true, + crate_name: "gunzip", + cargo_env_compat: true, + cargo_pkg_version: "1.0.28", + srcs: ["tests/gunzip.rs"], + test_suites: ["general-tests"], + auto_gen_config: true, + test_options: { + unit_test: true, + }, + edition: "2018", + features: [ + "any_impl", + "any_zlib", + "libz-sys", + "zlib", + ], + rustlibs: [ + "libcrc32fast", + "libflate2", + "liblibz_sys", + "libquickcheck", + "librand", + ], + data: [ + "tests/corrupt-gz-file.bin", + "tests/good-file.gz", + "tests/good-file.txt", + "tests/multi.gz", + "tests/multi.txt", + ], +} + +rust_test { + name: "flate2_test_tests_zero-write", + host_supported: true, + crate_name: "zero_write", + cargo_env_compat: true, + cargo_pkg_version: "1.0.28", + srcs: ["tests/zero-write.rs"], + test_suites: ["general-tests"], + auto_gen_config: true, + test_options: { + unit_test: true, + }, + edition: "2018", + features: [ + "any_impl", + "any_zlib", + "libz-sys", + "zlib", + ], + rustlibs: [ + "libcrc32fast", + "libflate2", + "liblibz_sys", + "libquickcheck", + "librand", + ], +} + +rust_library { + name: "libflate2", + host_supported: true, + crate_name: "flate2", + cargo_env_compat: true, + cargo_pkg_version: "1.0.28", + srcs: ["src/lib.rs"], + edition: "2018", + features: [ + "any_impl", + "any_zlib", + "libz-sys", + "zlib", + ], + rustlibs: [ + "libcrc32fast", + "liblibz_sys", + ], + apex_available: [ + "//apex_available:platform", + "com.android.virt", + ], + product_available: true, + vendor_available: true, +} diff --git a/Cargo.lock.saved b/Cargo.lock.saved deleted file mode 100644 index 5389eec..0000000 --- a/Cargo.lock.saved +++ /dev/null @@ -1,173 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "cc" -version = "1.0.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "cloudflare-zlib-sys" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2040b6d1edfee6d75f172d81e2d2a7807534f3f294ce18184c70e7bb0105cd6f" -dependencies = [ - "cc", -] - -[[package]] -name = "cmake" -version = "0.1.49" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db34956e100b30725f2eb215f90d4871051239535632f84fea3bc92722c66b7c" -dependencies = [ - "cc", -] - -[[package]] -name = "crc32fast" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "flate2" -version = "1.0.25" -dependencies = [ - "cloudflare-zlib-sys", - "crc32fast", - "libz-ng-sys", - "libz-sys", - "miniz_oxide", - "quickcheck", - "rand", -] - -[[package]] -name = "getrandom" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "libc" -version = "0.2.137" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" - -[[package]] -name = "libz-ng-sys" -version = "1.1.8" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4399ae96a9966bf581e726de86969f803a81b7ce795fcd5480e640589457e0f2" -dependencies = [ - "cmake", - "libc", -] - -[[package]] -name = "libz-sys" -version = "1.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9702761c3935f8cc2f101793272e202c72b99da8f4224a19ddcf1279a6450bbf" -dependencies = [ - "cc", - "cmake", - "libc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "miniz_oxide" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" -dependencies = [ - "adler", -] - -[[package]] -name = "pkg-config" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "quickcheck" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" -dependencies = [ - "rand", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" @@ -12,7 +12,7 @@ [package] edition = "2018" name = "flate2" -version = "1.0.25" +version = "1.0.28" authors = [ "Alex Crichton <alex@alexcrichton.com>", "Josh Triplett <josh@joshtriplett.org>", @@ -39,6 +39,13 @@ categories = [ license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/flate2-rs" +[package.metadata.docs.rs] +all-features = true +rustdoc-args = [ + "--cfg", + "docsrs", +] + [dependencies.cloudflare-zlib-sys] version = "0.3.0" optional = true @@ -56,7 +63,7 @@ optional = true default-features = false [dependencies.miniz_oxide] -version = "0.6.0" +version = "0.7.1" features = ["with-alloc"] optional = true default-features = false @@ -69,18 +76,26 @@ default-features = false version = "0.8" [features] -any_zlib = [] +any_impl = [] +any_zlib = ["any_impl"] cloudflare_zlib = [ "any_zlib", "cloudflare-zlib-sys", ] default = ["rust_backend"] miniz-sys = ["rust_backend"] -rust_backend = ["miniz_oxide"] +rust_backend = [ + "miniz_oxide", + "any_impl", +] zlib = [ "any_zlib", "libz-sys", ] +zlib-default = [ + "any_zlib", + "libz-sys/default", +] zlib-ng = [ "any_zlib", "libz-ng-sys", @@ -91,6 +106,6 @@ zlib-ng-compat = [ ] [target."cfg(all(target_arch = \"wasm32\", not(target_os = \"emscripten\")))".dependencies.miniz_oxide] -version = "0.6.0" +version = "0.7.1" features = ["with-alloc"] default-features = false diff --git a/Cargo.toml.orig b/Cargo.toml.orig index aa9a4fc..b59d2a6 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ 
-1,7 +1,7 @@ [package] name = "flate2" authors = ["Alex Crichton <alex@alexcrichton.com>", "Josh Triplett <josh@joshtriplett.org>"] -version = "1.0.25" +version = "1.0.28" edition = "2018" license = "MIT OR Apache-2.0" readme = "README.md" @@ -20,11 +20,11 @@ and raw deflate streams. libz-sys = { version = "1.1.8", optional = true, default-features = false } libz-ng-sys = { version = "1.1.8", optional = true } cloudflare-zlib-sys = { version = "0.3.0", optional = true } -miniz_oxide = { version = "0.6.0", optional = true, default-features = false, features = ["with-alloc"] } +miniz_oxide = { version = "0.7.1", optional = true, default-features = false, features = ["with-alloc"] } crc32fast = "1.2.0" [target.'cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))'.dependencies] -miniz_oxide = { version = "0.6.0", default-features = false, features = ["with-alloc"] } +miniz_oxide = { version = "0.7.1", default-features = false, features = ["with-alloc"] } [dev-dependencies] rand = "0.8" @@ -32,10 +32,16 @@ quickcheck = { version = "1.0", default-features = false } [features] default = ["rust_backend"] -any_zlib = [] # note: this is not a real user-facing feature +any_zlib = ["any_impl"] # note: this is not a real user-facing feature +any_impl = [] # note: this is not a real user-facing feature zlib = ["any_zlib", "libz-sys"] +zlib-default = ["any_zlib", "libz-sys/default"] zlib-ng-compat = ["zlib", "libz-sys/zlib-ng"] zlib-ng = ["any_zlib", "libz-ng-sys"] cloudflare_zlib = ["any_zlib", "cloudflare-zlib-sys"] -rust_backend = ["miniz_oxide"] +rust_backend = ["miniz_oxide", "any_impl"] miniz-sys = ["rust_backend"] # For backwards compatibility + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] diff --git a/MAINTENANCE.md b/MAINTENANCE.md new file mode 100644 index 0000000..c032c25 --- /dev/null +++ b/MAINTENANCE.md @@ -0,0 +1,21 @@ +This document explains how to perform the project's maintenance tasks. 
+ +### Creating a new release + +#### Artifacts + +* a tag of the version number +* a new [crate version](https://crates.io/crates/flate2/versions) + +#### Process + +To generate all the artifacts above, one proceeds as follows: + +1. `git checkout -b release-<next-version>` - move to a branch to prepare making changes to the repository. *Changes cannot be made to `main` as it is protected.* +2. Edit `Cargo.toml` to the next package version. +3. `gh pr create` to create a new PR for the current branch and **get it merged**. +4. `cargo publish` to create a new release on `crates.io`. +5. `git tag <next-version>` to remember the commit. +6. `git push --tags` to push the new tag. +7. Go to the newly created release page on GitHub and edit it by pressing the "Generate Release Notes" and the `@` button. Save the release. + @@ -1,23 +1,20 @@ # This project was upgraded with external_updater. -# Usage: tools/external_updater/updater.sh update rust/crates/flate2 -# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md +# Usage: tools/external_updater/updater.sh update external/rust/crates/flate2 +# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md name: "flate2" description: "DEFLATE compression and decompression exposed as Read/BufRead/Write streams. Supports miniz_oxide, miniz.c, and multiple zlib implementations. Supports zlib, gzip, and raw deflate streams." 
third_party { - url { - type: HOMEPAGE - value: "https://crates.io/crates/flate2" - } - url { - type: ARCHIVE - value: "https://static.crates.io/crates/flate2/flate2-1.0.25.crate" - } - version: "1.0.25" license_type: NOTICE last_upgrade_date { - year: 2022 - month: 12 - day: 9 + year: 2024 + month: 2 + day: 1 + } + homepage: "https://crates.io/crates/flate2" + identifier { + type: "Archive" + value: "https://static.crates.io/crates/flate2/flate2-1.0.28.crate" + version: "1.0.28" } } @@ -6,7 +6,7 @@ A streaming compression/decompression library DEFLATE-based streams in Rust. This crate by default uses the `miniz_oxide` crate, a port of `miniz.c` to pure -Rust. This crate also supports other [backends](#Backends), such as the widely +Rust. This crate also supports other [backends](#backends), such as the widely available zlib library or the high-performance zlib-ng library. Supported formats: @@ -21,6 +21,12 @@ Supported formats: flate2 = "1.0" ``` +## MSRV (Minimum Supported Rust Version) Policy + +This crate supports the current stable and the last stable for the latest version. +For example, if the current stable is 1.64, this crate supports 1.64 and 1.63. +Older stables may work, but we don't guarantee these will continue to work. + ## Compression ```rust diff --git a/examples/compress_file.rs b/examples/compress_file.rs index 39ed8ee..be4b81e 100644 --- a/examples/compress_file.rs +++ b/examples/compress_file.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::write::GzEncoder; use flate2::Compression; use std::env::args; diff --git a/examples/decompress_file.rs b/examples/decompress_file.rs new file mode 100644 index 0000000..6b2ede9 --- /dev/null +++ b/examples/decompress_file.rs @@ -0,0 +1,23 @@ +use flate2::bufread; +use std::env::args; +use std::fs::File; +use std::io::copy; +use std::io::BufReader; +use std::time::Instant; + +fn main() { + // E.g. 
`cargo run --example decompress_file examples/hello_world.txt.gz hello_world.txt` + if args().len() != 3 { + eprintln!("Usage: ./decompress_file `source` `target`"); + return; + } + let input = BufReader::new(File::open(args().nth(1).unwrap()).unwrap()); + let mut output = File::create(args().nth(2).unwrap()).unwrap(); + let source_len = input.get_ref().metadata().unwrap().len(); + let start = Instant::now(); + let mut decoder = bufread::GzDecoder::new(input); + copy(&mut decoder, &mut output).unwrap(); + println!("Source len: {:?}", source_len); + println!("Target len: {:?}", output.metadata().unwrap().len()); + println!("Elapsed: {:?}", start.elapsed()); +} diff --git a/examples/deflatedecoder-bufread.rs b/examples/deflatedecoder-bufread.rs index 7d1cb6f..6b8e7fe 100644 --- a/examples/deflatedecoder-bufread.rs +++ b/examples/deflatedecoder-bufread.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::bufread::DeflateDecoder; use flate2::write::DeflateEncoder; use flate2::Compression; diff --git a/examples/deflatedecoder-read.rs b/examples/deflatedecoder-read.rs index cc5d435..1bc83bc 100644 --- a/examples/deflatedecoder-read.rs +++ b/examples/deflatedecoder-read.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::read::DeflateDecoder; use flate2::write::DeflateEncoder; use flate2::Compression; diff --git a/examples/deflatedecoder-write.rs b/examples/deflatedecoder-write.rs index 276490b..b9ba7a3 100644 --- a/examples/deflatedecoder-write.rs +++ b/examples/deflatedecoder-write.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::write::DeflateDecoder; use flate2::write::DeflateEncoder; use flate2::Compression; diff --git a/examples/deflateencoder-bufread.rs b/examples/deflateencoder-bufread.rs index 6240f39..bff1848 100644 --- a/examples/deflateencoder-bufread.rs +++ b/examples/deflateencoder-bufread.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::bufread::DeflateEncoder; use flate2::Compression; use std::fs::File; diff --git 
a/examples/deflateencoder-read.rs b/examples/deflateencoder-read.rs index ffb628e..366c60c 100644 --- a/examples/deflateencoder-read.rs +++ b/examples/deflateencoder-read.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::read::DeflateEncoder; use flate2::Compression; use std::io; diff --git a/examples/deflateencoder-write.rs b/examples/deflateencoder-write.rs index 243b9df..f998e1a 100644 --- a/examples/deflateencoder-write.rs +++ b/examples/deflateencoder-write.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::write::DeflateEncoder; use flate2::Compression; use std::io::prelude::*; diff --git a/examples/gzbuilder.rs b/examples/gzbuilder.rs index d6ec2f4..c072508 100644 --- a/examples/gzbuilder.rs +++ b/examples/gzbuilder.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::Compression; use flate2::GzBuilder; use std::fs::File; diff --git a/examples/gzdecoder-bufread.rs b/examples/gzdecoder-bufread.rs index 8551197..06d0448 100644 --- a/examples/gzdecoder-bufread.rs +++ b/examples/gzdecoder-bufread.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::write::GzEncoder; use flate2::{bufread, Compression}; use std::io; diff --git a/examples/gzdecoder-read.rs b/examples/gzdecoder-read.rs index 705d28c..fbd05be 100644 --- a/examples/gzdecoder-read.rs +++ b/examples/gzdecoder-read.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::write::GzEncoder; use flate2::{read, Compression}; use std::io; diff --git a/examples/gzdecoder-write.rs b/examples/gzdecoder-write.rs index 766bb5e..6cee8f8 100644 --- a/examples/gzdecoder-write.rs +++ b/examples/gzdecoder-write.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::write::{GzDecoder, GzEncoder}; use flate2::Compression; use std::io; diff --git a/examples/gzencoder-bufread.rs b/examples/gzencoder-bufread.rs index 015ae0a..2214e2d 100644 --- a/examples/gzencoder-bufread.rs +++ b/examples/gzencoder-bufread.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::bufread::GzEncoder; use flate2::Compression; use 
std::fs::File; diff --git a/examples/gzencoder-read.rs b/examples/gzencoder-read.rs index 3f1262b..ee13848 100644 --- a/examples/gzencoder-read.rs +++ b/examples/gzencoder-read.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::read::GzEncoder; use flate2::Compression; use std::io; diff --git a/examples/gzencoder-write.rs b/examples/gzencoder-write.rs index 275b010..cee01d1 100644 --- a/examples/gzencoder-write.rs +++ b/examples/gzencoder-write.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::write::GzEncoder; use flate2::Compression; use std::io::prelude::*; diff --git a/examples/gzmultidecoder-bufread.rs b/examples/gzmultidecoder-bufread.rs index c6bb2c5..692b6f5 100644 --- a/examples/gzmultidecoder-bufread.rs +++ b/examples/gzmultidecoder-bufread.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::bufread::MultiGzDecoder; use flate2::write::GzEncoder; use flate2::Compression; diff --git a/examples/gzmultidecoder-read.rs b/examples/gzmultidecoder-read.rs index 7c8a8e3..c59561d 100644 --- a/examples/gzmultidecoder-read.rs +++ b/examples/gzmultidecoder-read.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::read::MultiGzDecoder; use flate2::write::GzEncoder; use flate2::Compression; diff --git a/examples/zlibdecoder-bufread.rs b/examples/zlibdecoder-bufread.rs index 30f168a..82620c5 100644 --- a/examples/zlibdecoder-bufread.rs +++ b/examples/zlibdecoder-bufread.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::bufread::ZlibDecoder; use flate2::write::ZlibEncoder; use flate2::Compression; diff --git a/examples/zlibdecoder-read.rs b/examples/zlibdecoder-read.rs index f7e5fb0..3d4cda7 100644 --- a/examples/zlibdecoder-read.rs +++ b/examples/zlibdecoder-read.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::read::ZlibDecoder; use flate2::write::ZlibEncoder; use flate2::Compression; diff --git a/examples/zlibdecoder-write.rs b/examples/zlibdecoder-write.rs index 358e903..7f5afbe 100644 --- a/examples/zlibdecoder-write.rs +++ 
b/examples/zlibdecoder-write.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::write::ZlibDecoder; use flate2::write::ZlibEncoder; use flate2::Compression; diff --git a/examples/zlibencoder-bufread.rs b/examples/zlibencoder-bufread.rs index 0321d8d..e5e17f5 100644 --- a/examples/zlibencoder-bufread.rs +++ b/examples/zlibencoder-bufread.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::bufread::ZlibEncoder; use flate2::Compression; use std::fs::File; diff --git a/examples/zlibencoder-read.rs b/examples/zlibencoder-read.rs index 779eb1d..f833a50 100644 --- a/examples/zlibencoder-read.rs +++ b/examples/zlibencoder-read.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::read::ZlibEncoder; use flate2::Compression; use std::fs::File; diff --git a/examples/zlibencoder-write.rs b/examples/zlibencoder-write.rs index 76bcf17..3e3548f 100644 --- a/examples/zlibencoder-write.rs +++ b/examples/zlibencoder-write.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::write::ZlibEncoder; use flate2::Compression; use std::io::prelude::*; @@ -63,13 +63,13 @@ impl Crc { /// Combine the CRC with the CRC for the subsequent block of bytes. pub fn combine(&mut self, additional_crc: &Crc) { - self.amt += additional_crc.amt; + self.amt = self.amt.wrapping_add(additional_crc.amt); self.hasher.combine(&additional_crc.hasher); } } impl<R: Read> CrcReader<R> { - /// Create a new CrcReader. + /// Create a new `CrcReader`. pub fn new(r: R) -> CrcReader<R> { CrcReader { inner: r, @@ -79,27 +79,27 @@ impl<R: Read> CrcReader<R> { } impl<R> CrcReader<R> { - /// Get the Crc for this CrcReader. + /// Get the Crc for this `CrcReader`. pub fn crc(&self) -> &Crc { &self.crc } - /// Get the reader that is wrapped by this CrcReader. + /// Get the reader that is wrapped by this `CrcReader`. pub fn into_inner(self) -> R { self.inner } - /// Get the reader that is wrapped by this CrcReader by reference. + /// Get the reader that is wrapped by this `CrcReader` by reference. 
pub fn get_ref(&self) -> &R { &self.inner } - /// Get a mutable reference to the reader that is wrapped by this CrcReader. + /// Get a mutable reference to the reader that is wrapped by this `CrcReader`. pub fn get_mut(&mut self) -> &mut R { &mut self.inner } - /// Reset the Crc in this CrcReader. + /// Reset the Crc in this `CrcReader`. pub fn reset(&mut self) { self.crc.reset(); } @@ -135,34 +135,34 @@ pub struct CrcWriter<W> { } impl<W> CrcWriter<W> { - /// Get the Crc for this CrcWriter. + /// Get the Crc for this `CrcWriter`. pub fn crc(&self) -> &Crc { &self.crc } - /// Get the writer that is wrapped by this CrcWriter. + /// Get the writer that is wrapped by this `CrcWriter`. pub fn into_inner(self) -> W { self.inner } - /// Get the writer that is wrapped by this CrcWriter by reference. + /// Get the writer that is wrapped by this `CrcWriter` by reference. pub fn get_ref(&self) -> &W { &self.inner } - /// Get a mutable reference to the writer that is wrapped by this CrcWriter. + /// Get a mutable reference to the writer that is wrapped by this `CrcWriter`. pub fn get_mut(&mut self) -> &mut W { &mut self.inner } - /// Reset the Crc in this CrcWriter. + /// Reset the Crc in this `CrcWriter`. pub fn reset(&mut self) { self.crc.reset(); } } impl<W: Write> CrcWriter<W> { - /// Create a new CrcWriter. + /// Create a new `CrcWriter`. pub fn new(w: W) -> CrcWriter<W> { CrcWriter { inner: w, diff --git a/src/deflate/bufread.rs b/src/deflate/bufread.rs index f0b29e0..c70a630 100644 --- a/src/deflate/bufread.rs +++ b/src/deflate/bufread.rs @@ -7,9 +7,10 @@ use crate::{Compress, Decompress}; /// A DEFLATE encoder, or compressor. /// -/// This structure consumes a [`BufRead`] interface, reading uncompressed data -/// from the underlying reader, and emitting compressed data. +/// This structure implements a [`Read`] interface. When read from, it reads +/// uncompressed data from the underlying [`BufRead`] and provides the compressed data. 
/// +/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples @@ -123,9 +124,10 @@ impl<W: BufRead + Write> Write for DeflateEncoder<W> { /// A DEFLATE decoder, or decompressor. /// -/// This structure consumes a [`BufRead`] interface, reading compressed data -/// from the underlying reader, and emitting uncompressed data. +/// This structure implements a [`Read`] interface. When read from, it reads +/// compressed data from the underlying [`BufRead`] and provides the uncompressed data. /// +/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples @@ -202,7 +204,7 @@ impl<R> DeflateDecoder<R> { /// Acquires a mutable reference to the underlying stream /// /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. + /// this decoder is continued to be used. 
pub fn get_mut(&mut self) -> &mut R { &mut self.obj } diff --git a/src/deflate/mod.rs b/src/deflate/mod.rs index 51758b3..7f3bf70 100644 --- a/src/deflate/mod.rs +++ b/src/deflate/mod.rs @@ -18,14 +18,14 @@ mod tests { let v = crate::random_bytes().take(1024).collect::<Vec<_>>(); for _ in 0..200 { let to_write = &v[..thread_rng().gen_range(0..v.len())]; - real.extend(to_write.iter().map(|x| *x)); + real.extend(to_write.iter().copied()); w.write_all(to_write).unwrap(); } let result = w.finish().unwrap(); let mut r = read::DeflateDecoder::new(&result[..]); let mut ret = Vec::new(); r.read_to_end(&mut ret).unwrap(); - assert!(ret == real); + assert_eq!(ret, real); } #[test] @@ -37,7 +37,7 @@ mod tests { let mut r = read::DeflateDecoder::new(&data[..]); let mut ret = Vec::new(); r.read_to_end(&mut ret).unwrap(); - assert!(ret == b"foo"); + assert_eq!(ret, b"foo"); } #[test] @@ -47,7 +47,7 @@ mod tests { let v = crate::random_bytes().take(1024).collect::<Vec<_>>(); for _ in 0..200 { let to_write = &v[..thread_rng().gen_range(0..v.len())]; - real.extend(to_write.iter().map(|x| *x)); + real.extend(to_write.iter().copied()); w.write_all(to_write).unwrap(); } let mut result = w.finish().unwrap(); @@ -55,13 +55,13 @@ mod tests { let result_len = result.len(); for _ in 0..200 { - result.extend(v.iter().map(|x| *x)); + result.extend(v.iter().copied()); } let mut r = read::DeflateDecoder::new(&result[..]); let mut ret = Vec::new(); r.read_to_end(&mut ret).unwrap(); - assert!(ret == real); + assert_eq!(ret, real); assert_eq!(r.total_in(), result_len as u64); } @@ -84,7 +84,7 @@ mod tests { ); w.write_all(&v).unwrap(); let w = w.finish().unwrap().finish().unwrap(); - assert!(w == v); + assert_eq!(w, v); } #[test] @@ -159,7 +159,7 @@ mod tests { let mut d = read::DeflateDecoder::new(&result[..]); let mut data = Vec::new(); - assert!(d.read(&mut data).unwrap() == 0); + assert_eq!(d.read(&mut data).unwrap(), 0); } #[test] diff --git a/src/deflate/read.rs b/src/deflate/read.rs index 
fd17a89..2b6b8f2 100644 --- a/src/deflate/read.rs +++ b/src/deflate/read.rs @@ -6,8 +6,8 @@ use crate::bufreader::BufReader; /// A DEFLATE encoder, or compressor. /// -/// This structure implements a [`Read`] interface and will read uncompressed -/// data from an underlying stream and emit a stream of compressed data. +/// This structure implements a [`Read`] interface. When read from, it reads +/// uncompressed data from the underlying [`Read`] and provides the compressed data. /// /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// @@ -25,11 +25,11 @@ use crate::bufreader::BufReader; /// # /// // Return a vector containing the Deflate compressed version of hello world /// fn deflateencoder_read_hello_world() -> io::Result<Vec<u8>> { -/// let mut ret_vec = [0;100]; +/// let mut ret_vec = Vec::new(); /// let c = b"hello world"; /// let mut deflater = DeflateEncoder::new(&c[..], Compression::fast()); -/// let count = deflater.read(&mut ret_vec)?; -/// Ok(ret_vec[0..count].to_vec()) +/// deflater.read_to_end(&mut ret_vec)?; +/// Ok(ret_vec) /// } /// ``` #[derive(Debug)] @@ -120,8 +120,8 @@ impl<W: Read + Write> Write for DeflateEncoder<W> { /// A DEFLATE decoder, or decompressor. /// -/// This structure implements a [`Read`] interface and takes a stream of -/// compressed data as input, providing the decompressed data when read from. +/// This structure implements a [`Read`] interface. When read from, it reads +/// compressed data from the underlying [`Read`] and provides the uncompressed data. /// /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// @@ -196,7 +196,7 @@ impl<R> DeflateDecoder<R> { /// Acquires a mutable reference to the underlying stream /// /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. + /// this decoder is continued to be used. 
pub fn get_mut(&mut self) -> &mut R { self.inner.get_mut().get_mut() } diff --git a/src/ffi/c.rs b/src/ffi/c.rs index 59e2011..32864f8 100644 --- a/src/ffi/c.rs +++ b/src/ffi/c.rs @@ -215,9 +215,9 @@ impl InflateBackend for Inflate { let raw = &mut *self.inner.stream_wrapper; raw.msg = ptr::null_mut(); raw.next_in = input.as_ptr() as *mut u8; - raw.avail_in = cmp::min(input.len(), c_uint::max_value() as usize) as c_uint; + raw.avail_in = cmp::min(input.len(), c_uint::MAX as usize) as c_uint; raw.next_out = output.as_mut_ptr(); - raw.avail_out = cmp::min(output.len(), c_uint::max_value() as usize) as c_uint; + raw.avail_out = cmp::min(output.len(), c_uint::MAX as usize) as c_uint; let rc = unsafe { mz_inflate(raw, flush as c_int) }; @@ -226,6 +226,12 @@ impl InflateBackend for Inflate { self.inner.total_in += (raw.next_in as usize - input.as_ptr() as usize) as u64; self.inner.total_out += (raw.next_out as usize - output.as_ptr() as usize) as u64; + // reset these pointers so we don't accidentally read them later + raw.next_in = ptr::null_mut(); + raw.avail_in = 0; + raw.next_out = ptr::null_mut(); + raw.avail_out = 0; + match rc { MZ_DATA_ERROR | MZ_STREAM_ERROR => mem::decompress_failed(self.inner.msg()), MZ_OK => Ok(Status::Ok), @@ -303,9 +309,9 @@ impl DeflateBackend for Deflate { let raw = &mut *self.inner.stream_wrapper; raw.msg = ptr::null_mut(); raw.next_in = input.as_ptr() as *mut _; - raw.avail_in = cmp::min(input.len(), c_uint::max_value() as usize) as c_uint; + raw.avail_in = cmp::min(input.len(), c_uint::MAX as usize) as c_uint; raw.next_out = output.as_mut_ptr(); - raw.avail_out = cmp::min(output.len(), c_uint::max_value() as usize) as c_uint; + raw.avail_out = cmp::min(output.len(), c_uint::MAX as usize) as c_uint; let rc = unsafe { mz_deflate(raw, flush as c_int) }; @@ -314,6 +320,12 @@ impl DeflateBackend for Deflate { self.inner.total_in += (raw.next_in as usize - input.as_ptr() as usize) as u64; self.inner.total_out += (raw.next_out as usize - 
output.as_ptr() as usize) as u64; + // reset these pointers so we don't accidentally read them later + raw.next_in = ptr::null_mut(); + raw.avail_in = 0; + raw.next_out = ptr::null_mut(); + raw.avail_out = 0; + match rc { MZ_OK => Ok(Status::Ok), MZ_BUF_ERROR => Ok(Status::BufError), diff --git a/src/ffi/mod.rs b/src/ffi/mod.rs index 8bac6e4..20b3cae 100644 --- a/src/ffi/mod.rs +++ b/src/ffi/mod.rs @@ -40,9 +40,9 @@ mod c; #[cfg(feature = "any_zlib")] pub use self::c::*; -#[cfg(not(feature = "any_zlib"))] +#[cfg(all(not(feature = "any_zlib"), feature = "miniz_oxide"))] mod rust; -#[cfg(not(feature = "any_zlib"))] +#[cfg(all(not(feature = "any_zlib"), feature = "miniz_oxide"))] pub use self::rust::*; impl std::fmt::Debug for ErrorMessage { diff --git a/src/ffi/rust.rs b/src/ffi/rust.rs index eadd6ec..bed6629 100644 --- a/src/ffi/rust.rs +++ b/src/ffi/rust.rs @@ -1,4 +1,4 @@ -//! Implementation for miniz_oxide rust backend. +//! Implementation for `miniz_oxide` rust backend. use std::convert::TryInto; use std::fmt; diff --git a/src/gz/bufread.rs b/src/gz/bufread.rs index 6be144d..679b4a7 100644 --- a/src/gz/bufread.rs +++ b/src/gz/bufread.rs @@ -3,9 +3,8 @@ use std::io; use std::io::prelude::*; use std::mem; -use super::{GzBuilder, GzHeader}; -use super::{FCOMMENT, FEXTRA, FHCRC, FNAME}; -use crate::crc::{Crc, CrcReader}; +use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser}; +use crate::crc::CrcReader; use crate::deflate; use crate::Compression; @@ -18,118 +17,12 @@ fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize { min } -pub(crate) fn corrupt() -> io::Error { - io::Error::new( - io::ErrorKind::InvalidInput, - "corrupt gzip stream does not have a matching checksum", - ) -} - -fn bad_header() -> io::Error { - io::Error::new(io::ErrorKind::InvalidInput, "invalid gzip header") -} - -fn read_le_u16<R: Read>(r: &mut Buffer<R>) -> io::Result<u16> { - let mut b = [0; 2]; - r.read_and_forget(&mut b)?; - Ok((b[0] as u16) | ((b[1] as u16) << 
8)) -} - -fn read_gz_header_part<'a, R: Read>(r: &'a mut Buffer<'a, R>) -> io::Result<()> { - loop { - match r.part.state { - GzHeaderParsingState::Start => { - let mut header = [0; 10]; - r.read_and_forget(&mut header)?; - - if header[0] != 0x1f || header[1] != 0x8b { - return Err(bad_header()); - } - if header[2] != 8 { - return Err(bad_header()); - } - - r.part.flg = header[3]; - r.part.header.mtime = ((header[4] as u32) << 0) - | ((header[5] as u32) << 8) - | ((header[6] as u32) << 16) - | ((header[7] as u32) << 24); - let _xfl = header[8]; - r.part.header.operating_system = header[9]; - r.part.state = GzHeaderParsingState::Xlen; - } - GzHeaderParsingState::Xlen => { - if r.part.flg & FEXTRA != 0 { - r.part.xlen = read_le_u16(r)?; - } - r.part.state = GzHeaderParsingState::Extra; - } - GzHeaderParsingState::Extra => { - if r.part.flg & FEXTRA != 0 { - let mut extra = vec![0; r.part.xlen as usize]; - r.read_and_forget(&mut extra)?; - r.part.header.extra = Some(extra); - } - r.part.state = GzHeaderParsingState::Filename; - } - GzHeaderParsingState::Filename => { - if r.part.flg & FNAME != 0 { - if None == r.part.header.filename { - r.part.header.filename = Some(Vec::new()); - }; - for byte in r.bytes() { - let byte = byte?; - if byte == 0 { - break; - } - } - } - r.part.state = GzHeaderParsingState::Comment; - } - GzHeaderParsingState::Comment => { - if r.part.flg & FCOMMENT != 0 { - if None == r.part.header.comment { - r.part.header.comment = Some(Vec::new()); - }; - for byte in r.bytes() { - let byte = byte?; - if byte == 0 { - break; - } - } - } - r.part.state = GzHeaderParsingState::Crc; - } - GzHeaderParsingState::Crc => { - if r.part.flg & FHCRC != 0 { - let stored_crc = read_le_u16(r)?; - let calced_crc = r.part.crc.sum() as u16; - if stored_crc != calced_crc { - return Err(corrupt()); - } - } - return Ok(()); - } - } - } -} - -pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> { - let mut part = GzHeaderPartial::new(); - - let result 
= { - let mut reader = Buffer::new(&mut part, r); - read_gz_header_part(&mut reader) - }; - result.map(|()| part.take_header()) -} - /// A gzip streaming encoder /// -/// This structure exposes a [`BufRead`] interface that will read uncompressed data -/// from the underlying reader and expose the compressed version as a [`BufRead`] -/// interface. +/// This structure implements a [`Read`] interface. When read from, it reads +/// uncompressed data from the underlying [`BufRead`] and provides the compressed data. /// +/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples @@ -270,11 +163,22 @@ impl<R: BufRead + Write> Write for GzEncoder<R> { } } -/// A gzip streaming decoder +/// A decoder for a single member of a [gzip file]. +/// +/// This structure implements a [`Read`] interface. When read from, it reads +/// compressed data from the underlying [`BufRead`] and provides the uncompressed data. /// -/// This structure consumes a [`BufRead`] interface, reading compressed data -/// from the underlying reader, and emitting uncompressed data. +/// After reading a single member of the gzip data this reader will return +/// Ok(0) even if there are more bytes available in the underlying reader. +/// If you need the following bytes, call `into_inner()` after Ok(0) to +/// recover the underlying reader. /// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). 
+/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 +/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples @@ -305,161 +209,38 @@ impl<R: BufRead + Write> Write for GzEncoder<R> { /// ``` #[derive(Debug)] pub struct GzDecoder<R> { - inner: GzState, - header: Option<GzHeader>, + state: GzState, reader: CrcReader<deflate::bufread::DeflateDecoder<R>>, multi: bool, } #[derive(Debug)] -pub enum GzHeaderParsingState { - Start, - Xlen, - Extra, - Filename, - Comment, - Crc, -} - -#[derive(Debug)] -pub struct GzHeaderPartial { - buf: Vec<u8>, - state: GzHeaderParsingState, - flg: u8, - xlen: u16, - crc: Crc, - header: GzHeader, -} - -impl GzHeaderPartial { - fn new() -> GzHeaderPartial { - GzHeaderPartial { - buf: Vec::with_capacity(10), // minimum header length - state: GzHeaderParsingState::Start, - flg: 0, - xlen: 0, - crc: Crc::new(), - header: GzHeader { - extra: None, - filename: None, - comment: None, - operating_system: 0, - mtime: 0, - }, - } - } - - pub fn take_header(self) -> GzHeader { - self.header - } -} - -#[derive(Debug)] enum GzState { - Header(GzHeaderPartial), - Body, - Finished(usize, [u8; 8]), + Header(GzHeaderParser), + Body(GzHeader), + Finished(GzHeader, usize, [u8; 8]), Err(io::Error), - End, -} - -/// A small adapter which reads data originally from `buf` and then reads all -/// further data from `reader`. This will also buffer all data read from -/// `reader` into `buf` for reuse on a further call. 
-struct Buffer<'a, T: 'a> { - part: &'a mut GzHeaderPartial, - buf_cur: usize, - buf_max: usize, - reader: &'a mut T, -} - -impl<'a, T> Buffer<'a, T> { - fn new(part: &'a mut GzHeaderPartial, reader: &'a mut T) -> Buffer<'a, T> { - Buffer { - reader, - buf_cur: 0, - buf_max: part.buf.len(), - part, - } - } -} - -impl<'a, T: Read> Read for Buffer<'a, T> { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - let mut bufref = match self.part.state { - GzHeaderParsingState::Filename => self.part.header.filename.as_mut(), - GzHeaderParsingState::Comment => self.part.header.comment.as_mut(), - _ => None, - }; - if let Some(ref mut b) = bufref { - // we have a direct reference to a buffer where to write - let len = self.reader.read(buf)?; - if len > 0 && buf[len - 1] == 0 { - // we do not append the final 0 - b.extend_from_slice(&buf[..len - 1]); - } else { - b.extend_from_slice(&buf[..len]); - } - self.part.crc.update(&buf[..len]); - Ok(len) - } else if self.buf_cur == self.buf_max { - // we read new bytes and also save them in self.part.buf - let len = self.reader.read(buf)?; - self.part.buf.extend_from_slice(&buf[..len]); - self.part.crc.update(&buf[..len]); - Ok(len) - } else { - // we first read the previously saved bytes - let len = (&self.part.buf[self.buf_cur..self.buf_max]).read(buf)?; - self.buf_cur += len; - Ok(len) - } - } -} - -impl<'a, T> Buffer<'a, T> -where - T: std::io::Read, -{ - // If we manage to read all the bytes, we reset the buffer - fn read_and_forget(&mut self, buf: &mut [u8]) -> io::Result<usize> { - self.read_exact(buf)?; - // we managed to read the whole buf - // we will no longer need the previously saved bytes in self.part.buf - let rlen = buf.len(); - self.part.buf.truncate(0); - self.buf_cur = 0; - self.buf_max = 0; - Ok(rlen) - } + End(Option<GzHeader>), } impl<R: BufRead> GzDecoder<R> { /// Creates a new decoder from the given reader, immediately parsing the /// gzip header. 
pub fn new(mut r: R) -> GzDecoder<R> { - let mut part = GzHeaderPartial::new(); - let mut header = None; + let mut header_parser = GzHeaderParser::new(); - let result = { - let mut reader = Buffer::new(&mut part, &mut r); - read_gz_header_part(&mut reader) - }; - - let state = match result { - Ok(()) => { - header = Some(part.take_header()); - GzState::Body + let state = match header_parser.parse(&mut r) { + Ok(_) => GzState::Body(GzHeader::from(header_parser)), + Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => { + GzState::Header(header_parser) } - Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(part), Err(err) => GzState::Err(err), }; GzDecoder { - inner: state, + state, reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), multi: false, - header, } } @@ -472,7 +253,11 @@ impl<R: BufRead> GzDecoder<R> { impl<R> GzDecoder<R> { /// Returns the header associated with this stream, if it was valid pub fn header(&self) -> Option<&GzHeader> { - self.header.as_ref() + match &self.state { + GzState::Body(header) | GzState::Finished(header, _, _) => Some(header), + GzState::End(header) => header.as_ref(), + _ => None, + } } /// Acquires a reference to the underlying reader. @@ -483,7 +268,7 @@ impl<R> GzDecoder<R> { /// Acquires a mutable reference to the underlying stream. /// /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. + /// this decoder is continued to be used. 
pub fn get_mut(&mut self) -> &mut R { self.reader.get_mut().get_mut() } @@ -496,111 +281,61 @@ impl<R> GzDecoder<R> { impl<R: BufRead> Read for GzDecoder<R> { fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { - let GzDecoder { - inner, - header, - reader, - multi, - } = self; - loop { - *inner = match mem::replace(inner, GzState::End) { - GzState::Header(mut part) => { - let result = { - let mut reader = Buffer::new(&mut part, reader.get_mut().get_mut()); - read_gz_header_part(&mut reader) - }; - match result { - Ok(()) => { - *header = Some(part.take_header()); - GzState::Body - } - Err(err) if io::ErrorKind::WouldBlock == err.kind() => { - *inner = GzState::Header(part); - return Err(err); - } - Err(err) => return Err(err), - } + match &mut self.state { + GzState::Header(parser) => { + parser.parse(self.reader.get_mut().get_mut())?; + self.state = GzState::Body(GzHeader::from(mem::take(parser))); } - GzState::Body => { + GzState::Body(header) => { if into.is_empty() { - *inner = GzState::Body; return Ok(0); } - - let n = reader.read(into).map_err(|err| { - if io::ErrorKind::WouldBlock == err.kind() { - *inner = GzState::Body; + match self.reader.read(into)? 
{ + 0 => { + self.state = GzState::Finished(mem::take(header), 0, [0; 8]); } - - err - })?; - - match n { - 0 => GzState::Finished(0, [0; 8]), n => { - *inner = GzState::Body; return Ok(n); } } } - GzState::Finished(pos, mut buf) => { - if pos < buf.len() { - let n = reader - .get_mut() - .get_mut() - .read(&mut buf[pos..]) - .and_then(|n| { - if n == 0 { - Err(io::ErrorKind::UnexpectedEof.into()) - } else { - Ok(n) - } - }) - .map_err(|err| { - if io::ErrorKind::WouldBlock == err.kind() { - *inner = GzState::Finished(pos, buf); - } - - err - })?; - - GzState::Finished(pos + n, buf) + GzState::Finished(header, pos, buf) => { + if *pos < buf.len() { + *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?; } else { let (crc, amt) = finish(&buf); - if crc != reader.crc().sum() || amt != reader.crc().amount() { + if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() { + self.state = GzState::End(Some(mem::take(header))); return Err(corrupt()); - } else if *multi { - let is_eof = reader + } else if self.multi { + let is_eof = self + .reader .get_mut() .get_mut() .fill_buf() - .map(|buf| buf.is_empty()) - .map_err(|err| { - if io::ErrorKind::WouldBlock == err.kind() { - *inner = GzState::Finished(pos, buf); - } - - err - })?; + .map(|buf| buf.is_empty())?; if is_eof { - GzState::End + self.state = GzState::End(Some(mem::take(header))); } else { - reader.reset(); - reader.get_mut().reset_data(); - header.take(); - GzState::Header(GzHeaderPartial::new()) + self.reader.reset(); + self.reader.get_mut().reset_data(); + self.state = GzState::Header(GzHeaderParser::new()) } } else { - GzState::End + self.state = GzState::End(Some(mem::take(header))); } } } - GzState::Err(err) => return Err(err), - GzState::End => return Ok(0), - }; + GzState::Err(err) => { + let result = Err(mem::replace(err, io::ErrorKind::Other.into())); + self.state = GzState::End(None); + return result; + } + GzState::End(_) => return Ok(0), + } } } } @@ -615,18 +350,20 @@ 
impl<R: BufRead + Write> Write for GzDecoder<R> { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. +/// +/// This structure implements a [`Read`] interface. When read from, it reads +/// compressed data from the underlying [`BufRead`] and provides the uncompressed data. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while `GzDecoder` will only decompress -/// the first gzip member. The multistream format is commonly used in -/// bioinformatics, for example when using the BGZF compressed data. +/// A gzip file consists of a series of *members* concatenated one after another. +/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the +/// underlying reader does. For a file, this reads to the end of the file. /// -/// This structure exposes a [`BufRead`] interface that will consume all gzip members -/// from the underlying reader and emit uncompressed data. +/// To handle members seperately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 +/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples @@ -681,7 +418,7 @@ impl<R> MultiGzDecoder<R> { /// Acquires a mutable reference to the underlying stream. /// /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. + /// this decoder is continued to be used. 
pub fn get_mut(&mut self) -> &mut R { self.0.get_mut() } @@ -699,154 +436,48 @@ impl<R: BufRead> Read for MultiGzDecoder<R> { } #[cfg(test)] -pub mod tests { - use crate::gz::bufread::*; - use std::io; - use std::io::{Cursor, Read, Write}; - - //a cursor turning EOF into blocking errors - #[derive(Debug)] - pub struct BlockingCursor { - pub cursor: Cursor<Vec<u8>>, - } - - impl BlockingCursor { - pub fn new() -> BlockingCursor { - BlockingCursor { - cursor: Cursor::new(Vec::new()), - } - } - - pub fn set_position(&mut self, pos: u64) { - return self.cursor.set_position(pos); - } - - pub fn position(&mut self) -> u64 { - return self.cursor.position(); - } - } - - impl Write for BlockingCursor { - fn write(&mut self, buf: &[u8]) -> io::Result<usize> { - return self.cursor.write(buf); - } - fn flush(&mut self) -> io::Result<()> { - return self.cursor.flush(); - } - } - - impl Read for BlockingCursor { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - //use the cursor, except it turns eof into blocking error - let r = self.cursor.read(buf); - match r { - Err(ref err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(io::ErrorKind::WouldBlock.into()); - } - } - Ok(0) => { - //regular EOF turned into blocking error - return Err(io::ErrorKind::WouldBlock.into()); - } - Ok(_n) => {} - } - return r; - } - } +mod test { + use crate::bufread::GzDecoder; + use crate::gz::write; + use crate::Compression; + use std::io::{Read, Write}; + + // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any + // additional data to be consumed by the caller. 
#[test] - // test function read_and_forget of Buffer - fn buffer_read_and_forget() { - // this is unused except for the buffering - let mut part = GzHeaderPartial::new(); - // this is a reader which receives data afterwards - let mut r = BlockingCursor::new(); - let data = vec![1, 2, 3]; - let mut out = Vec::with_capacity(7); - - match r.write_all(&data) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(0); - - // First read : successful for one byte - let mut reader = Buffer::new(&mut part, &mut r); - out.resize(1, 0); - match reader.read_and_forget(&mut out) { - Ok(1) => {} - _ => { - panic!("Unexpected result for read_and_forget with data"); - } - } - - // Second read : incomplete for 7 bytes (we have only 2) - out.resize(7, 0); - match reader.read_and_forget(&mut out) { - Err(ref err) => { - assert_eq!(io::ErrorKind::WouldBlock, err.kind()); - } - _ => { - panic!("Unexpected result for read_and_forget with incomplete"); - } - } - - // 3 more data bytes have arrived - let pos = r.position(); - let data2 = vec![4, 5, 6]; - match r.write_all(&data2) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(pos); - - // Third read : still incomplete for 7 bytes (we have 5) - let mut reader2 = Buffer::new(&mut part, &mut r); - match reader2.read_and_forget(&mut out) { - Err(ref err) => { - assert_eq!(io::ErrorKind::WouldBlock, err.kind()); - } - _ => { - panic!("Unexpected result for read_and_forget with more incomplete"); - } - } - - // 3 more data bytes have arrived again - let pos2 = r.position(); - let data3 = vec![7, 8, 9]; - match r.write_all(&data3) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(pos2); - - // Fourth read : now successful for 7 bytes - let mut reader3 = Buffer::new(&mut part, &mut r); - match reader3.read_and_forget(&mut out) { - Ok(7) => { - assert_eq!(out[0], 2); - assert_eq!(out[6], 8); - } - _ => { - 
panic!("Unexpected result for read_and_forget with data"); - } - } + fn decode_extra_data() { + let expected = "Hello World"; + + let compressed = { + let mut e = write::GzEncoder::new(Vec::new(), Compression::default()); + e.write(expected.as_ref()).unwrap(); + let mut b = e.finish().unwrap(); + b.push(b'x'); + b + }; - // Fifth read : successful for one more byte - out.resize(1, 0); - match reader3.read_and_forget(&mut out) { - Ok(1) => { - assert_eq!(out[0], 9); - } - _ => { - panic!("Unexpected result for read_and_forget with data"); - } - } + let mut output = Vec::new(); + let mut decoder = GzDecoder::new(compressed.as_slice()); + let decoded_bytes = decoder.read_to_end(&mut output).unwrap(); + assert_eq!(decoded_bytes, output.len()); + let actual = std::str::from_utf8(&output).expect("String parsing error"); + assert_eq!( + actual, expected, + "after decompression we obtain the original input" + ); + + output.clear(); + assert_eq!( + decoder.read(&mut output).unwrap(), + 0, + "subsequent read of decoder returns 0, but inner reader can return additional data" + ); + let mut reader = decoder.into_inner(); + assert_eq!( + reader.read_to_end(&mut output).unwrap(), + 1, + "extra data is accessible in underlying buf-read" + ); + assert_eq!(output, b"x"); } } diff --git a/src/gz/mod.rs b/src/gz/mod.rs index 505450e..31a6961 100644 --- a/src/gz/mod.rs +++ b/src/gz/mod.rs @@ -1,19 +1,24 @@ use std::ffi::CString; -use std::io::prelude::*; +use std::io::{BufRead, Error, ErrorKind, Read, Result, Write}; use std::time; use crate::bufreader::BufReader; -use crate::Compression; +use crate::{Compression, Crc}; pub static FHCRC: u8 = 1 << 1; pub static FEXTRA: u8 = 1 << 2; pub static FNAME: u8 = 1 << 3; pub static FCOMMENT: u8 = 1 << 4; +pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7; pub mod bufread; pub mod read; pub mod write; +// The maximum length of the header filename and comment fields. 
More than +// enough for these fields in reasonable use, but prevents possible attacks. +const MAX_HEADER_BUF: usize = 65535; + /// A structure representing the header of a gzip stream. /// /// The header can contain metadata about the file that was compressed, if @@ -82,6 +87,210 @@ impl GzHeader { } } +#[derive(Debug)] +pub enum GzHeaderState { + Start(u8, [u8; 10]), + Xlen(Option<Box<Crc>>, u8, [u8; 2]), + Extra(Option<Box<Crc>>, u16), + Filename(Option<Box<Crc>>), + Comment(Option<Box<Crc>>), + Crc(Option<Box<Crc>>, u8, [u8; 2]), + Complete, +} + +impl Default for GzHeaderState { + fn default() -> Self { + Self::Complete + } +} + +#[derive(Debug, Default)] +pub struct GzHeaderParser { + state: GzHeaderState, + flags: u8, + header: GzHeader, +} + +impl GzHeaderParser { + fn new() -> Self { + GzHeaderParser { + state: GzHeaderState::Start(0, [0; 10]), + flags: 0, + header: GzHeader::default(), + } + } + + fn parse<'a, R: Read>(&mut self, r: &'a mut R) -> Result<()> { + loop { + match &mut self.state { + GzHeaderState::Start(count, buffer) => { + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? 
as u8; + } + // Gzip identification bytes + if buffer[0] != 0x1f || buffer[1] != 0x8b { + return Err(bad_header()); + } + // Gzip compression method (8 = deflate) + if buffer[2] != 8 { + return Err(bad_header()); + } + self.flags = buffer[3]; + // RFC1952: "must give an error indication if any reserved bit is non-zero" + if self.flags & FRESERVED != 0 { + return Err(bad_header()); + } + self.header.mtime = ((buffer[4] as u32) << 0) + | ((buffer[5] as u32) << 8) + | ((buffer[6] as u32) << 16) + | ((buffer[7] as u32) << 24); + let _xfl = buffer[8]; + self.header.operating_system = buffer[9]; + let crc = if self.flags & FHCRC != 0 { + let mut crc = Box::new(Crc::new()); + crc.update(buffer); + Some(crc) + } else { + None + }; + self.state = GzHeaderState::Xlen(crc, 0, [0; 2]); + } + GzHeaderState::Xlen(crc, count, buffer) => { + if self.flags & FEXTRA != 0 { + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? as u8; + } + if let Some(crc) = crc { + crc.update(buffer); + } + let xlen = parse_le_u16(&buffer); + self.header.extra = Some(vec![0; xlen as usize]); + self.state = GzHeaderState::Extra(crc.take(), 0); + } else { + self.state = GzHeaderState::Filename(crc.take()); + } + } + GzHeaderState::Extra(crc, count) => { + debug_assert!(self.header.extra.is_some()); + let extra = self.header.extra.as_mut().unwrap(); + while (*count as usize) < extra.len() { + *count += read_into(r, &mut extra[*count as usize..])? 
as u16; + } + if let Some(crc) = crc { + crc.update(extra); + } + self.state = GzHeaderState::Filename(crc.take()); + } + GzHeaderState::Filename(crc) => { + if self.flags & FNAME != 0 { + let filename = self.header.filename.get_or_insert_with(Vec::new); + read_to_nul(r, filename)?; + if let Some(crc) = crc { + crc.update(filename); + crc.update(b"\0"); + } + } + self.state = GzHeaderState::Comment(crc.take()); + } + GzHeaderState::Comment(crc) => { + if self.flags & FCOMMENT != 0 { + let comment = self.header.comment.get_or_insert_with(Vec::new); + read_to_nul(r, comment)?; + if let Some(crc) = crc { + crc.update(comment); + crc.update(b"\0"); + } + } + self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]); + } + GzHeaderState::Crc(crc, count, buffer) => { + if let Some(crc) = crc { + debug_assert!(self.flags & FHCRC != 0); + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? as u8; + } + let stored_crc = parse_le_u16(&buffer); + let calced_crc = crc.sum() as u16; + if stored_crc != calced_crc { + return Err(corrupt()); + } + } + self.state = GzHeaderState::Complete; + } + GzHeaderState::Complete => { + return Ok(()); + } + } + } + } + + fn header(&self) -> Option<&GzHeader> { + match self.state { + GzHeaderState::Complete => Some(&self.header), + _ => None, + } + } +} + +impl From<GzHeaderParser> for GzHeader { + fn from(parser: GzHeaderParser) -> Self { + debug_assert!(matches!(parser.state, GzHeaderState::Complete)); + parser.header + } +} + +// Attempt to fill the `buffer` from `r`. Return the number of bytes read. +// Return an error if EOF is read before the buffer is full. This differs +// from `read` in that Ok(0) means that more data may be available. 
+fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> { + debug_assert!(!buffer.is_empty()); + match r.read(buffer) { + Ok(0) => Err(ErrorKind::UnexpectedEof.into()), + Ok(n) => Ok(n), + Err(ref e) if e.kind() == ErrorKind::Interrupted => Ok(0), + Err(e) => Err(e), + } +} + +// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`. +fn read_to_nul<R: Read>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> { + let mut bytes = r.bytes(); + loop { + match bytes.next().transpose()? { + Some(byte) if byte == 0 => { + return Ok(()); + } + Some(_) if buffer.len() == MAX_HEADER_BUF => { + return Err(Error::new( + ErrorKind::InvalidInput, + "gzip header field too long", + )); + } + Some(byte) => { + buffer.push(byte); + } + None => { + return Err(ErrorKind::UnexpectedEof.into()); + } + } + } +} + +fn parse_le_u16(buffer: &[u8; 2]) -> u16 { + (buffer[0] as u16) | ((buffer[1] as u16) << 8) +} + +fn bad_header() -> Error { + Error::new(ErrorKind::InvalidInput, "invalid gzip header") +} + +fn corrupt() -> Error { + Error::new( + ErrorKind::InvalidInput, + "corrupt gzip stream does not have a matching checksum", + ) +} + /// A builder structure to create a new gzip Encoder. /// /// This structure controls header configuration options such as the filename. 
@@ -218,11 +427,11 @@ impl GzBuilder { } if let Some(filename) = filename { flg |= FNAME; - header.extend(filename.as_bytes_with_nul().iter().map(|x| *x)); + header.extend(filename.as_bytes_with_nul().iter().copied()); } if let Some(comment) = comment { flg |= FCOMMENT; - header.extend(comment.as_bytes_with_nul().iter().map(|x| *x)); + header.extend(comment.as_bytes_with_nul().iter().copied()); } header[0] = 0x1f; header[1] = 0x8b; @@ -253,8 +462,8 @@ impl GzBuilder { mod tests { use std::io::prelude::*; - use super::{read, write, GzBuilder}; - use crate::Compression; + use super::{read, write, GzBuilder, GzHeaderParser}; + use crate::{Compression, GzHeader}; use rand::{thread_rng, Rng}; #[test] @@ -285,14 +494,14 @@ mod tests { let v = crate::random_bytes().take(1024).collect::<Vec<_>>(); for _ in 0..200 { let to_write = &v[..thread_rng().gen_range(0..v.len())]; - real.extend(to_write.iter().map(|x| *x)); + real.extend(to_write.iter().copied()); w.write_all(to_write).unwrap(); } let result = w.finish().unwrap(); let mut r = read::GzDecoder::new(&result[..]); let mut v = Vec::new(); r.read_to_end(&mut v).unwrap(); - assert!(v == real); + assert_eq!(v, real); } #[test] @@ -301,7 +510,86 @@ mod tests { let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default())); let mut res = Vec::new(); r.read_to_end(&mut res).unwrap(); - assert!(res == v); + assert_eq!(res, v); + } + + // A Rust implementation of CRC that closely matches the C code in RFC1952. + // Only use this to create CRCs for tests. + struct Rfc1952Crc { + /* Table of CRCs of all 8-bit messages. */ + crc_table: [u32; 256], + } + + impl Rfc1952Crc { + fn new() -> Self { + let mut crc = Rfc1952Crc { + crc_table: [0; 256], + }; + /* Make the table for a fast CRC. 
*/ + for n in 0usize..256 { + let mut c = n as u32; + for _k in 0..8 { + if c & 1 != 0 { + c = 0xedb88320 ^ (c >> 1); + } else { + c = c >> 1; + } + } + crc.crc_table[n] = c; + } + crc + } + + /* + Update a running crc with the bytes buf and return + the updated crc. The crc should be initialized to zero. Pre- and + post-conditioning (one's complement) is performed within this + function so it shouldn't be done by the caller. + */ + fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 { + let mut c = crc ^ 0xffffffff; + + for b in buf { + c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8); + } + c ^ 0xffffffff + } + + /* Return the CRC of the bytes buf. */ + fn crc(&self, buf: &[u8]) -> u32 { + self.update_crc(0, buf) + } + } + + #[test] + fn roundtrip_header() { + let mut header = GzBuilder::new() + .mtime(1234) + .operating_system(57) + .filename("filename") + .comment("comment") + .into_header(Compression::fast()); + + // Add a CRC to the header + header[3] = header[3] ^ super::FHCRC; + let rfc1952_crc = Rfc1952Crc::new(); + let crc32 = rfc1952_crc.crc(&header); + let crc16 = crc32 as u16; + header.extend(&crc16.to_le_bytes()); + + let mut parser = GzHeaderParser::new(); + parser.parse(&mut header.as_slice()).unwrap(); + let actual = parser.header().unwrap(); + assert_eq!( + actual, + &GzHeader { + extra: None, + filename: Some("filename".as_bytes().to_vec()), + comment: Some("comment".as_bytes().to_vec()), + operating_system: 57, + mtime: 1234 + } + ) } #[test] @@ -353,33 +641,4 @@ mod tests { write!(f, "Hello world").unwrap(); f.flush().unwrap(); } - - use crate::gz::bufread::tests::BlockingCursor; - #[test] - // test function read_and_forget of Buffer - fn blocked_partial_header_read() { - // this is a reader which receives data afterwards - let mut r = BlockingCursor::new(); - let data = vec![1, 2, 3]; - - match r.write_all(&data) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(0); - - // this is unused except 
for the buffering - let mut decoder = read::GzDecoder::new(r); - let mut out = Vec::with_capacity(7); - match decoder.read(&mut out) { - Err(e) => { - assert_eq!(e.kind(), std::io::ErrorKind::WouldBlock); - } - _ => { - panic!("Unexpected result for decoder.read"); - } - } - } } diff --git a/src/gz/read.rs b/src/gz/read.rs index dbbe632..9dbadbd 100644 --- a/src/gz/read.rs +++ b/src/gz/read.rs @@ -8,9 +8,8 @@ use crate::Compression; /// A gzip streaming encoder /// -/// This structure exposes a [`Read`] interface that will read uncompressed data -/// from the underlying reader and expose the compressed version as a [`Read`] -/// interface. +/// This structure implements a [`Read`] interface. When read from, it reads +/// uncompressed data from the underlying [`Read`] and provides the compressed data. /// /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// @@ -25,11 +24,11 @@ use crate::Compression; /// // Return a vector containing the GZ compressed version of hello world /// /// fn gzencode_hello_world() -> io::Result<Vec<u8>> { -/// let mut ret_vec = [0;100]; +/// let mut ret_vec = Vec::new(); /// let bytestring = b"hello world"; /// let mut gz = GzEncoder::new(&bytestring[..], Compression::fast()); -/// let count = gz.read(&mut ret_vec)?; -/// Ok(ret_vec[0..count].to_vec()) +/// gz.read_to_end(&mut ret_vec)?; +/// Ok(ret_vec) /// } /// ``` #[derive(Debug)] @@ -90,17 +89,26 @@ impl<R: Read + Write> Write for GzEncoder<R> { } } -/// A gzip streaming decoder +/// A decoder for a single member of a [gzip file]. /// -/// This structure exposes a [`Read`] interface that will consume compressed -/// data from the underlying reader and emit uncompressed data. +/// This structure implements a [`Read`] interface. When read from, it reads +/// compressed data from the underlying [`Read`] and provides the uncompressed data. 
/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// After reading a single member of the gzip data this reader will return +/// Ok(0) even if there are more bytes available in the underlying reader. +/// `GzDecoder` may have read additional bytes past the end of the gzip data. +/// If you need the following bytes, wrap the `Reader` in a `std::io::BufReader` +/// and use `bufread::GzDecoder` instead. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// /// # Examples /// /// ``` -/// /// use std::io::prelude::*; /// use std::io; /// # use flate2::Compression; @@ -146,6 +154,9 @@ impl<R> GzDecoder<R> { } /// Acquires a reference to the underlying reader. + /// + /// Note that the decoder may have read past the end of the gzip data. + /// To prevent this use [`bufread::GzDecoder`] instead. pub fn get_ref(&self) -> &R { self.inner.get_ref().get_ref() } @@ -153,12 +164,19 @@ impl<R> GzDecoder<R> { /// Acquires a mutable reference to the underlying stream. /// /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. + /// this decoder continues to be used. + /// + /// Note that the decoder may have read past the end of the gzip data. + /// To prevent this use [`bufread::GzDecoder`] instead. pub fn get_mut(&mut self) -> &mut R { self.inner.get_mut().get_mut() } /// Consumes this decoder, returning the underlying reader. + /// + /// Note that the decoder may have read past the end of the gzip data. + /// Subsequent reads will skip those bytes. To prevent this use + /// [`bufread::GzDecoder`] instead. 
pub fn into_inner(self) -> R { self.inner.into_inner().into_inner() } @@ -180,19 +198,20 @@ impl<R: Read + Write> Write for GzDecoder<R> { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while `GzDecoder` will only decompress the -/// first gzip member. The multistream format is commonly used in bioinformatics, -/// for example when using the BGZF compressed data. +/// This structure implements a [`Read`] interface. When read from, it reads +/// compressed data from the underlying [`Read`] and provides the uncompressed +/// data. /// -/// This structure exposes a [`Read`] interface that will consume all gzip members -/// from the underlying reader and emit uncompressed data. +/// A gzip file consists of a series of *members* concatenated one after another. +/// MultiGzDecoder decodes all members of a file and returns Ok(0) once the +/// underlying reader does. /// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// To handle members separately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// /// # Examples /// @@ -250,7 +269,7 @@ impl<R> MultiGzDecoder<R> { /// Acquires a mutable reference to the underlying stream. /// /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. + /// this decoder is continued to be used.
pub fn get_mut(&mut self) -> &mut R { self.inner.get_mut().get_mut() } @@ -276,3 +295,84 @@ impl<R: Read + Write> Write for MultiGzDecoder<R> { self.get_mut().flush() } } + +#[cfg(test)] +mod tests { + use std::io::{Cursor, ErrorKind, Read, Result, Write}; + + use super::GzDecoder; + + //a cursor turning EOF into blocking errors + #[derive(Debug)] + pub struct BlockingCursor { + pub cursor: Cursor<Vec<u8>>, + } + + impl BlockingCursor { + pub fn new() -> BlockingCursor { + BlockingCursor { + cursor: Cursor::new(Vec::new()), + } + } + + pub fn set_position(&mut self, pos: u64) { + return self.cursor.set_position(pos); + } + } + + impl Write for BlockingCursor { + fn write(&mut self, buf: &[u8]) -> Result<usize> { + return self.cursor.write(buf); + } + fn flush(&mut self) -> Result<()> { + return self.cursor.flush(); + } + } + + impl Read for BlockingCursor { + fn read(&mut self, buf: &mut [u8]) -> Result<usize> { + //use the cursor, except it turns eof into blocking error + let r = self.cursor.read(buf); + match r { + Err(ref err) => { + if err.kind() == ErrorKind::UnexpectedEof { + return Err(ErrorKind::WouldBlock.into()); + } + } + Ok(0) => { + //regular EOF turned into blocking error + return Err(ErrorKind::WouldBlock.into()); + } + Ok(_n) => {} + } + return r; + } + } + + #[test] + fn blocked_partial_header_read() { + // this is a reader which receives data afterwards + let mut r = BlockingCursor::new(); + let data = vec![1, 2, 3]; + + match r.write_all(&data) { + Ok(()) => {} + _ => { + panic!("Unexpected result for write_all"); + } + } + r.set_position(0); + + // this is unused except for the buffering + let mut decoder = GzDecoder::new(r); + let mut out = Vec::with_capacity(7); + match decoder.read(&mut out) { + Err(e) => { + assert_eq!(e.kind(), ErrorKind::WouldBlock); + } + _ => { + panic!("Unexpected result for decoder.read"); + } + } + } +} diff --git a/src/gz/write.rs b/src/gz/write.rs index 7cf1a7c..74d6c5a 100644 --- a/src/gz/write.rs +++ 
b/src/gz/write.rs @@ -2,8 +2,7 @@ use std::cmp; use std::io; use std::io::prelude::*; -use super::bufread::{corrupt, read_gz_header}; -use super::{GzBuilder, GzHeader}; +use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser}; use crate::crc::{Crc, CrcWriter}; use crate::zio; use crate::{Compress, Compression, Decompress, Status}; @@ -92,7 +91,7 @@ impl<W: Write> GzEncoder<W> { self.inner.finish()?; while self.crc_bytes_written < 8 { - let (sum, amt) = (self.crc.sum() as u32, self.crc.amount()); + let (sum, amt) = (self.crc.sum(), self.crc.amount()); let buf = [ (sum >> 0) as u8, (sum >> 8) as u8, @@ -167,11 +166,20 @@ impl<W: Write> Drop for GzEncoder<W> { } } -/// A gzip streaming decoder +/// A decoder for a single member of a [gzip file]. /// -/// This structure exposes a [`Write`] interface that will emit compressed data -/// to the underlying writer `W`. +/// This structure exposes a [`Write`] interface, receiving compressed data and +/// writing uncompressed data to the underlying writer. +/// +/// After decoding a single member of the gzip data this writer will return the number of bytes up to +/// the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to +/// handle any data following the gzip member. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files).
/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html /// /// # Examples @@ -203,8 +211,7 @@ impl<W: Write> Drop for GzEncoder<W> { pub struct GzDecoder<W: Write> { inner: zio::Writer<CrcWriter<W>, Decompress>, crc_bytes: Vec<u8>, - header: Option<GzHeader>, - header_buf: Vec<u8>, + header_parser: GzHeaderParser, } const CRC_BYTES_LEN: usize = 8; @@ -218,14 +225,13 @@ impl<W: Write> GzDecoder<W> { GzDecoder { inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)), crc_bytes: Vec::with_capacity(CRC_BYTES_LEN), - header: None, - header_buf: Vec::new(), + header_parser: GzHeaderParser::new(), } } /// Returns the header associated with this stream. pub fn header(&self) -> Option<&GzHeader> { - self.header.as_ref() + self.header_parser.header() } /// Acquires a reference to the underlying writer. @@ -296,7 +302,7 @@ impl<W: Write> GzDecoder<W> { | ((self.crc_bytes[5] as u32) << 8) | ((self.crc_bytes[6] as u32) << 16) | ((self.crc_bytes[7] as u32) << 24); - if crc != self.inner.get_ref().crc().sum() as u32 { + if crc != self.inner.get_ref().crc().sum() { return Err(corrupt()); } if amt != self.inner.get_ref().crc().amount() { @@ -306,47 +312,24 @@ impl<W: Write> GzDecoder<W> { } } -struct Counter<T: Read> { - inner: T, - pos: usize, -} - -impl<T: Read> Read for Counter<T> { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - let pos = self.inner.read(buf)?; - self.pos += pos; - Ok(pos) - } -} - impl<W: Write> Write for GzDecoder<W> { - fn write(&mut self, buf: &[u8]) -> io::Result<usize> { - if self.header.is_none() { - // trying to avoid buffer usage - let (res, pos) = { - let mut counter = Counter { - inner: self.header_buf.chain(buf), - pos: 0, - }; - let res = read_gz_header(&mut counter); - (res, counter.pos) - }; - - match res { + fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> { + let buflen = buf.len(); + if self.header().is_none() { + match 
self.header_parser.parse(&mut buf) { Err(err) => { if err.kind() == io::ErrorKind::UnexpectedEof { - // not enough data for header, save to the buffer - self.header_buf.extend(buf); - Ok(buf.len()) + // all data read but header still not complete + Ok(buflen) } else { Err(err) } } - Ok(header) => { - self.header = Some(header); - let pos = pos - self.header_buf.len(); - self.header_buf.truncate(0); - Ok(pos) + Ok(_) => { + debug_assert!(self.header().is_some()); + // buf now contains the unread part of the original buf + let n = buflen - buf.len(); + Ok(n) } } } else { @@ -373,11 +356,119 @@ impl<W: Read + Write> Read for GzDecoder<W> { } } +/// A gzip streaming decoder that decodes a [gzip file] with multiple members. +/// +/// This structure exposes a [`Write`] interface that will consume compressed data and +/// write uncompressed data to the underlying writer. +/// +/// A gzip file consists of a series of *members* concatenated one after another. +/// `MultiGzDecoder` decodes all members of a file and writes them to the +/// underlying writer one after another. +/// +/// To handle members separately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 +#[derive(Debug)] +pub struct MultiGzDecoder<W: Write> { + inner: GzDecoder<W>, +} + +impl<W: Write> MultiGzDecoder<W> { + /// Creates a new decoder which will write uncompressed data to the stream. + /// If the gzip stream contains multiple members all will be decoded. + pub fn new(w: W) -> MultiGzDecoder<W> { + MultiGzDecoder { + inner: GzDecoder::new(w), + } + } + + /// Returns the header associated with the current member. + pub fn header(&self) -> Option<&GzHeader> { + self.inner.header() + } + + /// Acquires a reference to the underlying writer. + pub fn get_ref(&self) -> &W { + self.inner.get_ref() + } + + /// Acquires a mutable reference to the underlying writer. 
+ /// + /// Note that mutating the output/input state of the stream may corrupt this + /// object, so care must be taken when using this method. + pub fn get_mut(&mut self) -> &mut W { + self.inner.get_mut() + } + + /// Attempt to finish this output stream, writing out final chunks of data. + /// + /// Note that this function can only be used once data has finished being + /// written to the output stream. After this function is called then further + /// calls to `write` may result in a panic. + /// + /// # Panics + /// + /// Attempts to write data to this stream may result in a panic after this + /// function is called. + /// + /// # Errors + /// + /// This function will perform I/O to finish the stream, returning any + /// errors which happen. + pub fn try_finish(&mut self) -> io::Result<()> { + self.inner.try_finish() + } + + /// Consumes this decoder, flushing the output stream. + /// + /// This will flush the underlying data stream and then return the contained + /// writer if the flush succeeded. + /// + /// Note that this function may not be suitable to call in a situation where + /// the underlying stream is an asynchronous I/O stream. To finish a stream + /// the `try_finish` (or `shutdown`) method should be used instead. To + /// re-acquire ownership of a stream it is safe to call this method after + /// `try_finish` or `shutdown` has returned `Ok`. + /// + /// # Errors + /// + /// This function will perform I/O to complete this stream, and any I/O + /// errors which occur will be returned from this function. + pub fn finish(self) -> io::Result<W> { + self.inner.finish() + } +} + +impl<W: Write> Write for MultiGzDecoder<W> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + if buf.is_empty() { + Ok(0) + } else { + match self.inner.write(buf) { + Ok(0) => { + // When the GzDecoder indicates that it has finished + // create a new GzDecoder to handle additional data. 
+ self.inner.try_finish()?; + let w = self.inner.inner.take_inner().into_inner(); + self.inner = GzDecoder::new(w); + self.inner.write(buf) + } + res => res, + } + } + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} + #[cfg(test)] mod tests { use super::*; - const STR: &'static str = "Hello World Hello World Hello World Hello World Hello World \ + const STR: &str = "Hello World Hello World Hello World Hello World Hello World \ Hello World Hello World Hello World Hello World Hello World \ Hello World Hello World Hello World Hello World Hello World \ Hello World Hello World Hello World Hello World Hello World \ @@ -418,6 +509,56 @@ mod tests { } #[test] + fn decode_writer_partial_header_filename() { + let filename = "test.txt"; + let mut e = GzBuilder::new() + .filename(filename) + .read(STR.as_bytes(), Compression::default()); + let mut bytes = Vec::new(); + e.read_to_end(&mut bytes).unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12); + let n = decoder.write(&bytes[12..]).unwrap(); + if n < bytes.len() - 12 { + decoder.write(&bytes[n + 12..]).unwrap(); + } + assert_eq!( + decoder.header().unwrap().filename().unwrap(), + filename.as_bytes() + ); + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } + + #[test] + fn decode_writer_partial_header_comment() { + let comment = "test comment"; + let mut e = GzBuilder::new() + .comment(comment) + .read(STR.as_bytes(), Compression::default()); + let mut bytes = Vec::new(); + e.read_to_end(&mut bytes).unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12); + let n = decoder.write(&bytes[12..]).unwrap(); + if n < bytes.len() - 12 { + decoder.write(&bytes[n + 12..]).unwrap(); + } + assert_eq!( + 
decoder.header().unwrap().comment().unwrap(), + comment.as_bytes() + ); + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } + + #[test] fn decode_writer_exact_header() { let mut e = GzEncoder::new(Vec::new(), Compression::default()); e.write(STR.as_ref()).unwrap(); @@ -447,4 +588,54 @@ mod tests { let return_string = String::from_utf8(writer).expect("String parsing error"); assert_eq!(return_string, STR); } + + // Two or more gzip files concatenated form a multi-member gzip file. MultiGzDecoder will + // concatenate the decoded contents of all members. + #[test] + fn decode_multi_writer() { + let mut e = GzEncoder::new(Vec::new(), Compression::default()); + e.write(STR.as_ref()).unwrap(); + let bytes = e.finish().unwrap().repeat(2); + + let mut writer = Vec::new(); + let mut decoder = MultiGzDecoder::new(writer); + let mut count = 0; + while count < bytes.len() { + let n = decoder.write(&bytes[count..]).unwrap(); + assert!(n != 0); + count += n; + } + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + let expected = STR.repeat(2); + assert_eq!(return_string, expected); + } + + // GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any + // additional data to be consumed by the caller. 
+ #[test] + fn decode_extra_data() { + let compressed = { + let mut e = GzEncoder::new(Vec::new(), Compression::default()); + e.write(STR.as_ref()).unwrap(); + let mut b = e.finish().unwrap(); + b.push(b'x'); + b + }; + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + let mut consumed_bytes = 0; + loop { + let n = decoder.write(&compressed[consumed_bytes..]).unwrap(); + if n == 0 { + break; + } + consumed_bytes += n; + } + writer = decoder.finish().unwrap(); + let actual = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(actual, STR); + assert_eq!(&compressed[consumed_bytes..], b"x"); + } } @@ -65,17 +65,39 @@ //! `Write` trait if `T: Write`. That is, the "dual trait" is forwarded directly //! to the underlying object if available. //! +//! # About multi-member Gzip files +//! +//! While most `gzip` files one encounters will have a single *member* that can be read +//! with the [`GzDecoder`], there may be some files which have multiple members. +//! +//! A [`GzDecoder`] will only read the first member of gzip data, which may unexpectedly +//! provide partial results when a multi-member gzip file is encountered. `GzDecoder` is appropriate +//! for data that is designed to be read as single members from a multi-member file. `bufread::GzDecoder` +//! and `write::GzDecoder` also allow non-gzip data following gzip data to be handled. +//! +//! The [`MultiGzDecoder`] on the other hand will decode all members of a `gzip` file +//! into one consecutive stream of bytes, which hides the underlying *members* entirely. +//! If a file contains non-gzip data after the gzip data, MultiGzDecoder will +//! emit an error after decoding the gzip data. This behavior matches the `gzip`, +//! `gunzip`, and `zcat` command line tools. +//! //! [`read`]: read/index.html //! [`bufread`]: bufread/index.html //! [`write`]: write/index.html //! [read]: https://doc.rust-lang.org/std/io/trait.Read.html //!
[write]: https://doc.rust-lang.org/std/io/trait.Write.html //! [bufread]: https://doc.rust-lang.org/std/io/trait.BufRead.html +//! [`GzDecoder`]: read/struct.GzDecoder.html +//! [`MultiGzDecoder`]: read/struct.MultiGzDecoder.html #![doc(html_root_url = "https://docs.rs/flate2/0.2")] #![deny(missing_docs)] #![deny(missing_debug_implementations)] #![allow(trivial_numeric_casts)] #![cfg_attr(test, deny(warnings))] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +#[cfg(not(feature = "any_impl",))] +compile_error!("You need to choose a zlib backend"); pub use crate::crc::{Crc, CrcReader, CrcWriter}; pub use crate::gz::GzBuilder; @@ -95,7 +117,14 @@ mod zlib; /// Types which operate over [`Read`] streams, both encoders and decoders for /// various formats. /// +/// Note that the `read` decoder types may read past the end of the compressed +/// data while decoding. If the caller requires subsequent reads to start +/// immediately following the compressed data wrap the `Read` type in a +/// [`BufReader`] and use the `BufReader` with the equivalent decoder from the +/// `bufread` module and also for the subsequent reads. +/// /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// [`BufReader`]: https://doc.rust-lang.org/std/io/struct.BufReader.html pub mod read { pub use crate::deflate::read::DeflateDecoder; pub use crate::deflate::read::DeflateEncoder; @@ -115,6 +144,7 @@ pub mod write { pub use crate::deflate::write::DeflateEncoder; pub use crate::gz::write::GzDecoder; pub use crate::gz::write::GzEncoder; + pub use crate::gz::write::MultiGzDecoder; pub use crate::zlib::write::ZlibDecoder; pub use crate::zlib::write::ZlibEncoder; } @@ -152,7 +182,7 @@ fn _assert_send_sync() { } /// When compressing data, the compression level can be specified by a value in -/// this enum. +/// this struct. 
#[derive(Copy, Clone, PartialEq, Eq, Debug)] pub struct Compression(u32); @@ -1,7 +1,6 @@ use std::error::Error; use std::fmt; use std::io; -use std::slice; use crate::ffi::{self, Backend, Deflate, DeflateBackend, ErrorMessage, Inflate, InflateBackend}; use crate::Compression; @@ -40,9 +39,10 @@ pub struct Decompress { inner: Inflate, } -#[derive(Copy, Clone, PartialEq, Eq, Debug)] /// Values which indicate the form of flushing to be used when compressing /// in-memory data. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[non_exhaustive] pub enum FlushCompress { /// A typical parameter for passing to compression/decompression functions, /// this indicates that the underlying stream to decide how much data to @@ -80,14 +80,12 @@ pub enum FlushCompress { /// The return value may indicate that the stream is not yet done and more /// data has yet to be processed. Finish = ffi::MZ_FINISH as isize, - - #[doc(hidden)] - _Nonexhaustive, } -#[derive(Copy, Clone, PartialEq, Eq, Debug)] /// Values which indicate the form of flushing to be used when /// decompressing in-memory data. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[non_exhaustive] pub enum FlushDecompress { /// A typical parameter for passing to compression/decompression functions, /// this indicates that the underlying stream to decide how much data to @@ -108,9 +106,6 @@ pub enum FlushDecompress { /// The return value may indicate that the stream is not yet done and more /// data has yet to be processed. Finish = ffi::MZ_FINISH as isize, - - #[doc(hidden)] - _Nonexhaustive, } /// The inner state for an error when decompressing @@ -215,11 +210,6 @@ impl Compress { /// /// If `window_bits` does not fall into the range 9 ..= 15, /// `new_with_window_bits` will panic. - /// - /// # Note - /// - /// This constructor is only available when the `zlib` feature is used. - /// Other backends currently do not support custom window bits. 
#[cfg(feature = "any_zlib")] pub fn new_with_window_bits( level: Compression, @@ -247,11 +237,6 @@ impl Compress { /// /// If `window_bits` does not fall into the range 9 ..= 15, /// `new_with_window_bits` will panic. - /// - /// # Note - /// - /// This constructor is only available when the `zlib` feature is used. - /// Other backends currently do not support gzip headers for Compress. #[cfg(feature = "any_zlib")] pub fn new_gzip(level: Compression, window_bits: u8) -> Compress { assert!( @@ -356,19 +341,12 @@ impl Compress { output: &mut Vec<u8>, flush: FlushCompress, ) -> Result<Status, CompressError> { - let cap = output.capacity(); - let len = output.len(); - - unsafe { + write_to_spare_capacity_of_vec(output, |out| { let before = self.total_out(); - let ret = { - let ptr = output.as_mut_ptr().offset(len as isize); - let out = slice::from_raw_parts_mut(ptr, cap - len); - self.compress(input, out, flush) - }; - output.set_len((self.total_out() - before) as usize + len); - ret - } + let ret = self.compress(input, out, flush); + let bytes_written = self.total_out() - before; + (bytes_written as usize, ret) + }) } } @@ -393,11 +371,6 @@ impl Decompress { /// /// If `window_bits` does not fall into the range 9 ..= 15, /// `new_with_window_bits` will panic. - /// - /// # Note - /// - /// This constructor is only available when the `zlib` feature is used. - /// Other backends currently do not support custom window bits. #[cfg(feature = "any_zlib")] pub fn new_with_window_bits(zlib_header: bool, window_bits: u8) -> Decompress { assert!( @@ -418,11 +391,6 @@ impl Decompress { /// /// If `window_bits` does not fall into the range 9 ..= 15, /// `new_with_window_bits` will panic. - /// - /// # Note - /// - /// This constructor is only available when the `zlib` feature is used. - /// Other backends currently do not support gzip headers for Decompress. 
#[cfg(feature = "any_zlib")] pub fn new_gzip(window_bits: u8) -> Decompress { assert!( @@ -497,19 +465,12 @@ impl Decompress { output: &mut Vec<u8>, flush: FlushDecompress, ) -> Result<Status, DecompressError> { - let cap = output.capacity(); - let len = output.len(); - - unsafe { + write_to_spare_capacity_of_vec(output, |out| { let before = self.total_out(); - let ret = { - let ptr = output.as_mut_ptr().offset(len as isize); - let out = slice::from_raw_parts_mut(ptr, cap - len); - self.decompress(input, out, flush) - }; - output.set_len((self.total_out() - before) as usize + len); - ret - } + let ret = self.decompress(input, out, flush); + let bytes_written = self.total_out() - before; + (bytes_written as usize, ret) + }) } /// Specifies the decompression dictionary to use. @@ -598,6 +559,29 @@ impl fmt::Display for CompressError { } } +/// Allows `writer` to write data into the spare capacity of the `output` vector. +/// This will not reallocate the vector provided or attempt to grow it, so space +/// for the `output` must be reserved by the caller before calling this +/// function. +/// +/// `writer` needs to return the number of bytes written (and can also return +/// another arbitrary return value). +fn write_to_spare_capacity_of_vec<T>( + output: &mut Vec<u8>, + writer: impl FnOnce(&mut [u8]) -> (usize, T), +) -> T { + let cap = output.capacity(); + let len = output.len(); + + output.resize(output.capacity(), 0); + let (bytes_written, ret) = writer(&mut output[len..]); + + let new_len = core::cmp::min(len + bytes_written, cap); // Sanitizes `bytes_written`. + output.resize(new_len, 0 /* unused */); + + ret +} + #[cfg(test)] mod tests { use std::io::Write; @@ -143,10 +143,8 @@ where // then we need to keep asking for more data because if we // return that 0 bytes of data have been read then it will // be interpreted as EOF. 
- Ok(Status::Ok) | Ok(Status::BufError) if read == 0 && !eof && !dst.is_empty() => { - continue - } - Ok(Status::Ok) | Ok(Status::BufError) | Ok(Status::StreamEnd) => return Ok(read), + Ok(Status::Ok | Status::BufError) if read == 0 && !eof && !dst.is_empty() => continue, + Ok(Status::Ok | Status::BufError | Status::StreamEnd) => return Ok(read), Err(..) => { return Err(io::Error::new( diff --git a/src/zlib/bufread.rs b/src/zlib/bufread.rs index f1d3231..85bbd38 100644 --- a/src/zlib/bufread.rs +++ b/src/zlib/bufread.rs @@ -7,9 +7,10 @@ use crate::{Compress, Decompress}; /// A ZLIB encoder, or compressor. /// -/// This structure consumes a [`BufRead`] interface, reading uncompressed data -/// from the underlying reader, and emitting compressed data. +/// This structure implements a [`Read`] interface. When read from, it reads +/// uncompressed data from the underlying [`BufRead`] and provides the compressed data. /// +/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples @@ -47,6 +48,15 @@ impl<R: BufRead> ZlibEncoder<R> { data: Compress::new(level, true), } } + + /// Creates a new encoder with the given `compression` settings which will + /// read uncompressed data from the given stream `r` and emit the compressed stream. + pub fn new_with_compress(r: R, compression: Compress) -> ZlibEncoder<R> { + ZlibEncoder { + obj: r, + data: compression, + } + } } pub fn reset_encoder_data<R>(zlib: &mut ZlibEncoder<R>) { @@ -119,9 +129,10 @@ impl<R: BufRead + Write> Write for ZlibEncoder<R> { /// A ZLIB decoder, or decompressor. /// -/// This structure consumes a [`BufRead`] interface, reading compressed data -/// from the underlying reader, and emitting uncompressed data. +/// This structure implements a [`Read`] interface. When read from, it reads +/// compressed data from the underlying [`BufRead`] and provides the uncompressed data. 
/// +/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples @@ -165,6 +176,15 @@ impl<R: BufRead> ZlibDecoder<R> { data: Decompress::new(true), } } + + /// Creates a new decoder which will decompress data read from the given + /// stream, using the given `decompression` settings. + pub fn new_with_decompress(r: R, decompression: Decompress) -> ZlibDecoder<R> { + ZlibDecoder { + obj: r, + data: decompression, + } + } } pub fn reset_decoder_data<R>(zlib: &mut ZlibDecoder<R>) { @@ -192,7 +212,7 @@ impl<R> ZlibDecoder<R> { /// Acquires a mutable reference to the underlying stream /// /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. + /// this decoder is continued to be used. pub fn get_mut(&mut self) -> &mut R { &mut self.obj } diff --git a/src/zlib/mod.rs b/src/zlib/mod.rs index 9d3de95..1a293ba 100644 --- a/src/zlib/mod.rs +++ b/src/zlib/mod.rs @@ -19,14 +19,14 @@ mod tests { let v = crate::random_bytes().take(1024).collect::<Vec<_>>(); for _ in 0..200 { let to_write = &v[..thread_rng().gen_range(0..v.len())]; - real.extend(to_write.iter().map(|x| *x)); + real.extend(to_write.iter().copied()); w.write_all(to_write).unwrap(); } let result = w.finish().unwrap(); let mut r = read::ZlibDecoder::new(&result[..]); let mut ret = Vec::new(); r.read_to_end(&mut ret).unwrap(); - assert!(ret == real); + assert_eq!(ret, real); } #[test] @@ -38,7 +38,7 @@ mod tests { let mut r = read::ZlibDecoder::new(&data[..]); let mut ret = Vec::new(); r.read_to_end(&mut ret).unwrap(); - assert!(ret == b"foo"); + assert_eq!(ret, b"foo"); } #[test] @@ -48,7 +48,7 @@ mod tests { let v = crate::random_bytes().take(1024).collect::<Vec<_>>(); for _ in 0..200 { let to_write = &v[..thread_rng().gen_range(0..v.len())]; - real.extend(to_write.iter().map(|x| *x)); + real.extend(to_write.iter().copied()); w.write_all(to_write).unwrap(); } let 
mut result = w.finish().unwrap(); @@ -56,13 +56,13 @@ mod tests { let result_len = result.len(); for _ in 0..200 { - result.extend(v.iter().map(|x| *x)); + result.extend(v.iter().copied()); } let mut r = read::ZlibDecoder::new(&result[..]); let mut ret = Vec::new(); r.read_to_end(&mut ret).unwrap(); - assert!(ret == real); + assert_eq!(ret, real); assert_eq!(r.total_in(), result_len as u64); } @@ -82,7 +82,7 @@ mod tests { write::ZlibEncoder::new(write::ZlibDecoder::new(Vec::new()), Compression::default()); w.write_all(&v).unwrap(); let w = w.finish().unwrap().finish().unwrap(); - assert!(w == v); + assert_eq!(w, v); } #[test] diff --git a/src/zlib/read.rs b/src/zlib/read.rs index 5094931..3b41ae6 100644 --- a/src/zlib/read.rs +++ b/src/zlib/read.rs @@ -3,11 +3,12 @@ use std::io::prelude::*; use super::bufread; use crate::bufreader::BufReader; +use crate::Decompress; /// A ZLIB encoder, or compressor. /// -/// This structure implements a [`Read`] interface and will read uncompressed -/// data from an underlying stream and emit a stream of compressed data. +/// This structure implements a [`Read`] interface. When read from, it reads +/// uncompressed data from the underlying [`Read`] and provides the compressed data. 
/// /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// @@ -24,9 +25,9 @@ use crate::bufreader::BufReader; /// # fn open_hello_world() -> std::io::Result<Vec<u8>> { /// let f = File::open("examples/hello_world.txt")?; /// let mut z = ZlibEncoder::new(f, Compression::fast()); -/// let mut buffer = [0;50]; -/// let byte_count = z.read(&mut buffer)?; -/// # Ok(buffer[0..byte_count].to_vec()) +/// let mut buffer = Vec::new(); +/// z.read_to_end(&mut buffer)?; +/// # Ok(buffer) /// # } /// ``` #[derive(Debug)] @@ -42,6 +43,14 @@ impl<R: Read> ZlibEncoder<R> { inner: bufread::ZlibEncoder::new(BufReader::new(r), level), } } + + /// Creates a new encoder with the given `compression` settings which will + /// read uncompressed data from the given stream `r` and emit the compressed stream. + pub fn new_with_compress(r: R, compression: crate::Compress) -> ZlibEncoder<R> { + ZlibEncoder { + inner: bufread::ZlibEncoder::new_with_compress(BufReader::new(r), compression), + } + } } impl<R> ZlibEncoder<R> { @@ -117,8 +126,8 @@ impl<W: Read + Write> Write for ZlibEncoder<W> { /// A ZLIB decoder, or decompressor. /// -/// This structure implements a [`Read`] interface and takes a stream of -/// compressed data as input, providing the decompressed data when read from. +/// This structure implements a [`Read`] interface. When read from, it reads +/// compressed data from the underlying [`Read`] and provides the uncompressed data. /// /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// @@ -160,7 +169,8 @@ impl<R: Read> ZlibDecoder<R> { ZlibDecoder::new_with_buf(r, vec![0; 32 * 1024]) } - /// Same as `new`, but the intermediate buffer for data is specified. + /// Creates a new decoder which will decompress data read from the given + /// stream `r`, using `buf` as backing to speed up reading. /// /// Note that the specified buffer will only be used up to its current /// length. The buffer's capacity will also not grow over time. 
@@ -169,6 +179,31 @@ impl<R: Read> ZlibDecoder<R> { inner: bufread::ZlibDecoder::new(BufReader::with_buf(buf, r)), } } + + /// Creates a new decoder which will decompress data read from the given + /// stream `r`, along with `decompression` settings. + pub fn new_with_decompress(r: R, decompression: Decompress) -> ZlibDecoder<R> { + ZlibDecoder::new_with_decompress_and_buf(r, vec![0; 32 * 1024], decompression) + } + + /// Creates a new decoder which will decompress data read from the given + /// stream `r`, using `buf` as backing to speed up reading, + /// along with `decompression` settings to configure decoder. + /// + /// Note that the specified buffer will only be used up to its current + /// length. The buffer's capacity will also not grow over time. + pub fn new_with_decompress_and_buf( + r: R, + buf: Vec<u8>, + decompression: Decompress, + ) -> ZlibDecoder<R> { + ZlibDecoder { + inner: bufread::ZlibDecoder::new_with_decompress( + BufReader::with_buf(buf, r), + decompression, + ), + } + } } impl<R> ZlibDecoder<R> { @@ -195,7 +230,7 @@ impl<R> ZlibDecoder<R> { /// Acquires a mutable reference to the underlying stream /// /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. + /// this decoder is continued to be used. pub fn get_mut(&mut self) -> &mut R { self.inner.get_mut().get_mut() } diff --git a/src/zlib/write.rs b/src/zlib/write.rs index c671814..d8ad2f2 100644 --- a/src/zlib/write.rs +++ b/src/zlib/write.rs @@ -44,6 +44,14 @@ impl<W: Write> ZlibEncoder<W> { } } + /// Creates a new encoder which will write compressed data to the stream + /// `w` with the given `compression` settings. + pub fn new_with_compress(w: W, compression: Compress) -> ZlibEncoder<W> { + ZlibEncoder { + inner: zio::Writer::new(w, compression), + } + } + /// Acquires a reference to the underlying writer. 
pub fn get_ref(&self) -> &W { self.inner.get_ref() @@ -218,6 +226,17 @@ impl<W: Write> ZlibDecoder<W> { } } + /// Creates a new decoder which will write uncompressed data to the stream `w` + /// using the given `decompression` settings. + /// + /// When this decoder is dropped or unwrapped the final pieces of data will + /// be flushed. + pub fn new_with_decompress(w: W, decompression: Decompress) -> ZlibDecoder<W> { + ZlibDecoder { + inner: zio::Writer::new(w, decompression), + } + } + /// Acquires a reference to the underlying writer. pub fn get_ref(&self) -> &W { self.inner.get_ref() diff --git a/tests/early-flush.rs b/tests/early-flush.rs index e717ada..b43f474 100644 --- a/tests/early-flush.rs +++ b/tests/early-flush.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use std::io::{Read, Write}; use flate2::read::GzDecoder; diff --git a/tests/empty-read.rs b/tests/empty-read.rs index 7551238..3724c23 100644 --- a/tests/empty-read.rs +++ b/tests/empty-read.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use std::io::{Read, Write}; #[test] diff --git a/tests/gunzip.rs b/tests/gunzip.rs index c382032..f7d4132 100644 --- a/tests/gunzip.rs +++ b/tests/gunzip.rs @@ -1,5 +1,3 @@ -extern crate flate2; - use flate2::read::GzDecoder; use flate2::read::MultiGzDecoder; use std::fs::File; @@ -16,7 +14,7 @@ fn test_extract_success() { .unwrap() .read_to_end(&mut expected) .unwrap(); - assert!(content == expected); + assert_eq!(content, expected); } // // test partial extraction of a multistream gzipped file diff --git a/tests/zero-write.rs b/tests/zero-write.rs index f0db86c..22be8ee 100644 --- a/tests/zero-write.rs +++ b/tests/zero-write.rs @@ -1,5 +1,3 @@ -extern crate flate2; - #[test] fn zero_write_is_error() { let mut buf = [0u8]; |