diff options
-rw-r--r-- | .cargo_vcs_info.json | 2 | ||||
-rw-r--r-- | Android.bp | 10 | ||||
-rw-r--r-- | Cargo.toml | 13 | ||||
-rw-r--r-- | Cargo.toml.orig | 4 | ||||
-rw-r--r-- | METADATA | 9 | ||||
-rw-r--r-- | TEST_MAPPING | 37 | ||||
-rw-r--r-- | cargo2android.json | 9 | ||||
-rw-r--r-- | examples/deflateencoder-read.rs | 6 | ||||
-rw-r--r-- | examples/gzencoder-read.rs | 6 | ||||
-rw-r--r-- | examples/zlibencoder-read.rs | 6 | ||||
-rw-r--r-- | src/ffi/c.rs | 176 | ||||
-rw-r--r-- | src/ffi/mod.rs | 6 | ||||
-rw-r--r-- | src/ffi/rust.rs | 16 | ||||
-rw-r--r-- | src/gz/bufread.rs | 451 | ||||
-rw-r--r-- | src/gz/mod.rs | 29 | ||||
-rw-r--r-- | src/mem.rs | 99 | ||||
-rw-r--r-- | tests/corrupt-gz-file.bin (renamed from tests/corrupt-file.gz) | bin | 7128 -> 7128 bytes | |||
-rw-r--r-- | tests/gunzip.rs | 2 |
18 files changed, 629 insertions, 252 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 625d5da..551b999 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,5 +1,5 @@ { "git": { - "sha1": "90d9e5ed866742ce8b3946d156830e300d1e5aab" + "sha1": "63ecb8c0407c619c7a20529699b89369061ece88" } } @@ -1,8 +1,6 @@ -// This file is generated by cargo2android.py --run --device --features zlib. +// This file is generated by cargo2android.py --config cargo2android.json. // Do not modify this file as changes will be overridden on upgrade. - - package { default_applicable_licenses: ["external_rust_crates_flate2_license"], } @@ -43,6 +41,8 @@ rust_library { name: "libflate2", host_supported: true, crate_name: "flate2", + cargo_env_compat: true, + cargo_pkg_version: "1.0.22", srcs: ["src/lib.rs"], edition: "2018", features: [ @@ -56,4 +56,8 @@ rust_library { "liblibc", "liblibz_sys", ], + apex_available: [ + "//apex_available:platform", + "com.android.virt", + ], } @@ -3,17 +3,16 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "flate2" -version = "1.0.20" +version = "1.0.22" authors = ["Alex Crichton <alex@alexcrichton.com>", "Josh Triplett <josh@joshtriplett.org>"] description = "DEFLATE compression and decompression exposed as Read/BufRead/Write streams.\nSupports miniz_oxide, miniz.c, and multiple zlib implementations. Supports\nzlib, gzip, and raw deflate streams.\n" homepage = "https://github.com/rust-lang/flate2-rs" @@ -27,7 +26,7 @@ repository = "https://github.com/rust-lang/flate2-rs" version = "1.0.0" [dependencies.cloudflare-zlib-sys] -version = "0.2.0" +version = "0.3.0" optional = true [dependencies.crc32fast] diff --git a/Cargo.toml.orig b/Cargo.toml.orig index c9e28f9..08ee25b 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,7 +1,7 @@ [package] name = "flate2" authors = ["Alex Crichton <alex@alexcrichton.com>", "Josh Triplett <josh@joshtriplett.org>"] -version = "1.0.20" +version = "1.0.22" edition = "2018" license = "MIT/Apache-2.0" readme = "README.md" @@ -24,7 +24,7 @@ libc = "0.2.65" cfg-if = "1.0.0" miniz-sys = { path = "miniz-sys", version = "0.1.11", optional = true } libz-sys = { version = "1.1.0", optional = true, default-features = false } -cloudflare-zlib-sys = { version = "0.2.0", optional = true } +cloudflare-zlib-sys = { version = "0.3.0", optional = true } tokio-io = { version = "0.1.11", optional = true } futures = { version = "0.1.25", optional = true } miniz_oxide = { version = "0.4.0", optional = true, default-features = false } @@ -7,14 +7,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/flate2/flate2-1.0.20.crate" + value: "https://static.crates.io/crates/flate2/flate2-1.0.22.crate" } - version: "1.0.20" - # Dual-licensed, using the least restrictive per go/thirdpartylicenses#same. + version: "1.0.22" license_type: NOTICE last_upgrade_date { year: 2021 - month: 5 - day: 11 + month: 9 + day: 22 } } diff --git a/TEST_MAPPING b/TEST_MAPPING new file mode 100644 index 0000000..7325ef4 --- /dev/null +++ b/TEST_MAPPING @@ -0,0 +1,37 @@ +// Generated by update_crate_tests.py for tests that depend on this crate. +{ + "presubmit": [ + { + "name": "ZipFuseTest" + }, + { + "name": "libapkverify.integration_test" + }, + { + "name": "libapkverify.test" + }, + { + "name": "microdroid_manager_test" + }, + { + "name": "virtualizationservice_device_test" + } + ], + "presubmit-rust": [ + { + "name": "ZipFuseTest" + }, + { + "name": "libapkverify.integration_test" + }, + { + "name": "libapkverify.test" + }, + { + "name": "microdroid_manager_test" + }, + { + "name": "virtualizationservice_device_test" + } + ] +} diff --git a/cargo2android.json b/cargo2android.json new file mode 100644 index 0000000..85a6aa6 --- /dev/null +++ b/cargo2android.json @@ -0,0 +1,9 @@ +{ + "apex-available": [ + "//apex_available:platform", + "com.android.virt" + ], + "device": true, + "features": "zlib", + "run": true +}
\ No newline at end of file diff --git a/examples/deflateencoder-read.rs b/examples/deflateencoder-read.rs index d22777f..47e4784 100644 --- a/examples/deflateencoder-read.rs +++ b/examples/deflateencoder-read.rs @@ -12,9 +12,9 @@ fn main() { // Return a vector containing the Defalte compressed version of hello world fn deflateencoder_read_hello_world() -> io::Result<Vec<u8>> { - let mut ret_vec = [0; 100]; + let mut result = Vec::new(); let c = b"hello world"; let mut deflater = DeflateEncoder::new(&c[..], Compression::fast()); - let count = deflater.read(&mut ret_vec)?; - Ok(ret_vec[0..count].to_vec()) + deflater.read_to_end(&mut result)?; + Ok(result) } diff --git a/examples/gzencoder-read.rs b/examples/gzencoder-read.rs index a9657ac..3f1262b 100644 --- a/examples/gzencoder-read.rs +++ b/examples/gzencoder-read.rs @@ -12,9 +12,9 @@ fn main() { // Return a vector containing the GZ compressed version of hello world fn gzencoder_read_hello_world() -> io::Result<Vec<u8>> { - let mut ret_vec = [0; 100]; + let mut result = Vec::new(); let c = b"hello world"; let mut z = GzEncoder::new(&c[..], Compression::fast()); - let count = z.read(&mut ret_vec)?; - Ok(ret_vec[0..count].to_vec()) + z.read_to_end(&mut result)?; + Ok(result) } diff --git a/examples/zlibencoder-read.rs b/examples/zlibencoder-read.rs index b0ae50a..779eb1d 100644 --- a/examples/zlibencoder-read.rs +++ b/examples/zlibencoder-read.rs @@ -15,7 +15,7 @@ fn main() { fn open_hello_world() -> std::io::Result<Vec<u8>> { let f = File::open("examples/hello_world.txt")?; let mut z = ZlibEncoder::new(f, Compression::fast()); - let mut buffer = [0; 50]; - let byte_count = z.read(&mut buffer)?; - Ok(buffer[0..byte_count].to_vec()) + let mut result = Vec::new(); + z.read_to_end(&mut result)?; + Ok(result) } diff --git a/src/ffi/c.rs b/src/ffi/c.rs index 9763eec..1ab6cab 100644 --- a/src/ffi/c.rs +++ b/src/ffi/c.rs @@ -12,6 +12,23 @@ pub use libc::{c_int, c_uint, c_void, size_t}; use super::*; use crate::mem::{self, FlushDecompress, Status}; +// miniz doesn't provide any error messages, so only enable the field when we use a real zlib +#[derive(Default)] +pub struct ErrorMessage(#[cfg(feature = "any_zlib")] Option<&'static str>); + +impl ErrorMessage { + pub fn get(&self) -> Option<&str> { + #[cfg(feature = "any_zlib")] + { + self.0 + } + #[cfg(not(feature = "any_zlib"))] + { + None + } + } +} + pub struct StreamWrapper { pub inner: Box<mz_stream>, } @@ -38,13 +55,13 @@ impl Default for StreamWrapper { reserved: 0, opaque: ptr::null_mut(), state: ptr::null_mut(), - #[cfg(feature = "any_zlib")] + #[cfg(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys")))] zalloc, - #[cfg(feature = "any_zlib")] + #[cfg(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys")))] zfree, - #[cfg(not(feature = "any_zlib"))] + #[cfg(not(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys"))))] zalloc: Some(zalloc), - #[cfg(not(feature = "any_zlib"))] + #[cfg(not(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys"))))] zfree: Some(zfree), }), } @@ -138,6 +155,25 @@ pub struct Stream<D: Direction> { pub _marker: marker::PhantomData<D>, } +impl<D: Direction> Stream<D> { + pub fn msg(&self) -> ErrorMessage { + #[cfg(feature = "any_zlib")] + { + let msg = self.stream_wrapper.msg; + ErrorMessage(if msg.is_null() { + None + } else { + let s = unsafe { std::ffi::CStr::from_ptr(msg) }; + std::str::from_utf8(s.to_bytes()).ok() + }) + } + #[cfg(not(feature = "any_zlib"))] + { + ErrorMessage() + } + } +} + impl<D: Direction> Drop for Stream<D> { fn drop(&mut self) { unsafe { @@ -193,6 +229,7 @@ impl InflateBackend for Inflate { flush: FlushDecompress, ) -> Result<Status, DecompressError> { let raw = &mut *self.inner.stream_wrapper; + raw.msg = ptr::null_mut(); raw.next_in = input.as_ptr() as *mut u8; raw.avail_in = cmp::min(input.len(), c_uint::max_value() as usize) as c_uint; raw.next_out = output.as_mut_ptr(); @@ -206,7 +243,7 @@ impl InflateBackend for Inflate { self.inner.total_out += (raw.next_out as usize - output.as_ptr() as usize) as u64; match rc { - MZ_DATA_ERROR | MZ_STREAM_ERROR => mem::decompress_failed(), + MZ_DATA_ERROR | MZ_STREAM_ERROR => mem::decompress_failed(self.inner.msg()), MZ_OK => Ok(Status::Ok), MZ_BUF_ERROR => Ok(Status::BufError), MZ_STREAM_END => Ok(Status::StreamEnd), @@ -286,6 +323,7 @@ impl DeflateBackend for Deflate { flush: FlushCompress, ) -> Result<Status, CompressError> { let raw = &mut *self.inner.stream_wrapper; + raw.msg = ptr::null_mut(); raw.next_in = input.as_ptr() as *mut _; raw.avail_in = cmp::min(input.len(), c_uint::max_value() as usize) as c_uint; raw.next_out = output.as_mut_ptr(); @@ -302,7 +340,7 @@ impl DeflateBackend for Deflate { MZ_OK => Ok(Status::Ok), MZ_BUF_ERROR => Ok(Status::BufError), MZ_STREAM_END => Ok(Status::StreamEnd), - MZ_STREAM_ERROR => Err(CompressError(())), + MZ_STREAM_ERROR => mem::compress_failed(self.inner.msg()), c => panic!("unknown return code: {}", c), } } @@ -336,103 +374,41 @@ mod c_backend { pub type AllocSize = libc::size_t; } -/// Zlib specific -#[cfg(any( - feature = "zlib-ng-compat", - all(feature = "zlib", not(feature = "cloudflare_zlib")) -))] -#[allow(bad_style)] -mod c_backend { - use libc::{c_char, c_int}; - use std::mem; - - pub use libz_sys::deflate as mz_deflate; - pub use libz_sys::deflateEnd as mz_deflateEnd; - pub use libz_sys::deflateReset as mz_deflateReset; - pub use libz_sys::inflate as mz_inflate; - pub use libz_sys::inflateEnd as mz_inflateEnd; - pub use libz_sys::z_stream as mz_stream; - pub use libz_sys::*; - - pub use libz_sys::Z_BLOCK as MZ_BLOCK; - pub use libz_sys::Z_BUF_ERROR as MZ_BUF_ERROR; - pub use libz_sys::Z_DATA_ERROR as MZ_DATA_ERROR; - pub use libz_sys::Z_DEFAULT_STRATEGY as MZ_DEFAULT_STRATEGY; - pub use libz_sys::Z_DEFLATED as MZ_DEFLATED; - pub use libz_sys::Z_FINISH as MZ_FINISH; - pub use libz_sys::Z_FULL_FLUSH as MZ_FULL_FLUSH; - pub use libz_sys::Z_NEED_DICT as MZ_NEED_DICT; - pub use libz_sys::Z_NO_FLUSH as MZ_NO_FLUSH; - pub use libz_sys::Z_OK as MZ_OK; - pub use libz_sys::Z_PARTIAL_FLUSH as MZ_PARTIAL_FLUSH; - pub use libz_sys::Z_STREAM_END as MZ_STREAM_END; - pub use libz_sys::Z_STREAM_ERROR as MZ_STREAM_ERROR; - pub use libz_sys::Z_SYNC_FLUSH as MZ_SYNC_FLUSH; - pub type AllocSize = libz_sys::uInt; - - pub const MZ_DEFAULT_WINDOW_BITS: c_int = 15; - - const ZLIB_VERSION: &'static str = "1.2.8\0"; - - pub unsafe extern "C" fn mz_deflateInit2( - stream: *mut mz_stream, - level: c_int, - method: c_int, - window_bits: c_int, - mem_level: c_int, - strategy: c_int, - ) -> c_int { - libz_sys::deflateInit2_( - stream, - level, - method, - window_bits, - mem_level, - strategy, - ZLIB_VERSION.as_ptr() as *const c_char, - mem::size_of::<mz_stream>() as c_int, - ) - } - pub unsafe extern "C" fn mz_inflateInit2(stream: *mut mz_stream, window_bits: c_int) -> c_int { - libz_sys::inflateInit2_( - stream, - window_bits, - ZLIB_VERSION.as_ptr() as *const c_char, - mem::size_of::<mz_stream>() as c_int, - ) - } -} - -/// Cloudflare optimized Zlib specific -#[cfg(all(feature = "cloudflare_zlib", not(feature = "zlib-ng-compat")))] +/// Zlib specific - make zlib mimic miniz' API +#[cfg(feature = "any_zlib")] #[allow(bad_style)] mod c_backend { use libc::{c_char, c_int}; use std::mem; - pub use cloudflare_zlib_sys::deflate as mz_deflate; - pub use cloudflare_zlib_sys::deflateEnd as mz_deflateEnd; - pub use cloudflare_zlib_sys::deflateReset as mz_deflateReset; - pub use cloudflare_zlib_sys::inflate as mz_inflate; - pub use cloudflare_zlib_sys::inflateEnd as mz_inflateEnd; - pub use cloudflare_zlib_sys::z_stream as mz_stream; - pub use cloudflare_zlib_sys::*; - - pub use cloudflare_zlib_sys::Z_BLOCK as MZ_BLOCK; - pub use cloudflare_zlib_sys::Z_BUF_ERROR as MZ_BUF_ERROR; - pub use cloudflare_zlib_sys::Z_DATA_ERROR as MZ_DATA_ERROR; - pub use cloudflare_zlib_sys::Z_DEFAULT_STRATEGY as MZ_DEFAULT_STRATEGY; - pub use cloudflare_zlib_sys::Z_DEFLATED as MZ_DEFLATED; - pub use cloudflare_zlib_sys::Z_FINISH as MZ_FINISH; - pub use cloudflare_zlib_sys::Z_FULL_FLUSH as MZ_FULL_FLUSH; - pub use cloudflare_zlib_sys::Z_NEED_DICT as MZ_NEED_DICT; - pub use cloudflare_zlib_sys::Z_NO_FLUSH as MZ_NO_FLUSH; - pub use cloudflare_zlib_sys::Z_OK as MZ_OK; - pub use cloudflare_zlib_sys::Z_PARTIAL_FLUSH as MZ_PARTIAL_FLUSH; - pub use cloudflare_zlib_sys::Z_STREAM_END as MZ_STREAM_END; - pub use cloudflare_zlib_sys::Z_STREAM_ERROR as MZ_STREAM_ERROR; - pub use cloudflare_zlib_sys::Z_SYNC_FLUSH as MZ_SYNC_FLUSH; - pub type AllocSize = cloudflare_zlib_sys::uInt; + #[cfg(feature = "cloudflare_zlib")] + use cloudflare_zlib_sys as libz; + #[cfg(not(feature = "cloudflare_zlib"))] + use libz_sys as libz; + + pub use libz::deflate as mz_deflate; + pub use libz::deflateEnd as mz_deflateEnd; + pub use libz::deflateReset as mz_deflateReset; + pub use libz::inflate as mz_inflate; + pub use libz::inflateEnd as mz_inflateEnd; + pub use libz::z_stream as mz_stream; + pub use libz::*; + + pub use libz::Z_BLOCK as MZ_BLOCK; + pub use libz::Z_BUF_ERROR as MZ_BUF_ERROR; + pub use libz::Z_DATA_ERROR as MZ_DATA_ERROR; + pub use libz::Z_DEFAULT_STRATEGY as MZ_DEFAULT_STRATEGY; + pub use libz::Z_DEFLATED as MZ_DEFLATED; + pub use libz::Z_FINISH as MZ_FINISH; + pub use libz::Z_FULL_FLUSH as MZ_FULL_FLUSH; + pub use libz::Z_NEED_DICT as MZ_NEED_DICT; + pub use libz::Z_NO_FLUSH as MZ_NO_FLUSH; + pub use libz::Z_OK as MZ_OK; + pub use libz::Z_PARTIAL_FLUSH as MZ_PARTIAL_FLUSH; + pub use libz::Z_STREAM_END as MZ_STREAM_END; + pub use libz::Z_STREAM_ERROR as MZ_STREAM_ERROR; + pub use libz::Z_SYNC_FLUSH as MZ_SYNC_FLUSH; + pub type AllocSize = libz::uInt; pub const MZ_DEFAULT_WINDOW_BITS: c_int = 15; @@ -446,7 +422,7 @@ mod c_backend { mem_level: c_int, strategy: c_int, ) -> c_int { - cloudflare_zlib_sys::deflateInit2_( + libz::deflateInit2_( stream, level, method, @@ -458,7 +434,7 @@ mod c_backend { ) } pub unsafe extern "C" fn mz_inflateInit2(stream: *mut mz_stream, window_bits: c_int) -> c_int { - cloudflare_zlib_sys::inflateInit2_( + libz::inflateInit2_( stream, window_bits, ZLIB_VERSION.as_ptr() as *const c_char, diff --git a/src/ffi/mod.rs b/src/ffi/mod.rs index 4135e11..83e632d 100644 --- a/src/ffi/mod.rs +++ b/src/ffi/mod.rs @@ -44,3 +44,9 @@ cfg_if::cfg_if! { pub use self::rust::*; } } + +impl std::fmt::Debug for ErrorMessage { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + self.get().fmt(f) + } +} diff --git a/src/ffi/rust.rs b/src/ffi/rust.rs index ea95a29..eadd6ec 100644 --- a/src/ffi/rust.rs +++ b/src/ffi/rust.rs @@ -16,6 +16,16 @@ pub const MZ_FINISH: isize = MZFlush::Finish as isize; use super::*; use crate::mem; +// miniz_oxide doesn't provide any error messages (yet?) +#[derive(Default)] +pub struct ErrorMessage; + +impl ErrorMessage { + pub fn get(&self) -> Option<&str> { + None + } +} + fn format_from_bool(zlib_header: bool) -> DataFormat { if zlib_header { DataFormat::Zlib @@ -73,7 +83,7 @@ impl InflateBackend for Inflate { }, Err(status) => match status { MZError::Buf => Ok(Status::BufError), - _ => mem::decompress_failed(), + _ => mem::decompress_failed(ErrorMessage), }, } } @@ -144,11 +154,11 @@ impl DeflateBackend for Deflate { Ok(status) => match status { MZStatus::Ok => Ok(Status::Ok), MZStatus::StreamEnd => Ok(Status::StreamEnd), - MZStatus::NeedDict => Err(CompressError(())), + MZStatus::NeedDict => mem::compress_failed(ErrorMessage), }, Err(status) => match status { MZError::Buf => Ok(Status::BufError), - _ => Err(CompressError(())), + _ => mem::compress_failed(ErrorMessage), }, } } diff --git a/src/gz/bufread.rs b/src/gz/bufread.rs index b66ab72..eb0b332 100644 --- a/src/gz/bufread.rs +++ b/src/gz/bufread.rs @@ -10,7 +10,7 @@ use tokio_io::{AsyncRead, AsyncWrite}; use super::{GzBuilder, GzHeader}; use super::{FCOMMENT, FEXTRA, FHCRC, FNAME}; -use crate::crc::CrcReader; +use crate::crc::{Crc, CrcReader}; use crate::deflate; use crate::Compression; @@ -34,87 +34,107 @@ fn bad_header() -> io::Error { io::Error::new(io::ErrorKind::InvalidInput, "invalid gzip header") } -fn read_le_u16<R: Read>(r: &mut R) -> io::Result<u16> { +fn read_le_u16<R: Read>(r: &mut Buffer<R>) -> io::Result<u16> { let mut b = [0; 2]; - r.read_exact(&mut b)?; + r.read_and_forget(&mut b)?; Ok((b[0] as u16) | ((b[1] as u16) << 8)) } -pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> { - let mut crc_reader = CrcReader::new(r); - let mut header = [0; 10]; - crc_reader.read_exact(&mut header)?; - - let id1 = header[0]; - let id2 = header[1]; - if id1 != 0x1f || id2 != 0x8b { - return Err(bad_header()); - } - let cm = header[2]; - if cm != 8 { - return Err(bad_header()); - } - - let flg = header[3]; - let mtime = ((header[4] as u32) << 0) - | ((header[5] as u32) << 8) - | ((header[6] as u32) << 16) - | ((header[7] as u32) << 24); - let _xfl = header[8]; - let os = header[9]; - - let extra = if flg & FEXTRA != 0 { - let xlen = read_le_u16(&mut crc_reader)?; - let mut extra = vec![0; xlen as usize]; - crc_reader.read_exact(&mut extra)?; - Some(extra) - } else { - None - }; - let filename = if flg & FNAME != 0 { - // wow this is slow - let mut b = Vec::new(); - for byte in crc_reader.by_ref().bytes() { - let byte = byte?; - if byte == 0 { - break; +fn read_gz_header_part<'a, R: Read>(r: &'a mut Buffer<'a, R>) -> io::Result<()> { + loop { + match r.part.state { + GzHeaderParsingState::Start => { + let mut header = [0; 10]; + r.read_and_forget(&mut header)?; + + if header[0] != 0x1f || header[1] != 0x8b { + return Err(bad_header()); + } + if header[2] != 8 { + return Err(bad_header()); + } + + r.part.flg = header[3]; + r.part.header.mtime = ((header[4] as u32) << 0) + | ((header[5] as u32) << 8) + | ((header[6] as u32) << 16) + | ((header[7] as u32) << 24); + let _xfl = header[8]; + r.part.header.operating_system = header[9]; + r.part.state = GzHeaderParsingState::Xlen; } - b.push(byte); - } - Some(b) - } else { - None - }; - let comment = if flg & FCOMMENT != 0 { - // wow this is slow - let mut b = Vec::new(); - for byte in crc_reader.by_ref().bytes() { - let byte = byte?; - if byte == 0 { - break; + GzHeaderParsingState::Xlen => { + if r.part.flg & FEXTRA != 0 { + r.part.xlen = read_le_u16(r)?; + } + r.part.state = GzHeaderParsingState::Extra; + } + GzHeaderParsingState::Extra => { + if r.part.flg & FEXTRA != 0 { + let mut extra = vec![0; r.part.xlen as usize]; + r.read_and_forget(&mut extra)?; + r.part.header.extra = Some(extra); + } + r.part.state = GzHeaderParsingState::Filename; + } + GzHeaderParsingState::Filename => { + if r.part.flg & FNAME != 0 { + if None == r.part.header.filename { + r.part.header.filename = Some(Vec::new()); + }; + for byte in r.bytes() { + let byte = byte?; + if byte == 0 { + break; + } + } + } + r.part.state = GzHeaderParsingState::Comment; + } + GzHeaderParsingState::Comment => { + if r.part.flg & FCOMMENT != 0 { + if None == r.part.header.comment { + r.part.header.comment = Some(Vec::new()); + }; + for byte in r.bytes() { + let byte = byte?; + if byte == 0 { + break; + } + } + } + r.part.state = GzHeaderParsingState::Crc; + } + GzHeaderParsingState::Crc => { + if r.part.flg & FHCRC != 0 { + let stored_crc = read_le_u16(r)?; + let calced_crc = r.part.crc.sum() as u16; + if stored_crc != calced_crc { + return Err(corrupt()); + } + } + return Ok(()); } - b.push(byte); } - Some(b) - } else { - None + } +} + +pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> { + let mut part = GzHeaderPartial::new(); + + let result = { + let mut reader = Buffer::new(&mut part, r); + read_gz_header_part(&mut reader) }; - if flg & FHCRC != 0 { - let calced_crc = crc_reader.crc().sum() as u16; - let stored_crc = read_le_u16(&mut crc_reader)?; - if calced_crc != stored_crc { - return Err(corrupt()); + match result { + Ok(()) => { + return Ok(part.take_header()); } - } - - Ok(GzHeader { - extra: extra, - filename: filename, - comment: comment, - operating_system: os, - mtime: mtime, - }) + Err(err) => { + return Err(err); + } + }; } /// A gzip streaming encoder @@ -305,8 +325,51 @@ pub struct GzDecoder<R> { } #[derive(Debug)] +pub enum GzHeaderParsingState { + Start, + Xlen, + Extra, + Filename, + Comment, + Crc, +} + +#[derive(Debug)] +pub struct GzHeaderPartial { + buf: Vec<u8>, + state: GzHeaderParsingState, + flg: u8, + xlen: u16, + crc: Crc, + header: GzHeader, +} + +impl GzHeaderPartial { + fn new() -> GzHeaderPartial { + GzHeaderPartial { + buf: Vec::with_capacity(10), // minimum header length + state: GzHeaderParsingState::Start, + flg: 0, + xlen: 0, + crc: Crc::new(), + header: GzHeader { + extra: None, + filename: None, + comment: None, + operating_system: 0, + mtime: 0, + }, + } + } + + pub fn take_header(self) -> GzHeader { + return self.header; + } +} + +#[derive(Debug)] enum GzState { - Header(Vec<u8>), + Header(GzHeaderPartial), Body, Finished(usize, [u8; 8]), Err(io::Error), @@ -317,55 +380,91 @@ enum GzState { /// further data from `reader`. This will also buffer all data read from /// `reader` into `buf` for reuse on a further call. struct Buffer<'a, T: 'a> { - buf: &'a mut Vec<u8>, + part: &'a mut GzHeaderPartial, buf_cur: usize, buf_max: usize, reader: &'a mut T, } impl<'a, T> Buffer<'a, T> { - fn new(buf: &'a mut Vec<u8>, reader: &'a mut T) -> Buffer<'a, T> { + fn new(part: &'a mut GzHeaderPartial, reader: &'a mut T) -> Buffer<'a, T> { Buffer { reader, buf_cur: 0, - buf_max: buf.len(), - buf, + buf_max: part.buf.len(), + part, } } } impl<'a, T: Read> Read for Buffer<'a, T> { fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - if self.buf_cur == self.buf_max { + let mut bufref = match self.part.state { + GzHeaderParsingState::Filename => self.part.header.filename.as_mut(), + GzHeaderParsingState::Comment => self.part.header.comment.as_mut(), + _ => None, + }; + if let Some(ref mut b) = bufref { + // we have a direct reference to a buffer where to write let len = self.reader.read(buf)?; - self.buf.extend_from_slice(&buf[..len]); + if len > 0 && buf[len - 1] == 0 { + // we do not append the final 0 + b.extend_from_slice(&buf[..len - 1]); + } else { + b.extend_from_slice(&buf[..len]); + } + self.part.crc.update(&buf[..len]); + Ok(len) + } else if self.buf_cur == self.buf_max { + // we read new bytes and also save them in self.part.buf + let len = self.reader.read(buf)?; + self.part.buf.extend_from_slice(&buf[..len]); + self.part.crc.update(&buf[..len]); Ok(len) } else { - let len = (&self.buf[self.buf_cur..self.buf_max]).read(buf)?; + // we first read the previously saved bytes + let len = (&self.part.buf[self.buf_cur..self.buf_max]).read(buf)?; self.buf_cur += len; Ok(len) } } } +impl<'a, T> Buffer<'a, T> +where + T: std::io::Read, +{ + // If we manage to read all the bytes, we reset the buffer + fn read_and_forget(&mut self, buf: &mut [u8]) -> io::Result<usize> { + self.read_exact(buf)?; + // we managed to read the whole buf + // we will no longer need the previously saved bytes in self.part.buf + let rlen = buf.len(); + self.part.buf.truncate(0); + self.buf_cur = 0; + self.buf_max = 0; + return Ok(rlen); + } +} + impl<R: BufRead> GzDecoder<R> { /// Creates a new decoder from the given reader, immediately parsing the /// gzip header. pub fn new(mut r: R) -> GzDecoder<R> { - let mut buf = Vec::with_capacity(10); // minimum header length + let mut part = GzHeaderPartial::new(); let mut header = None; let result = { - let mut reader = Buffer::new(&mut buf, &mut r); - read_gz_header(&mut reader) + let mut reader = Buffer::new(&mut part, &mut r); + read_gz_header_part(&mut reader) }; let state = match result { - Ok(hdr) => { - header = Some(hdr); + Ok(()) => { + header = Some(part.take_header()); GzState::Body } - Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(buf), + Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(part), Err(err) => GzState::Err(err), }; @@ -419,20 +518,23 @@ impl<R: BufRead> Read for GzDecoder<R> { loop { *inner = match mem::replace(inner, GzState::End) { - GzState::Header(mut buf) => { + GzState::Header(mut part) => { let result = { - let mut reader = Buffer::new(&mut buf, reader.get_mut().get_mut()); - read_gz_header(&mut reader) + let mut reader = Buffer::new(&mut part, reader.get_mut().get_mut()); + read_gz_header_part(&mut reader) }; - let hdr = result.map_err(|err| { - if io::ErrorKind::WouldBlock == err.kind() { - *inner = GzState::Header(buf); + let state = match result { + Ok(()) => { + *header = Some(part.take_header()); + GzState::Body } - - err - })?; - *header = Some(hdr); - GzState::Body + Err(err) if io::ErrorKind::WouldBlock == err.kind() => { + *inner = GzState::Header(part); + return Err(err); + } + Err(err) => return Err(err), + }; + state } GzState::Body => { if into.is_empty() { @@ -503,7 +605,7 @@ impl<R: BufRead> Read for GzDecoder<R> { reader.reset(); reader.get_mut().reset_data(); header.take(); - GzState::Header(Vec::with_capacity(10)) + GzState::Header(GzHeaderPartial::new()) } } else { GzState::End @@ -639,3 +741,156 @@ impl<R: AsyncWrite + BufRead> AsyncWrite for MultiGzDecoder<R> { self.get_mut().shutdown() } } + +#[cfg(test)] +pub mod tests { + use crate::gz::bufread::*; + use std::io; + use std::io::{Cursor, Read, Write}; + + //a cursor turning EOF into blocking errors + #[derive(Debug)] + pub struct BlockingCursor { + pub cursor: Cursor<Vec<u8>>, + } + + impl BlockingCursor { + pub fn new() -> BlockingCursor { + BlockingCursor { + cursor: Cursor::new(Vec::new()), + } + } + + pub fn set_position(&mut self, pos: u64) { + return self.cursor.set_position(pos); + } + + pub fn position(&mut self) -> u64 { + return self.cursor.position(); + } + } + + impl Write for BlockingCursor { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + return self.cursor.write(buf); + } + fn flush(&mut self) -> io::Result<()> { + return self.cursor.flush(); + } + } + + impl Read for BlockingCursor { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + //use the cursor, except it turns eof into blocking error + let r = self.cursor.read(buf); + match r { + Err(ref err) => { + if err.kind() == io::ErrorKind::UnexpectedEof { + return Err(io::ErrorKind::WouldBlock.into()); + } + } + Ok(0) => { + //regular EOF turned into blocking error + return Err(io::ErrorKind::WouldBlock.into()); + } + Ok(_n) => {} + } + return r; + } + } + #[test] + // test function read_and_forget of Buffer + fn buffer_read_and_forget() { + // this is unused except for the buffering + let mut part = GzHeaderPartial::new(); + // this is a reader which receives data afterwards + let mut r = BlockingCursor::new(); + let data = vec![1, 2, 3]; + let mut out = Vec::with_capacity(7); + + match r.write_all(&data) { + Ok(()) => {} + _ => { + panic!("Unexpected result for write_all"); + } + } + r.set_position(0); + + // First read : successful for one byte + let mut reader = Buffer::new(&mut part, &mut r); + out.resize(1, 0); + match reader.read_and_forget(&mut out) { + Ok(1) => {} + _ => { + panic!("Unexpected result for read_and_forget with data"); + } + } + + // Second read : incomplete for 7 bytes (we have only 2) + out.resize(7, 0); + match reader.read_and_forget(&mut out) { + Err(ref err) => { + assert_eq!(io::ErrorKind::WouldBlock, err.kind()); + } + _ => { + panic!("Unexpected result for read_and_forget with incomplete"); + } + } + + // 3 more data bytes have arrived + let pos = r.position(); + let data2 = vec![4, 5, 6]; + match r.write_all(&data2) { + Ok(()) => {} + _ => { + panic!("Unexpected result for write_all"); + } + } + r.set_position(pos); + + // Third read : still incomplete for 7 bytes (we have 5) + let mut reader2 = Buffer::new(&mut part, &mut r); + match reader2.read_and_forget(&mut out) { + Err(ref err) => { + assert_eq!(io::ErrorKind::WouldBlock, err.kind()); + } + _ => { + panic!("Unexpected result for read_and_forget with more incomplete"); + } + } + + // 3 more data bytes have arrived again + let pos2 = r.position(); + let data3 = vec![7, 8, 9]; + match r.write_all(&data3) { + Ok(()) => {} + _ => { + panic!("Unexpected result for write_all"); + } + } + r.set_position(pos2); + + // Fourth read : now succesful for 7 bytes + let mut reader3 = Buffer::new(&mut part, &mut r); + match reader3.read_and_forget(&mut out) { + Ok(7) => { + assert_eq!(out[0], 2); + assert_eq!(out[6], 8); + } + _ => { + panic!("Unexpected result for read_and_forget with data"); + } + } + + // Fifth read : succesful for one more byte + out.resize(1, 0); + match reader3.read_and_forget(&mut out) { + Ok(1) => { + assert_eq!(out[0], 9); + } + _ => { + panic!("Unexpected result for read_and_forget with data"); + } + } + } +} diff --git a/src/gz/mod.rs b/src/gz/mod.rs index 3108035..3c894c9 100644 --- a/src/gz/mod.rs +++ b/src/gz/mod.rs @@ -356,4 +356,33 @@ mod tests { write!(f, "Hello world").unwrap(); f.flush().unwrap(); } + + use crate::gz::bufread::tests::BlockingCursor; + #[test] + // test function read_and_forget of Buffer + fn blocked_partial_header_read() { + // this is a reader which receives data afterwards + let mut r = BlockingCursor::new(); + let data = vec![1, 2, 3]; + + match r.write_all(&data) { + Ok(()) => {} + _ => { + panic!("Unexpected result for write_all"); + } + } + r.set_position(0); + + // this is unused except for the buffering + let mut decoder = read::GzDecoder::new(r); + let mut out = Vec::with_capacity(7); + match decoder.read(&mut out) { + Err(e) => { + assert_eq!(e.kind(), std::io::ErrorKind::WouldBlock); + } + _ => { + panic!("Unexpected result for decoder.read"); + } + } + } } @@ -3,7 +3,7 @@ use std::fmt; use std::io; use std::slice; -use crate::ffi::{self, Backend, Deflate, DeflateBackend, Inflate, InflateBackend}; +use crate::ffi::{self, Backend, Deflate, DeflateBackend, ErrorMessage, Inflate, InflateBackend}; use crate::Compression; /// Raw in-memory compression stream for blocks of data. @@ -114,9 +114,10 @@ pub enum FlushDecompress { } /// The inner state for an error when decompressing -#[derive(Debug, Default)] -pub(crate) struct DecompressErrorInner { - pub(crate) needs_dictionary: Option<u32>, +#[derive(Debug)] +pub(crate) enum DecompressErrorInner { + General { msg: ErrorMessage }, + NeedsDictionary(u32), } /// Error returned when a decompression object finds that the input stream of @@ -130,26 +131,36 @@ impl DecompressError { /// The resulting integer is the Adler-32 checksum of the dictionary /// required. pub fn needs_dictionary(&self) -> Option<u32> { - self.0.needs_dictionary + match self.0 { + DecompressErrorInner::NeedsDictionary(adler) => Some(adler), + _ => None, + } } } #[inline] -pub(crate) fn decompress_failed() -> Result<Status, DecompressError> { - Err(DecompressError(Default::default())) +pub(crate) fn decompress_failed<T>(msg: ErrorMessage) -> Result<T, DecompressError> { + Err(DecompressError(DecompressErrorInner::General { msg })) } #[inline] -pub(crate) fn decompress_need_dict(adler: u32) -> Result<Status, DecompressError> { - Err(DecompressError(DecompressErrorInner { - needs_dictionary: Some(adler), - })) +pub(crate) fn decompress_need_dict<T>(adler: u32) -> Result<T, DecompressError> { + Err(DecompressError(DecompressErrorInner::NeedsDictionary( + adler, + ))) } /// Error returned when a compression object is used incorrectly or otherwise /// generates an error. #[derive(Debug)] -pub struct CompressError(pub(crate) ()); +pub struct CompressError { + pub(crate) msg: ErrorMessage, +} + +#[inline] +pub(crate) fn compress_failed<T>(msg: ErrorMessage) -> Result<T, CompressError> { + Err(CompressError { msg }) +} /// Possible status results of compressing some data or successfully /// decompressing a block of data. @@ -241,7 +252,7 @@ impl Compress { /// /// This constructor is only available when the `zlib` feature is used. /// Other backends currently do not support gzip headers for Compress. - #[cfg(feature = "zlib")] + #[cfg(feature = "any_zlib")] pub fn new_gzip(level: Compression, window_bits: u8) -> Compress { assert!( window_bits > 8 && window_bits < 16, @@ -270,13 +281,14 @@ impl Compress { #[cfg(feature = "any_zlib")] pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result<u32, CompressError> { let stream = &mut *self.inner.inner.stream_wrapper; + stream.msg = std::ptr::null_mut(); let rc = unsafe { assert!(dictionary.len() < ffi::uInt::max_value() as usize); ffi::deflateSetDictionary(stream, dictionary.as_ptr(), dictionary.len() as ffi::uInt) }; match rc { - ffi::MZ_STREAM_ERROR => Err(CompressError(())), + ffi::MZ_STREAM_ERROR => compress_failed(self.inner.inner.msg()), ffi::MZ_OK => Ok(stream.adler as u32), c => panic!("unknown return code: {}", c), } @@ -303,12 +315,13 @@ impl Compress { pub fn set_level(&mut self, level: Compression) -> Result<(), CompressError> { use libc::c_int; let stream = &mut *self.inner.inner.stream_wrapper; + stream.msg = std::ptr::null_mut(); let rc = unsafe { ffi::deflateParams(stream, level.0 as c_int, ffi::MZ_DEFAULT_STRATEGY) }; match rc { ffi::MZ_OK => Ok(()), - ffi::MZ_BUF_ERROR => Err(CompressError(())), + ffi::MZ_BUF_ERROR => compress_failed(self.inner.inner.msg()), c => panic!("unknown return code: {}", c), } } @@ -410,7 +423,7 @@ impl Decompress { /// /// This constructor is only available when the `zlib` feature is used. /// Other backends currently do not support gzip headers for Decompress. - #[cfg(feature = "zlib")] + #[cfg(feature = "any_zlib")] pub fn new_gzip(window_bits: u8) -> Decompress { assert!( window_bits > 8 && window_bits < 16, @@ -503,16 +516,15 @@ impl Decompress { #[cfg(feature = "any_zlib")] pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result<u32, DecompressError> { let stream = &mut *self.inner.inner.stream_wrapper; + stream.msg = std::ptr::null_mut(); let rc = unsafe { assert!(dictionary.len() < ffi::uInt::max_value() as usize); ffi::inflateSetDictionary(stream, dictionary.as_ptr(), dictionary.len() as ffi::uInt) }; match rc { - ffi::MZ_STREAM_ERROR => Err(DecompressError(Default::default())), - ffi::MZ_DATA_ERROR => Err(DecompressError(DecompressErrorInner { - needs_dictionary: Some(stream.adler as u32), - })), + ffi::MZ_STREAM_ERROR => decompress_failed(self.inner.inner.msg()), + ffi::MZ_DATA_ERROR => decompress_need_dict(stream.adler as u32), ffi::MZ_OK => Ok(stream.adler as u32), c => panic!("unknown return code: {}", c), } @@ -533,6 +545,16 @@ impl Decompress { impl Error for DecompressError {} +impl DecompressError { + /// Retrieve the implementation's message about why the operation failed, if one exists. + pub fn message(&self) -> Option<&str> { + match &self.0 { + DecompressErrorInner::General { msg } => msg.get(), + _ => None, + } + } +} + impl From<DecompressError> for io::Error { fn from(data: DecompressError) -> io::Error { io::Error::new(io::ErrorKind::Other, data) @@ -541,12 +563,26 @@ impl From<DecompressError> for io::Error { impl fmt::Display for DecompressError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "deflate decompression error") + let msg = match &self.0 { + DecompressErrorInner::General { msg } => msg.get(), + DecompressErrorInner::NeedsDictionary { .. } => Some("requires a dictionary"), + }; + match msg { + Some(msg) => write!(f, "deflate decompression error: {}", msg), + None => write!(f, "deflate decompression error"), + } } } impl Error for CompressError {} +impl CompressError { + /// Retrieve the implementation's message about why the operation failed, if one exists. + pub fn message(&self) -> Option<&str> { + self.msg.get() + } +} + impl From<CompressError> for io::Error { fn from(data: CompressError) -> io::Error { io::Error::new(io::ErrorKind::Other, data) @@ -555,7 +591,10 @@ impl From<CompressError> for io::Error { impl fmt::Display for CompressError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "deflate decompression error") + match self.msg.get() { + Some(msg) => write!(f, "deflate compression error: {}", msg), + None => write!(f, "deflate compression error"), + } } } @@ -707,7 +746,7 @@ mod tests { assert_eq!(&decoded[..decoder.total_out() as usize], string); } - #[cfg(feature = "zlib")] + #[cfg(feature = "any_zlib")] #[test] fn test_gzip_flate() { let string = "hello, hello!".as_bytes(); @@ -732,4 +771,18 @@ mod tests { assert_eq!(&decoded[..decoder.total_out() as usize], string); } + + #[cfg(feature = "any_zlib")] + #[test] + fn test_error_message() { + let mut decoder = Decompress::new(false); + let mut decoded = [0; 128]; + let garbage = b"xbvxzi"; + + let err = decoder + .decompress(&*garbage, &mut decoded, FlushDecompress::Finish) + .unwrap_err(); + + assert_eq!(err.message(), Some("invalid stored block lengths")); + } } diff --git a/tests/corrupt-file.gz b/tests/corrupt-gz-file.bin Binary files differindex 159333b..159333b 100644 --- a/tests/corrupt-file.gz +++ b/tests/corrupt-gz-file.bin diff --git a/tests/gunzip.rs b/tests/gunzip.rs index 855c620..c382032 100644 --- a/tests/gunzip.rs +++ b/tests/gunzip.rs @@ -33,7 +33,7 @@ fn test_extract_success_partial_multi() { // test extraction fails on a corrupt file #[test] fn test_extract_failure() { - let result = extract_file(Path::new("tests/corrupt-file.gz")); + let result = extract_file(Path::new("tests/corrupt-gz-file.bin")); assert_eq!(result.err().unwrap().kind(), io::ErrorKind::InvalidInput); } |