author     Jeff Sharkey <jsharkey@android.com>                     2018-10-02 09:13:26 -0700
committer  android-build-merger <android-build-merger@google.com>  2018-10-02 09:13:26 -0700
commit     b6bfd1505ddccaa337c58bc0073d63ada4f4ad63 (patch)
tree       62a698a705643b95f7d73e2298a151d83a77de31
parent     2241a05ab272e082a64c466e7f10a620a2de42ff (diff)
parent     4238dbe0303da3e25c9d688a9584998297e6ca9d (diff)
download   apache-commons-compress-b6bfd1505ddccaa337c58bc0073d63ada4f4ad63.tar.gz
Merge remote-tracking branch 'remotes/aosp/upstream-master' am: e19ee35f77 am: 5158a15215
am: 4238dbe030
Change-Id: Idb30f79f30772e33df256711e3fa050145c1bf77
597 files changed, 84934 insertions, 0 deletions
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..a02cd590f
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,17 @@
+*.ar binary
+*.arj binary
+*.java text diff=java
+*.md text
+*.txt text
+*.xml text
+*.yml text
+*.zst binary
+.gitattributes text
+# These must be identical to the copies in SHRUNK.ZIP
+src/test/resources/test1.xml eol=lf
+src/test/resources/test2.xml eol=lf
+src/test/resources/test3.xml eol=lf
+src/test/resources/test4.xml eol=lf
+src/test/resources/test?with?spaces.txt eol=lf
+src/test/resources/test.txt eol=lf
+src/test/resources/COMPRESS-380-input binary
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..a868385df
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+target
+.project
+.classpath
+.settings
+.idea
+*.iml
+*~
+/.externalToolBuilders/
+/maven-eclipse.xml
diff --git a/.mvn/wrapper/maven-wrapper.jar b/.mvn/wrapper/maven-wrapper.jar
Binary files differ
new file mode 100644
index 000000000..9cc84ea9b
--- /dev/null
+++ b/.mvn/wrapper/maven-wrapper.jar
diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties
new file mode 100644
index 000000000..56bb0164e
--- /dev/null
+++ b/.mvn/wrapper/maven-wrapper.properties
@@ -0,0 +1 @@
+distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.5.0/apache-maven-3.5.0-bin.zip
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 000000000..afcff3efa
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+sudo: false
+language: java
+
+jdk:
+  - openjdk7
+  - openjdk8
+  - oraclejdk8
+  - oraclejdk9
+  - oraclejdk10
+
+cache:
+  directories:
+    - $HOME/.m2
+    - .mvn
+# don't run mvn install before building
+install: true
+
+# do all the building and testing in a single phase, instead of compiling everything three times
+# and running all tests twice.
+
+script:
+  - ./mvnw clean apache-rat:check test jacoco:report coveralls:report -Ptravis-jacoco
diff --git a/BUILDING.md b/BUILDING.md
new file mode 100644
index 000000000..616e27120
--- /dev/null
+++ b/BUILDING.md
@@ -0,0 +1,55 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+# Building Apache Commons Compress
+
+In order to build Commons Compress, a JDK implementation 1.7 or higher
+and Apache Maven 3.x are required.
+
+To install the jars into your local Maven repository, simply run
+
+    mvn clean install
+
+which will also run the unit tests.
+
+Some tests are only run when specific profiles are enabled; these
+tests require a lot of disk space as they test behavior for very large
+archives.
+
+    mvn test -Prun-tarit
+
+runs tests for tar archives and requires more than 8 GiB of disk space.
+
+    mvn test -Prun-zipit
+
+runs tests for zip archives that require up to 20 GiB of disk
+space. In addition the tests will run for a long time (more than ten
+minutes, maybe even longer depending on your hardware) and heavily
+load the CPU at times.
+
+## Building the Site
+
+The site build produces license release audit (aka RAT) reports as
+well as PMD and findbugs reports. Clirr didn't work for us anymore, so
+we switched to japicmp; the same is true for Cobertura, which we had to
+replace with jacoco.
+
+japicmp requires the jar to be present when the site is built;
+therefore the package goal must be executed before creating the site.
+
+    mvn package site -Pjacoco
+
+builds the site.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..aaa2c87d5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,115 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<!--- + +======================================================================+ + |**** ****| + |**** THIS FILE IS GENERATED BY THE COMMONS BUILD PLUGIN ****| + |**** DO NOT EDIT DIRECTLY ****| + |**** ****| + +======================================================================+ + | TEMPLATE FILE: contributing-md-template.md | + | commons-build-plugin/trunk/src/main/resources/commons-xdoc-templates | + +======================================================================+ + | | + | 1) Re-generate using: mvn commons:contributing-md | + | | + | 2) Set the following properties in the component's pom: | + | - commons.jira.id (required, alphabetic, upper case) | + | | + | 3) Example Properties | + | | + | <properties> | + | <commons.jira.id>MATH</commons.jira.id> | + | </properties> | + | | + +======================================================================+ +---> +Contributing to Apache Commons Compress +====================== + +You have found a bug or you have an idea for a cool new feature? Contributing code is a great way to give something back to +the open source community. Before you dig right into the code there are a few guidelines that we need contributors to +follow so that we can have a chance of keeping on top of things. + +Getting Started +--------------- + ++ Make sure you have a [JIRA account](https://issues.apache.org/jira/). ++ Make sure you have a [GitHub account](https://github.com/signup/free). ++ If you're planning to implement a new feature it makes sense to discuss your changes on the [dev list](https://commons.apache.org/mail-lists.html) first. This way you can make sure you're not wasting your time on something that isn't considered to be in Apache Commons Compress's scope. ++ Submit a [Jira Ticket][jira] for your issue, assuming one does not already exist. + + Clearly describe the issue including steps to reproduce when it is a bug. + + Make sure you fill in the earliest version that you know has the issue. ++ Find the corresponding [repository on GitHub](https://github.com/apache/?query=commons-), +[fork](https://help.github.com/articles/fork-a-repo/) and check out your forked repository. + +Making Changes +-------------- + ++ Create a _topic branch_ for your isolated work. + * Usually you should base your branch on the `master` or `trunk` branch. + * A good topic branch name can be the JIRA bug id plus a keyword, e.g. `COMPRESS-123-InputStream`. + * If you have submitted multiple JIRA issues, try to maintain separate branches and pull requests. ++ Make commits of logical units. 
+ * Make sure your commit messages are meaningful and in the proper format. Your commit message should contain the key of the JIRA issue. + * e.g. `COMPRESS-123: Close input stream earlier` ++ Respect the original code style: + + Only use spaces for indentation. + + Create minimal diffs - disable _On Save_ actions like _Reformat Source Code_ or _Organize Imports_. If you feel the source code should be reformatted create a separate PR for this change first. + + Check for unnecessary whitespace with `git diff` -- check before committing. ++ Make sure you have added the necessary tests for your changes, typically in `src/test/java`. ++ Run all the tests with `mvn clean verify` to assure nothing else was accidentally broken. + +Making Trivial Changes +---------------------- + +The JIRA tickets are used to generate the changelog for the next release. + +For changes of a trivial nature to comments and documentation, it is not always necessary to create a new ticket in JIRA. +In this case, it is appropriate to start the first line of a commit with '(doc)' instead of a ticket number. + + +Submitting Changes +------------------ + ++ Sign and submit the Apache [Contributor License Agreement][cla] if you haven't already. + * Note that small patches & typical bug fixes do not require a CLA as + clause 5 of the [Apache License](https://www.apache.org/licenses/LICENSE-2.0.html#contributions) + covers them. ++ Push your changes to a topic branch in your fork of the repository. ++ Submit a _Pull Request_ to the corresponding repository in the `apache` organization. + * Verify _Files Changed_ shows only your intended changes and does not + include additional files like `target/*.class` ++ Update your JIRA ticket and include a link to the pull request in the ticket. + +If you prefer to not use GitHub, then you can instead use +`git format-patch` (or `svn diff`) and attach the patch file to the JIRA issue. + + +Additional Resources +-------------------- + ++ [Contributing patches](https://commons.apache.org/patches.html) ++ [Apache Commons Compress JIRA project page][jira] ++ [Contributor License Agreement][cla] ++ [General GitHub documentation](https://help.github.com/) ++ [GitHub pull request documentation](https://help.github.com/articles/creating-a-pull-request/) ++ [Apache Commons Twitter Account](https://twitter.com/ApacheCommons) ++ `#apache-commons` IRC channel on `irc.freenode.net` + +[cla]:https://www.apache.org/licenses/#clas +[jira]:https://issues.apache.org/jira/browse/COMPRESS diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/NOTICE.txt b/NOTICE.txt new file mode 100644 index 000000000..3fbe8215a --- /dev/null +++ b/NOTICE.txt @@ -0,0 +1,11 @@ +Apache Commons Compress +Copyright 2002-2018 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (https://www.apache.org/). + +The files in the package org.apache.commons.compress.archivers.sevenz +were derived from the LZMA SDK, version 9.20 (C/ and CPP/7zip/), +which has been placed in the public domain: + +"LZMA SDK is placed in the public domain." (http://www.7-zip.org/sdk.html) diff --git a/README.md b/README.md new file mode 100644 index 000000000..b576ca3d1 --- /dev/null +++ b/README.md @@ -0,0 +1,108 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<!--- + +======================================================================+ + |**** ****| + |**** THIS FILE IS GENERATED BY THE COMMONS BUILD PLUGIN ****| + |**** DO NOT EDIT DIRECTLY ****| + |**** ****| + +======================================================================+ + | TEMPLATE FILE: readme-md-template.md | + | commons-build-plugin/trunk/src/main/resources/commons-xdoc-templates | + +======================================================================+ + | | + | 1) Re-generate using: mvn commons:readme-md | + | | + | 2) Set the following properties in the component's pom: | + | - commons.componentid (required, alphabetic, lower case) | + | - commons.release.version (required) | + | | + | 3) Example Properties | + | | + | <properties> | + | <commons.componentid>math</commons.componentid> | + | <commons.release.version>1.2</commons.release.version> | + | </properties> | + | | + +======================================================================+ +---> +Apache Commons Compress +=================== + +[![Build Status](https://travis-ci.org/apache/commons-compress.svg)](https://travis-ci.org/apache/commons-compress) +[![Coverage Status](https://coveralls.io/repos/apache/commons-compress/badge.svg)](https://coveralls.io/r/apache/commons-compress) +[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-compress/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-compress/) +[![License](https://img.shields.io/:license-apache-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0.html) + +Apache Commons Compress software defines an API for working with +compression and archive formats. These include: bzip2, gzip, pack200, +lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4, +Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj. + +Documentation +------------- + +More information can be found on the [Apache Commons Compress homepage](https://commons.apache.org/proper/commons-compress). 
+The [Javadoc](https://commons.apache.org/proper/commons-compress/javadocs/api-release) can be browsed. +Questions related to the usage of Apache Commons Compress should be posted to the [user mailing list][ml]. + +Where can I get the latest release? +----------------------------------- +You can download source and binaries from our [download page](https://commons.apache.org/proper/commons-compress/download_compress.cgi). + +Alternatively you can pull it from the central Maven repositories: + +```xml +<dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-compress</artifactId> + <version>1.18</version> +</dependency> +``` + +Contributing +------------ + +We accept Pull Requests via GitHub. The [developer mailing list][ml] is the main channel of communication for contributors. +There are some guidelines which will make applying PRs easier for us: ++ No tabs! Please use spaces for indentation. ++ Respect the code style. ++ Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change. ++ Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running ```mvn clean test```. + +If you plan to contribute on a regular basis, please consider filing a [contributor license agreement](https://www.apache.org/licenses/#clas). +You can learn more about contributing via GitHub in our [contribution guidelines](CONTRIBUTING.md). + +License +------- +This code is under the [Apache Licence v2](https://www.apache.org/licenses/LICENSE-2.0). + +See the `NOTICE.txt` file for required notices and attributions. + +Donations +--------- +You like Apache Commons Compress? Then [donate back to the ASF](https://www.apache.org/foundation/contributing.html) to support the development. + +Additional Resources +-------------------- + ++ [Apache Commons Homepage](https://commons.apache.org/) ++ [Apache Issue Tracker (JIRA)](https://issues.apache.org/jira/browse/COMPRESS) ++ [Apache Commons Twitter Account](https://twitter.com/ApacheCommons) ++ `#apache-commons` IRC channel on `irc.freenode.org` + +[ml]:https://commons.apache.org/mail-lists.html diff --git a/README.txt b/README.txt new file mode 100644 index 000000000..8a266a711 --- /dev/null +++ b/README.txt @@ -0,0 +1,43 @@ +Apache Commons Compress +======================= + +Commons Compress is a Java library for working with various +compression and archiving formats. + +For full documentation see https://commons.apache.org/proper/commons-compress/ + +## Apache Commons Compress was derived from various sources, including: + +Original BZip2 classes contributed by Keiron Liddle +<keiron@aftexsw.com>, Aftex Software to the Apache Ant project. +They are based on a port of Julian Seward's libbzip2. + +Original Tar classes from contributors of the Apache Ant project. + +Original Zip classes from contributors of the Apache Ant project. + +Original CPIO classes contributed by Markus Kuss and the jRPM project +(jrpm.sourceforge.net). + +This distribution includes cryptographic software. The country in +which you currently reside may have restrictions on the import, +possession, use, and/or re-export to another country, of encryption +software. BEFORE using any encryption software, please check your +country's laws, regulations and policies concerning the import, +possession, or use, and re-export of encryption software, to see if +this is permitted. 
See <http://www.wassenaar.org/> for more +information. + +The U.S. Government Department of Commerce, Bureau of Industry and +Security (BIS), has classified this software as Export Commodity +Control Number (ECCN) 5D002.C.1, which includes information security +software using or performing cryptographic functions with asymmetric +algorithms. The form and manner of this Apache Software Foundation +distribution makes it eligible for export under the License Exception +ENC Technology Software Unrestricted (TSU) exception (see the BIS +Export Administration Regulations, Section 740.13) for both object +code and source code. + +The following provides more details on the included cryptographic +software: + * the 7zip package can read AES encrypted archives diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt new file mode 100644 index 000000000..b40901f06 --- /dev/null +++ b/RELEASE-NOTES.txt @@ -0,0 +1,1088 @@ + Apache Commons Compress RELEASE NOTES + +Apache Commons Compress software defines an API for working with +compression and archive formats. These include: bzip2, gzip, pack200, +lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4, +Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj. + +Release 1.18 +------------ + +New features: +o It is now possible to specify the arguments of zstd-jni's + ZstdOutputStream constructors via Commons Compress as well. + Issue: COMPRESS-460. + Thanks to Carmi Grushko. + +Fixed Bugs: +o The example Expander class has been vulnerable to a path + traversal in the edge case that happens when the target + directory has a sibling directory and the name of the target + directory is a prefix of the sibling directory's name. + Thanks to Didier Loiseau. +o Changed the OSGi Import-Package to also optionally import + javax.crypto so encrypted archives can be read. + Issue: COMPRESS-456. +o Changed various implementations of the close method to better + ensure all held resources get closed even if exceptions are + thrown during the closing the stream. + Issue: COMPRESS-457. +o ZipArchiveInputStream can now detect the APK Signing Block + used in signed Android APK files and treats it as an "end of + archive" marker. + Issue: COMPRESS-455. +o The cpio streams didn't handle archives using a multi-byte + encoding properly. + Issue: COMPRESS-459. + Thanks to Jens Reimann. +o ZipArchiveInputStream#read would silently return -1 on a + corrupted stored entry and even return > 0 after hitting the + end of the archive. + Issue: COMPRESS-463. +o ArArchiveInputStream#read would allow to read from the stream + without opening an entry at all. + Issue: COMPRESS-462. + +Release 1.17 +------------ + +New features: +o Added a unit test that is supposed to fail if we break the + OSGi manifest entries again. + Issue: COMPRESS-443. +o Add a new SkipShieldingInputStream class that can be used with + streams that throw an IOException when skip is invoked. + Issue: COMPRESS-449. +o New constructors have been added to SevenZFile that accept + char[]s rather than byte[]s in order to avoid a common error + of using the wrong encoding when creating the byte[]. This + change may break source compatibility for client code that + uses one of the constructors expecting a password and passes + in null as password. We recommend to change the code to use a + constructor without password argument. + Issue: COMPRESS-452. + +Fixed Bugs: +o Removed the objenesis dependency from the pom as it is not + needed at all. +o Fixed resource leak in ParallelScatterZipCreator#writeTo. 
+ Issue: COMPRESS-446. +o Certain errors when parsing ZIP extra fields in corrupt + archives are now turned into ZipException, they used to + manifest as ArrayIndexOutOfBoundsException before. + Issue: COMPRESS-447. +o IOUtils.copy now verifies the buffer size is bigger than 0. + Issue: COMPRESS-451. +o ZipArchiveInputStream failed to read some files with stored + entries using a data descriptor. + Issue: COMPRESS-454. + +Changes: +o Fixed some code examples. + Github Pull Request #63. + Thanks to Marchenko Sergey. +o The streams returned by ZipFile and most other decompressing + streams now provide information about the number of compressed + and uncompressed bytes read so far. This may be used to detect + a ZipBomb if the compression ratio exceeds a certain + threshold, for example. + For SevenZFile a new method returns the statistics for the + current entry. + Issue: COMPRESS-445. + Thanks to Andreas Beeker. +o Added a workaround for a bug in AdoptOpenJDK for S/390 to + BZip2CompressorInputStream. + Issue: COMPRESS-453. + +Release 1.16.1 +-------------- + +Fixed Bug: +o Fixed the OSGi manifest entry for imports that has been broken + in 1.16. + Issue: COMPRESS-442. + +Release 1.16 +------------ + +New features: +o Add read-only support for Zstandard compression based on the + Zstd-jni project. + Issue: COMPRESS-423. Thanks to Andre F de Miranda. +o Added auto-detection for Zstandard compressed streams. + Issue: COMPRESS-425. +o Added write-support for Zstandard compression. + Issue: COMPRESS-426. +o Added read-only DEFLATE64 support to ZIP archives and as + stand-alone CompressorInputStream. + Issue: COMPRESS-380. Thanks to Christian Marquez Grabia. +o Added read-only DEFLATE64 support to 7z archives. + Issue: COMPRESS-437. + +Fixed Bugs: +o Synchronized iteration over a synchronizedList in + ParallelScatterZipCreator. + Issue: COMPRESS-430. Thanks to Bruno P. Kinoshita. +o ZipFile could get stuck in an infinite loop when parsing ZIP + archives with certain strong encryption headers. + Issue: COMPRESS-432. +o Added improved checks to detect corrupted bzip2 streams and + throw the expected IOException rather than obscure + RuntimeExceptions. + Issue: COMPRESS-424. + +Changes: +o Replaces instanceof checks with a type marker in LZ77 support code. + Issue: COMPRESS-435. Thanks to BELUGA BEHR. +o Updated XZ for Java dependency to 1.8 in order to pick up bug fix + to LZMA2InputStream's available method. +o ZipArchiveEntry now exposes how the name or comment have been + determined when the entry was read. + Issue: COMPRESS-429. Thanks to Damiano Albani. +o ZipFile.getInputStream will now always buffer the stream + internally in order to improve read performance. + Issue: COMPRESS-438. +o Speed improvement for DEFLATE64 decompression. + Issue: COMPRESS-440. Thanks to Dawid Weiss. +o Added a few extra sanity checks for the rarer compression + methods used in ZIP archives. + Issue: COMPRESS-436. +o Simplified the special handling for the dummy byte required by + zlib when using java.util.zip.Inflater. + Issue: COMPRESS-441. +o Various code cleanups. + Github Pull Request #61. Thanks to Shahab Kondri. +o TarArchiveEntry's preserveLeadingSlashes constructor argument + has been renamed and can now also be used to preserve the + drive letter on Windows. + +Release 1.15 +------------ + +New features: +o Added magic MANIFEST entry Automatic-Module-Name so the module + name will be org.apache.commons.compress when the jar is used + as an automatic module in Java9. + Issue: COMPRESS-397. 
+o Added a new utility class FixedLengthBlockOutputStream that + can be used to ensure writing always happens in blocks of a + given size. + Issue: COMPRESS-405. Thanks to Simon Spero. +o It is now possible to specify/read custom PAX headers when + writing/reading tar archives. + Issue: COMPRESS-400. Thanks to Simon Spero. + +Fixed Bugs: +o Make sure "version needed to extract" in local file header and + central directory of a ZIP archive agree with each other. + Also ensure the version is set to 2.0 if DEFLATE is used. + Issue: COMPRESS-394. +o Don't use a data descriptor in ZIP archives when copying a raw + entry that already knows its size and CRC information. + Issue: COMPRESS-395. +o Travis build redundantly repeats compilation and tests redundantly + GitHub Pull Request #43. Thanks to Simon Spero. + Issue: COMPRESS-413 +o The MANIFEST of 1.14 lacks an OSGi Import-Package for XZ for + Java. + Issue: COMPRESS-396. +o BUILDING.md now passes the RAT check. + Issue: COMPRESS-406. Thanks to Simon Spero. +o Made sure ChecksumCalculatingInputStream receives valid + checksum and input stream instances via the constructor. + Issue: COMPRESS-412. Thanks to Michael Hausegger. +o TarArchiveOutputStream now verifies the block and record sizes + specified at construction time are compatible with the tar + specification. In particular 512 is the only record size + accepted and the block size must be a multiple of 512. + Issue: COMPRESS-407. Thanks to Simon Spero. +o Fixed class names of CpioArchiveEntry and + CpioArchiveInputStream in various Javadocs. + Issue: COMPRESS-415. +o The code of the extended timestamp zip extra field incorrectly + assumed the time was stored as unsigned 32-bit int and thus + created incorrect results for years after 2037. + Issue: COMPRESS-416. Thanks to Simon Spero. +o Removed ZipEncoding code that became obsolete when we started + to require Java 5 as baseline long ago. + Issue: COMPRESS-410. Thanks to Simon Spero. +o The tar package will no longer try to parse the major and + minor device numbers unless the entry represents a character + or block special file. + Issue: COMPRESS-417. +o When reading tar headers with name fields containing embedded + NULs, the name will now be terminated at the first NUL byte. + Issue: COMPRESS-421. Thanks to Roel Spilker. +o Simplified TarArchiveOutputStream by replacing the internal + buffering with new class FixedLengthBlockOutputStream. + Issue: COMPRESS-409. + +Release 1.14 +------------ + +New features: +o Added write support for Snappy. + Issue: COMPRESS-246. +o Added support for LZ4 (block and frame format). + Issue: COMPRESS-271. +o Add static detect(InputStream in) to CompressorStreamFactory + and ArchiveStreamFactory + Issue: COMPRESS-385. +o Added a way to limit amount of memory ZCompressorStream may + use. + Issue: COMPRESS-382. Thanks to Tim Allison. +o Added a way to limit amount of memory ZCompressorStream may + use. + Issue: COMPRESS-386. Thanks to Tim Allison. +o Added a way to limit amount of memory LZMACompressorStream and + XZCompressorInputStream may use. + Issue: COMPRESS-382. Thanks to Tim Allison. +o Add Brotli decoder based on the Google Brotli library. + Issue: COMPRESS-392. Thanks to Philippe Mouawad. +o ZipEntry now exposes its data offset. + Issue: COMPRESS-390. Thanks to Zbynek Vyskovsky. +o Using ZipArchiveEntry's setAlignment it is now possible to + ensure the data offset of an entry starts at a file position + that at word or page boundaries. + A new extra field has been added for this purpose. 
+ Issue: COMPRESS-391. Thanks to Zbynek Vyskovsky. + +Fixed Bugs: +o SnappyCompressorInputStream slides the window too early + leading to ArrayIndexOutOfBoundsExceptions for some streams. + Issue: COMPRESS-378. +o ZipArchiveEntry#isUnixSymlink now only returns true if the + corresponding link flag is the only file-type flag set. + Issue: COMPRESS-379. Thanks to Guillaume Boué. +o Fixed an integer overflow in CPIO's CRC calculation. + Pull Request #17. Thanks to Daniel Collin. +o Make unit tests work on Windows paths with spaces in their names. + Issue: COMPRESS-387. +o Internal location pointer in ZipFile could get incremented + even if nothing had been read. + Issue: COMPRESS-389. +o LZMACompressorOutputStream#flush would throw an exception + rather than be the NOP it promised to be. + Issue: COMPRESS-393. + +Changes: +o The blocksize for FramedSnappyCompressorInputStream can now be + configured as some IWA files seem to be using blocks larger + than the default 32k. + Issue: COMPRESS-358. +o BZip2CompressorInputstream now uses BitInputStream internally. + Pull Request #13. Thanks to Thomas Meyer. +o Improved performance for concurrent reads from ZipFile when + reading from a file. + Issue: COMPRESS-388. Thanks to Zbynek Vyskovsky. + +Release 1.13 +------------ + +Commons Compress 1.13 is the first version to require Java 7 at +runtime. + +Changes in this version include: + +New features: +o SevenZFile, SevenZOutputFile, ZipFile and + ZipArchiveOutputStream can now work on non-file resources if + they can be accessed via SeekableByteChannel. + Issue: COMPRESS-327. +o Allow compressor extensions through a standard JRE ServiceLoader. + Issue: COMPRESS-368. +o Allow archive extensions through a standard JRE ServiceLoader. + Issue: COMPRESS-369. +o Add write support for the legacy LZMA format, this requires XZ + for Java 1.6. + Issue: COMPRESS-373. +o Add write support for the legacy LZMA stream to 7z, this + requires XZ for Java 1.6. + Issue: COMPRESS-374. +o Allow the clients of ParallelScatterZipCreator to provide + ZipArchiveEntryRequestSupplier. + Issue: COMPRESS-375. Thanks to Plamen Totev. +o Add a version-independent link to the API docs of the latest + release. + Issue: COMPRESS-372. + +Fixed Bugs: +o BitInputStream could return bad results when overflowing + internally - if two consecutive reads tried to read more than + 64 bits. + Issue: COMPRESS-363. +o ZipArchiveInputStream.closeEntry does not properly advance to + next entry if there are junk bytes at end of data section. + Issue: COMPRESS-364. Thanks to Mike Mole. +o ZipArchiveInputStream now throws an Exception if it encounters + a broken ZIP archive rather than signaling end-of-archive. + Issue: COMPRESS-367. Thanks to Mike Mole. +o ScatterZipOutputStream didn't close the StreamCompressor + causing a potential resource leak. + Issue: COMPRESS-377. + +Changes: +o Update Java requirement from 6 to 7. + Issue: COMPRESS-360. +o Clarified which TarArchiveEntry methods are useless for + entries read from an archive. + Issue: COMPRESS-366. + +Release 1.12 +------------ + +Commons Compress 1.12 is the first version to require Java 6 at +runtime. + +Release 1.12 changes the behavior of BZip2CompressorOutputStream's +finalize method so that it no longer invokes finish. This is going to +break code that relied on the finalizer to clean up an unfinished +stream. The code will need to be changed to call finish or close +itself. Note that a finalizer is not guaranteed to run, so the feature +was not 100% effective in any case. 
+ +New features: + +o FramedSnappyCompressorInputStream now supports the dialect of + Snappy used by the IWA files contained within the zip archives + used in Apple's iWork 13 files. + Issue: COMPRESS-352. + +Fixed Bugs: + +o SevenZFile.read() throws an IllegalStateException for empty entries. + Issue: COMPRESS-348. +o TarArchiveInputStream failed to parse PAX headers that included + blank lines. + Issue: COMPRESS-355. Thanks to Jeremy Gustie. +o TarArchiveInputStream failed to parse PAX headers whose tar entry + name ended with a slash. + Issue: COMPRESS-356. Thanks to Jeremy Gustie. + +Changes: +o Update requirement from Java 5 to 6. + Issue: COMPRESS-349. +o TarArchiveEntry wastefully allocates empty arrays. + Issue: COMPRESS-350. +o Javadoc for BZip2CompressorInputStream(InputStream, boolean) should + refer to IOEx, not NPE. + Issue: COMPRESS-353. +o PureJavaCrc32C in the snappy package is now final so it is now safe + to call a virtual method inside the constructor. + Issue: COMPRESS-354. + +o ZipArchiveInputStream and CpioArchiveInputStream could throw + exceptions who's messages contained potentially corrupt entry names + read from a broken archive. They will now sanitize the names by + replacing unprintable characters and restricting the length to 255 + characters. + Issue: COMPRESS-351. +o BZip2CompressorOutputStream no longer tries to finish the output + stream in finalize. This is a breaking change for code that relied + on the finalizer. + Issue: COMPRESS-357. + + +Release 1.11 +------------ + +New features: +o TarArchiveInputStream now supports reading global PAX headers. + Issue: COMPRESS-347. +o The PAX headers for sparse entries written by star are now + applied. + Issue: COMPRESS-346. +o GNU sparse files using one of the PAX formats are now + detected, but cannot be extracted. + Issue: COMPRESS-345. +o New method SevenZFile.getEntries can be used to list the + contents of a 7z archive. + Issue: COMPRESS-341. +o When using Zip64Mode.Always also use ZIP64 extensions inside + the central directory. + GitHub Pull Request #10 Thanks to Matt Hovey. +o ZipFile.getRawInputStream() is now part of the public API + Issue: COMPRESS-323. +o Allow byte-for-byte replication of Zip entries. + GitHub Pull Request #6. Thanks to Jason van Zyl. +o TarArchiveEntry's preserveLeadingSlashes is now a property and used + on later calls to setName, too. + This behavior is a breaking change. + Issue: COMPRESS-328. +o Added read-only support for bzip2 compression used inside of + ZIP archives. + GitHub Pull Request #4. Thanks to Sören Glimm. + +Fixed Bugs: +o ArArchiveInputStream can now read GNU extended names that are + terminated with a NUL byte rather than a linefeed. + Issue: COMPRESS-344. +o Native Memory Leak in Sevenz-DeflateDecoder. + Issue: COMPRESS-343. Thanks to Rene Preissel. +o SevenZFile will now only try to drain an entry's content when + moving on to the next entry if data is read from the next + entry. This should improve performance for applications that + try to skip over entries. + Issue: COMPRESS-340. Thanks to Dawid Weiss. +o file names of tar archives using the xstar format are now + parsed properly. + Issue: COMPRESS-336. +o checksums of tars that pad the checksum field to the left are + now calculated properly. + Issue: COMPRESS-335. +o ArArchiveInputStream failed to read past the first entry when + BSD long names have been used. + Issue: COMPRESS-334. Thanks to Jeremy Gustie. +o Added buffering for random access which speeds up 7Z support. + Issue: COMPRESS-333. 
Thanks to Dawid Weiss. +o The checksum validation of TararchiveEntry is now as strict as + the validation of GNU tar, which eliminates a few cases of + false positives of ArchiveStreamFactory. + This behavior is a breaking change since the check has become + more strict but any archive that fails the checksum test now + would also fail it when extracted with other tools and must be + considered an invalid archive. + Issue: COMPRESS-331. +o SnappyCompressorInputStream and + FramedSnappyCompressorInputStream returned 0 at the end of the + stream under certain circumstances. + Issue: COMPRESS-332. +o Adjusted unit test to updates in Java8 and later that change + the logic of ZipEntry#getTime. + Issue: COMPRESS-326. +o TarArchiveOutputStream will now recognize GNU long name and + link entries even if the special entry has a different name + than GNU tar uses itself. This seems to be the case for + archives created by star. + Issue: COMPRESS-324. +o ArrayIndexOutOfBoundsException when InfoZIP type 7875 extra + fields are read from the central directory. + Issue: COMPRESS-321. + +Release 1.10 +------------ + +Release 1.10 moves the former +org.apache.commons.compress.compressors.z._internal_ package which +breaks backwards compatibility for code which used the old package. + +This also changes the superclass of ZCompressorInputStream which makes +this class binary incompatible with the one of Compress 1.9. Code +that extends ZCompressorInputStream will need to be recompiled in +order to work with Compress 1.10. + +New features: +o CompressorStreamFactory can now auto-detect DEFLATE streams + with ZLIB header. + Issue: COMPRESS-316. Thanks to Nick Burch. +o CompressorStreamFactory can now auto-detect LZMA streams. + Issue: COMPRESS-313. +o Added support for parallel compression. This low-level API allows + a client to build a zip/jar file by using the class + org.apache.commons.compress.archivers.zip.ParallelScatterZipCreator. + + Zip documentation updated with further notes about parallel features. + + Please note that some aspects of jar creation need to be + handled by client code and is not part of commons-compress for this + release. + Issue: COMPRESS-296. Thanks to Kristian Rosenvold. +o Cut overall object instantiation in half by changing file + header generation algorithm, for a 10-15 percent performance + improvement. + + Also extracted two private methods createLocalFileHeader + and createCentralFileHeader in ZipArchiveOutputStream. + These may have some interesting additional usages in the + near future. Thanks to Kristian Rosenvold. +o New methods in ZipArchiveOutputStream and ZipFile allows + entries to be copied from one archive to another without + having to re-compress them. + Issue: COMPRESS-295. Thanks to Kristian Rosenvold. + +Fixed Bugs: +o TarArchiveInputStream can now read entries with group or + user ids > 0x80000000. + Issue: COMPRESS-314. +o TarArchiveOutputStream can now write entries with group or + user ids > 0x80000000. + Issue: COMPRESS-315. +o TarArchiveEntry's constructor with a File and a String arg + didn't normalize the name. + Issue: COMPRESS-312. +o ZipEncodingHelper no longer reads system properties directly + to determine the default charset. + Issue: COMPRESS-308. +o BZip2CompressorInputStream#read would return -1 when asked to + read 0 bytes. + Issue: COMPRESS-309. +o ArchiveStreamFactory fails to pass on the encoding when creating + some streams. 
+ * ArjArchiveInputStream + * CpioArchiveInputStream + * DumpArchiveInputStream + * JarArchiveInputStream + * TarArchiveInputStream + * JarArchiveOutputStream + Issue: COMPRESS-306. +o Restore immutability/thread-safety to ArchiveStreamFactory. + The class is now immutable provided that the method setEntryEncoding + is not used. The class is thread-safe. + Issue: COMPRESS-302. +o Restore immutability/thread-safety to CompressorStreamFactory. + The class is now immutable provided that the method + setDecompressConcatenated is not used. The class is thread-safe. + Issue: COMPRESS-303. +o ZipFile logs a warning in its finalizer when its constructor + has thrown an exception reading the file - for example if the + file doesn't exist. + Issue: COMPRESS-297. +o Improved error message when tar encounters a groupId that is + too big to write without using the STAR or POSIX format. + Issue: COMPRESS-290. Thanks to Kristian Rosenvold. +o SevenZFile now throws the specific PasswordRequiredException + when it encounters an encrypted stream but no password has + been specified. + Issue: COMPRESS-298. + +Changes: +o Moved the package + org.apache.commons.compress.compressors.z._internal_ to + org.apache.commons.compress.compressors.lzw and made it part + of the API that is officially supported. This will break + existing code that uses the old package. Thanks to Damjan Jovanovic. + +For complete information on Apache Commons Compress, including instructions +on how to submit bug reports, patches, or suggestions for improvement, +see the Apache Commons Compress website: + +https://commons.apache.org/compress/ + +Old Release Notes +================= + +Release 1.9 +----------- + +New features: +o Added support for DEFLATE streams without any gzip framing. + Issue: COMPRESS-263. + Thanks to Matthias Stevens. + +Fixed Bugs: +o When reading 7z files unknown file properties and properties of type + kDummy are now ignored. + Issue: COMPRESS-287. +o Expanding 7z archives using LZMA compression could cause an + EOFException. + Issue: COMPRESS-286. +o Long-Name and -link or PAX-header entries in TAR archives always had + the current time as last modfication time, creating archives that + are different at the byte level each time an archive was built. + Issue: COMPRESS-289. + Thanks to Bob Robertson. + +Changes: +o Checking for XZ for Java may be expensive. The result will now be + cached outside of an OSGi environment. You can use the new + XZUtils#setCacheXZAvailability to overrride this default behavior. + Issue: COMPRESS-285. + +Release 1.8.1 +------------- + +New features: +o COMPRESS-272: CompressorStreamFactory can now auto-detect Unix compress + (".Z") streams. + +Fixed Bugs: +o COMPRESS-270: The snappy, ar and tar inputstreams might fail to read from a + non-buffered stream in certain cases. +o COMPRESS-277: IOUtils#skip might skip fewer bytes than requested even though + more could be read from the stream. +o COMPRESS-276: ArchiveStreams now validate there is a current entry before + reading or writing entry data. +o ArjArchiveInputStream#canReadEntryData tested the current + entry of the stream rather than its argument. +o COMPRESS-274: ChangeSet#delete and deleteDir now properly deal with unnamed + entries. +o COMPRESS-273: Added a few null checks to improve robustness. +o COMPRESS-278: TarArchiveInputStream failed to read archives with empty + gid/uid fields. 
+o COMPRESS-279: TarArchiveInputStream now again throws an exception when it + encounters a truncated archive while reading from the last + entry. +o COMPRESS-280: Adapted TarArchiveInputStream#skip to the modified + IOUtils#skip method. Thanks to BELUGA BEHR. + +Changes: +o The dependency on org.tukaani:xz is now marked as optional. + +Release 1.8 +----------- + +New features: +o GzipCompressorInputStream now provides access to the same + metadata that can be provided via GzipParameters when writing + a gzip stream. + Issue: COMPRESS-260. +o SevenZOutputFile now supports chaining multiple + compression/encryption/filter methods and passing options to + the methods. + Issue: COMPRESS-266. +o The (compression) method(s) can now be specified per entry in + SevenZOutputFile. + Issue: COMPRESS-261. +o SevenZArchiveEntry "knows" which method(s) have been used to + write it to the archive. + Issue: COMPRESS-258. +o The 7z package now supports the delta filter as method. +o The 7z package now supports BCJ filters for several platforms. + You will need a version >= 1.5 of XZ for Java to read archives + using BCJ, though. + Issue: COMPRESS-257. + +Fixed Bugs: +o BZip2CompressorInputStream read fewer bytes than possible from + a truncated stream. + Issue: COMPRESS-253. +o SevenZFile failed claiming the dictionary was too large when + archives used LZMA compression for headers and content and + certain non-default dictionary sizes. + Issue: COMPRESS-253. +o CompressorStreamFactory.createCompressorInputStream with + explicit compression did not honor decompressConcatenated + Issue: COMPRESS-259. +o TarArchiveInputStream will now read archives created by tar + implementations that encode big numbers by not adding a + trailing NUL. + Issue: COMPRESS-262. +o ZipArchiveInputStream would return NUL bytes for the first 512 + bytes of a STORED entry if it was the very first entry of the + archive. + Issue: COMPRESS-264. +o When writing PAX/POSIX headers for TAR entries with + backslashes or certain non-ASCII characters in their name + TarArchiveOutputStream could fail. + Issue: COMPRESS-265. +o ArchiveStreamFactory now throws a StreamingNotSupported - a + new subclass of ArchiveException - if it is asked to read from + or write to a stream and Commons Compress doesn't support + streaming for the format. This currently only applies to the + 7z format. + Issue: COMPRESS-267. + +Release 1.7 +----------- + +New features: +o Read-Only support for Snappy compression. + Issue: COMPRESS-147. Thanks to BELUGA BEHR. +o Read-Only support for .Z compressed files. + Issue: COMPRESS-243. Thanks to Damjan Jovanovic. +o ZipFile and ZipArchiveInputStream now support reading entries + compressed using the SHRINKING method. Thanks to Damjan Jovanovic. +o GzipCompressorOutputStream now supports setting the compression + level and the header metadata (filename, comment, modification time, + operating system and extra flags) + Issue: COMPRESS-250. Thanks to Emmanuel Bourg. +o ZipFile and ZipArchiveInputStream now support reading entries + compressed using the IMPLODE method. + Issue: COMPRESS-115. Thanks to Emmanuel Bourg. +o ZipFile and the 7z file classes now implement Closeable and can be + used in try-with-resources constructs. + +Fixed Bugs: +o SevenZOutputFile#closeArchiveEntry throws an exception when using + LZMA2 compression on Java8. Issue: COMPRESS-241. +o 7z reading of big 64bit values could be wrong. + Issue: COMPRESS-244. Thanks to Nico Kruber. +o TarArchiveInputStream could fail to read an archive completely. 
+ Issue: COMPRESS-245.
+o The time-setters in X5455_ExtendedTimestamp now set the
+ corresponding flags explicitly - i.e. they set the bit if the value
+ is not-null and reset it otherwise. This may cause
+ incompatibilities if you use setFlags to unset a bit and later set
+ the time to a non-null value - the flag will now be set.
+ Issue: COMPRESS-242.
+o SevenZOutputFile would create invalid archives if more than six
+ empty files or directories were included. Issue: COMPRESS-252.
+
+Release 1.6
+-----------
+
+Version 1.6 introduces changes to the internal API of the tar package that
+break backwards compatibility in the following rare cases. This version
+removes the package private TarBuffer class along with the protected "buffer"
+members in TarArchiveInputStream and TarArchiveOutputStream. This change will
+only affect you if you have created a subclass of one of the stream classes
+and accessed the buffer member or directly used the TarBuffer class.
+
+Changes in this version include:
+
+New features:
+o Added support for 7z archives. Most compression algorithms
+ can be read and written, LZMA and encryption are only
+ supported when reading. Issue: COMPRESS-54. Thanks to Damjan Jovanovic.
+o Added read-only support for ARJ archives that don't use
+ compression. Issue: COMPRESS-226. Thanks to Damjan Jovanovic.
+o DumpArchiveInputStream now supports an encoding parameter that
+ can be used to specify the encoding of file names.
+o The CPIO streams now support an encoding parameter that can be
+ used to specify the encoding of file names.
+o Read-only support for LZMA standalone compression has been added.
+ Issue: COMPRESS-111.
+
+Fixed Bugs:
+o TarBuffer.tryToConsumeSecondEOFRecord could throw a
+ NullPointerException Issue: COMPRESS-223. Thanks to Jeremy Gustie.
+o Parsing of zip64 extra fields has become more lenient in order
+ to be able to read archives created by DotNetZip and maybe
+ other archivers as well. Issue: COMPRESS-228.
+o TAR will now properly read the names of symbolic links with
+ long names that use the GNU variant to specify the long file
+ name. Issue: COMPRESS-229. Thanks to Christoph Gysin.
+o ZipFile#getInputStream could return null if the archive
+ contained duplicate entries.
+ The class now also provides two new methods to obtain all
+ entries of a given name rather than just the first one.
+ Issue: COMPRESS-227.
+o CpioArchiveInputStream failed to read archives created by
+ Redline RPM. Issue: COMPRESS-236. Thanks to Andrew Duffy.
+o TarArchiveOutputStream now properly handles link names that
+ are too long to fit into a traditional TAR header. Issue:
+ COMPRESS-237. Thanks to Emmanuel Bourg.
+o The auto-detecting create*InputStream methods of Archive and
+ CompressorStreamFactory could fail to detect the format of
+ blocking input streams. Issue: COMPRESS-239.
+
+Changes:
+o Readability patch to TarArchiveInputStream. Issue:
+ COMPRESS-232. Thanks to BELUGA BEHR.
+o Performance improvements to TarArchiveInputStream, in
+ particular to the skip method. Issue: COMPRESS-234. Thanks to
+ BELUGA BEHR.
+
+Release 1.5
+-----------
+
+New features:
+
+o CompressorStreamFactory has an option to create decompressing
+ streams that decompress the full input for formats that support
+ multiple concatenated streams.
+ Issue: COMPRESS-220.
+
+Fixed Bugs:
+
+o Typo in CompressorStreamFactory Javadoc
+ Issue: COMPRESS-218.
+ Thanks to Gili.
+o ArchiveStreamFactory's tar stream detection created false positives
+ for AIFF files.
+ Issue: COMPRESS-191.
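
The 7z write support introduced in Release 1.6 (COMPRESS-54) is centered on SevenZOutputFile, which, together with the Closeable support added in 1.7, works in try-with-resources. A minimal sketch that stores a single file with the default LZMA2 content compression; the class name, archive name and command-line argument are illustrative only:

    import java.io.File;
    import java.io.IOException;
    import java.nio.file.Files;

    import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
    import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile;

    public class SevenZWriteExample {
        public static void main(String[] args) throws IOException {
            File input = new File(args[0]);
            try (SevenZOutputFile sevenZ = new SevenZOutputFile(new File("example.7z"))) {
                // createArchiveEntry copies name, size and timestamps from the input file
                SevenZArchiveEntry entry = sevenZ.createArchiveEntry(input, input.getName());
                sevenZ.putArchiveEntry(entry);
                sevenZ.write(Files.readAllBytes(input.toPath()));
                sevenZ.closeArchiveEntry();
            }
        }
    }
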
+ Thanks to Jukka Zitting.
+o XZ for Java didn't provide an OSGi bundle. Compress' dependency on
+ it has now been marked optional so Compress itself can still be used
+ in an OSGi context.
+ Issue: COMPRESS-199.
+ Thanks to Jukka Zitting.
+o When specifying the encoding explicitly TarArchiveOutputStream would
+ write unreadable names in GNU mode or even cause errors in POSIX
+ mode for file names longer than 66 characters.
+ Issue: COMPRESS-200.
+ Thanks to Christian Schlichtherle.
+o Writing TAR PAX headers failed if the generated entry name ended
+ with a "/".
+ Issue: COMPRESS-203.
+o ZipArchiveInputStream sometimes failed to provide input to the
+ Inflater when it needed it, leading to reads returning 0.
+ Issue: COMPRESS-189.
+ Thanks to Daniel Lowe.
+o TarArchiveInputStream ignored the encoding for GNU long name
+ entries.
+ Issue: COMPRESS-212.
+o TarArchiveInputStream could leave the second EOF record inside the
+ stream it had just finished reading.
+ Issue: COMPRESS-206.
+ Thanks to Peter De Maeyer.
+o DumpArchiveInputStream no longer implicitly closes the original
+ input stream when it reaches the end of the archive.
+o ZipArchiveInputStream now consumes the remainder of the archive when
+ getNextZipEntry returns null.
+o Unit tests could fail if the source tree was checked out to a
+ directory tree containing spaces.
+ Issue: COMPRESS-205.
+ Thanks to Daniel Lowe.
+o Fixed a potential ArrayIndexOutOfBoundsException when reading STORED
+ entries from ZipArchiveInputStream.
+ Issue: COMPRESS-219.
+o CompressorStreamFactory can now be used without XZ for Java being
+ available.
+ Issue: COMPRESS-221.
+
+Changes:
+
+o Improved exception message if a zip archive cannot be read because
+ of an unsupported compression method.
+ Issue: COMPRESS-188.
+ Thanks to Harald Kuhn.
+o ArchiveStreamFactory has a setting for file name encoding that sets
+ up encoding for ZIP and TAR streams.
+ Issue: COMPRESS-192.
+ Thanks to Jukka Zitting.
+o TarArchiveEntry now has a method to verify its checksum.
+ Issue: COMPRESS-191.
+ Thanks to Jukka Zitting.
+o Split/spanned ZIP archives are now properly detected by
+ ArchiveStreamFactory but will cause an
+ UnsupportedZipFeatureException when read.
+o ZipArchiveInputStream now reads archives that start with a "PK00"
+ signature. Archives with this signature are created when the
+ archiver was willing to split the archive but in the end only needed
+ a single segment - so didn't split anything.
+ Issue: COMPRESS-208.
+o TarArchiveEntry has a new constructor that allows setting linkFlag
+ and preserveLeadingSlashes at the same time.
+ Issue: COMPRESS-201.
+o ChangeSetPerformer has a new perform overload that uses a ZipFile
+ instance as input.
+ Issue: COMPRESS-159.
+o Garbage collection pressure has been reduced by reusing temporary
+ byte arrays in classes.
+ Issue: COMPRESS-172.
+ Thanks to Thomas Mair.
+o Can now handle zip extra field 0x5455 - Extended Timestamp.
+ Issue: COMPRESS-210.
+ Thanks to Julius Davies.
+o handle zip extra field 0x7875 - Info Zip New Unix Extra Field.
+ Issue: COMPRESS-211.
+ Thanks to Julius Davies.
+o ZipShort, ZipLong, ZipEightByteInteger should implement Serializable
+ Issue: COMPRESS-213.
+ Thanks to Julius Davies.
+o better support for unix symlinks in ZipFile entries.
+ Issue: COMPRESS-214.
+ Thanks to Julius Davies.
+o ZipFile's initialization has been improved for non-Zip64 archives.
+ Issue: COMPRESS-215.
+ Thanks to Robin Power.
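
The improved Unix symlink support in ZipFile entries (COMPRESS-214) can be combined with the Closeable support added in 1.7 for a simple listing. A sketch under the assumption that ZipArchiveEntry#isUnixSymlink and ZipFile#getUnixSymlink behave as described above; the class name and the command-line argument for the archive path are illustrative only:

    import java.io.File;
    import java.io.IOException;
    import java.util.Enumeration;

    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
    import org.apache.commons.compress.archivers.zip.ZipFile;

    public class ZipSymlinkListing {
        public static void main(String[] args) throws IOException {
            try (ZipFile zip = new ZipFile(new File(args[0]))) {
                Enumeration<ZipArchiveEntry> entries = zip.getEntries();
                while (entries.hasMoreElements()) {
                    ZipArchiveEntry entry = entries.nextElement();
                    if (entry.isUnixSymlink()) {
                        // getUnixSymlink reads the entry data and decodes the link target
                        System.out.println(entry.getName() + " -> " + zip.getUnixSymlink(entry));
                    } else {
                        System.out.println(entry.getName());
                    }
                }
            }
        }
    }
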
+o Updated XZ for Java dependency to 1.2 as this version provides
+ proper OSGi manifest attributes.
+
+Release 1.4.1
+-------------
+
+This is a security bugfix release, see
+https://commons.apache.org/proper/commons-compress/security.html#Fixed_in_Apache_Commons_Compress_1.4.1
+
+Fixed Bugs:
+
+o Ported libbzip2's fallback sort algorithm to
+ BZip2CompressorOutputStream to speed up compression in certain
+ edge cases.
+
+Release 1.4
+-----------
+
+New features:
+o COMPRESS-156: Support for the XZ format has been added.
+
+Fixed Bugs:
+o COMPRESS-183: The tar package now allows the encoding of file names to be
+ specified and can optionally use PAX extension headers to
+ write non-ASCII file names.
+ The stream classes now write (or expect to read) archives that
+ use the platform's native encoding for file names. Apache
+ Commons Compress 1.3 used to strip everything but the lower
+ eight bits of each character which effectively only worked for
+ ASCII and ISO-8859-1 file names.
+ This new default behavior is a breaking change.
+o COMPRESS-184: TarArchiveInputStream failed to parse PAX headers that
+ contained non-ASCII characters.
+o COMPRESS-178: TarArchiveInputStream throws IllegalArgumentException instead of IOException
+o COMPRESS-179: TarUtils.formatLongOctalOrBinaryBytes() assumes the field will be 12 bytes long
+o COMPRESS-175: GNU Tar sometimes uses binary encoding for UID and GID
+o COMPRESS-171: ArchiveStreamFactory.createArchiveInputStream would claim
+ short text files were TAR archives.
+o COMPRESS-164: ZipFile didn't work properly for archives using unicode extra
+ fields rather than UTF-8 filenames and the EFS-Flag.
+o COMPRESS-169: For corrupt archives ZipFile would throw a RuntimeException in
+ some cases and an IOException in others. It will now
+ consistently throw an IOException.
+
+Changes:
+o COMPRESS-182: The tar package can now write archives that use star/GNU/BSD
+ extensions or use the POSIX/PAX variant to store numeric
+ values that don't fit into the traditional header fields.
+o COMPRESS-181: Added a workaround for a Bug in some tar implementations that add
+ a NUL byte as first byte in numeric header fields.
+o COMPRESS-176: Added a workaround for a Bug in WinZIP which uses backslashes
+ as path separators in Unicode Extra Fields.
+o COMPRESS-131: ArrayOutOfBounds while decompressing bz2. Added test case - code already seems to have been fixed.
+o COMPRESS-146: BZip2CompressorInputStream now optionally supports reading of
+ concatenated .bz2 files.
+o COMPRESS-154: GZipCompressorInputStream now optionally supports reading of
+ concatenated .gz files.
+o COMPRESS-16: The tar package can now read archives that use star/GNU/BSD
+ extensions for files that are longer than 8 GByte as well as
+ archives that use the POSIX/PAX variant.
+o COMPRESS-165: The tar package can now write archives that use star/GNU/BSD
+ extensions for files that are longer than 8 GByte as well as
+ archives that use the POSIX/PAX variant.
+o COMPRESS-166: The tar package can now use the POSIX/PAX variant for writing
+ entries with names longer than 100 characters.
+
+Release 1.3
+-----------
+
+Commons Compress 1.3 is the first version to require Java5 at runtime.
+
+Changes in this version include:
+
+New features:
+o Support for the Pack200 format has been added. Issue: COMPRESS-142.
+o Read-only support for the format used by the Unix dump(8) tool
+ has been added. Issue: COMPRESS-132.
+
+Fixed Bugs:
+o BZip2CompressorInputStream's getBytesRead method always
+ returned 0.
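
The 1.4-era tar changes listed above (COMPRESS-182, COMPRESS-183, COMPRESS-166) surface as mode setters on TarArchiveOutputStream. A minimal sketch that opts into the POSIX/PAX variants for long names, big numeric values and non-ASCII names; the class name, output file name and command-line argument are illustrative only:

    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;

    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;

    public class PaxTarExample {
        public static void main(String[] args) throws IOException {
            Path input = Paths.get(args[0]);
            try (OutputStream out = Files.newOutputStream(Paths.get("example.tar"));
                 TarArchiveOutputStream tar = new TarArchiveOutputStream(out, "UTF-8")) {
                // Use PAX extension headers for long names and big numeric values
                tar.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
                tar.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
                tar.setAddPaxHeadersForNonAsciiNames(true);

                TarArchiveEntry entry =
                        new TarArchiveEntry(input.toFile(), input.getFileName().toString());
                tar.putArchiveEntry(entry);
                Files.copy(input, tar);
                tar.closeArchiveEntry();
            }
        }
    }
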
+o ZipArchiveInputStream and ZipArchiveOutputStream could leak + resources on some JDKs. Issue: COMPRESS-152. +o TarArchiveOutputStream's getBytesWritten method didn't count + correctly. Issue: COMPRESS-160. + +Changes: +o The ZIP package now supports Zip64 extensions. Issue: COMPRESS-36. +o The AR package now supports the BSD dialect of storing file + names longer than 16 chars (both reading and writing). + Issue: COMPRESS-144. + +Release 1.2 +----------- + +New features: +o COMPRESS-123: ZipArchiveEntry has a new method getRawName that provides the + original bytes that made up the name. This may allow user + code to detect the encoding. +o COMPRESS-122: TarArchiveEntry provides access to the flags that determine + whether it is an archived symbolic link, pipe or other + "uncommon" file system object. + +Fixed Bugs: +o COMPRESS-129: ZipArchiveInputStream could fail with a "Truncated ZIP" error + message for entries between 2 GByte and 4 GByte in size. +o COMPRESS-145: TarArchiveInputStream now detects sparse entries using the + oldgnu format and properly reports it cannot extract their + contents. +o COMPRESS-130: The Javadoc for ZipArchiveInputStream#skip now matches the + implementation, the code has been made more defensive. +o COMPRESS-140: ArArchiveInputStream fails if entries contain only blanks for + userId or groupId. Thanks to Trejkaz. +o COMPRESS-139: ZipFile may leak resources on some JDKs. +o COMPRESS-125: BZip2CompressorInputStream throws IOException if + underlying stream returns available() == 0. + Removed the check. +o COMPRESS-127: Calling close() on inputStream returned by + CompressorStreamFactory.createCompressorInputStream() + does not close the underlying input stream. +o COMPRESS-119: TarArchiveOutputStream#finish now writes all buffered + data to the stream + +Changes: +o ZipFile now implements finalize which closes the underlying + file. +o COMPRESS-117: Certain tar files not recognised by + ArchiveStreamFactory. + +Release 1.1 +----------- + +New features: +o COMPRESS-108: Command-line interface to list archive contents. + Usage: java -jar commons-compress-n.m.jar archive-name [zip|tar|etc] +o COMPRESS-109: Tar implementation does not support Pax headers + Added support for reading pax headers. + Note: does not support global pax headers +o COMPRESS-103: ZipArchiveInputStream can optionally extract data that used + the STORED compression method and a data descriptor. + Doing so in a stream is not safe in general, so you have to + explicitly enable the feature. By default the stream will + throw an exception if it encounters such an entry. +o COMPRESS-98: The ZIP classes will throw specialized exceptions if any + attempt is made to read or write data that uses zip features + not supported (yet). +o COMPRESS-99: ZipFile#getEntries returns entries in a predictable order - + the order they appear inside the central directory. + A new method getEntriesInPhysicalOrder returns entries in + order of the entry data, i.e. the order ZipArchiveInputStream + would see. +o The Archive*Stream and ZipFile classes now have + can(Read|Write)EntryData methods that can be used to check + whether a given entry's data can be read/written. + The method currently returns false for ZIP archives if an + entry uses an unsupported compression method or encryption. +o COMPRESS-89: The ZIP classes now detect encrypted entries. +o COMPRESS-97: Added autodetection of compression format to + CompressorStreamFactory. 
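
The format auto-detection (COMPRESS-97) and the can(Read|Write)EntryData methods described above combine naturally into a small listing tool similar in spirit to the Lister command-line interface (COMPRESS-108). A sketch; the class name and the command-line argument for the archive path are illustrative only:

    import java.io.BufferedInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.commons.compress.archivers.ArchiveEntry;
    import org.apache.commons.compress.archivers.ArchiveException;
    import org.apache.commons.compress.archivers.ArchiveInputStream;
    import org.apache.commons.compress.archivers.ArchiveStreamFactory;

    public class ListAnyArchive {
        public static void main(String[] args) throws IOException, ArchiveException {
            try (InputStream fileIn = Files.newInputStream(Paths.get(args[0]));
                 InputStream buffered = new BufferedInputStream(fileIn); // mark/reset needed for detection
                 ArchiveInputStream archive =
                         new ArchiveStreamFactory().createArchiveInputStream(buffered)) {
                ArchiveEntry entry;
                while ((entry = archive.getNextEntry()) != null) {
                    // canReadEntryData returns false e.g. for unsupported compression or encryption
                    System.out.printf("%s (%d bytes) readable=%s%n",
                            entry.getName(), entry.getSize(), archive.canReadEntryData(entry));
                }
            }
        }
    }
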
+o COMPRESS-95: Improve ExceptionMessages in ArchiveStreamFactory Thanks to Joerg Bellmann.
+o A new constructor of TarArchiveEntry can create entries with
+ names that start with slashes - the default is to strip
+ leading slashes in order to create relative path names.
+o ArchiveEntry now has a getLastModifiedDate method.
+o COMPRESS-78: Add a BZip2Utils class modelled after GZipUtils Thanks to Jukka Zitting.
+
+Fixed Bugs:
+o COMPRESS-72: Move acknowledgements from NOTICE to README
+o COMPRESS-113: TarArchiveEntry.parseTarHeader() includes the trailing space/NUL when parsing the octal size
+o COMPRESS-118: TarUtils.parseName does not properly handle characters outside the range 0-127
+o COMPRESS-107: ArchiveStreamFactory does not recognise tar files created by Ant
+o COMPRESS-110: Support "ustar" prefix field, which is used when file paths are longer
+ than 100 characters.
+o COMPRESS-100: ZipArchiveInputStream will throw an exception if it detects an
+ entry that uses a data descriptor for a STORED entry since it
+ cannot reliably find the end of data for this "compression"
+ method.
+o COMPRESS-101: ZipArchiveInputStream should now properly read archives that
+ use data descriptors but without the "unofficial" signature.
+o COMPRESS-74: ZipArchiveInputStream failed to update the number of bytes
+ read properly.
+o ArchiveInputStream has a new method getBytesRead that should
+ be preferred over getCount since the latter may truncate the
+ number of bytes read for big archives.
+o COMPRESS-85: The cpio archives created by CpioArchiveOutputStream couldn't
+ be read by many existing native implementations because the
+ archives contained multiple entries with the same inode/device
+ combinations and weren't padded to a blocksize of 512 bytes.
+o COMPRESS-73: ZipArchiveEntry, ZipFile and ZipArchiveInputStream are now
+ more lenient when parsing extra fields.
+o COMPRESS-82: cpio is terribly slow.
+ Documented that buffered streams are needed for performance
+o Improved exception message if the extra field data in ZIP
+ archives cannot be parsed.
+o COMPRESS-17: Tar format unspecified - current support documented.
+o COMPRESS-94: ZipArchiveEntry's equals method was broken for entries created
+ with the String-arg constructor. This led to broken ZIP
+ archives if two different entries had the same hash code. Thanks to Anon Devs.
+o COMPRESS-87: ZipArchiveInputStream could repeatedly return 0 on read() when
+ the archive was truncated. Thanks to Antoni Mylka.
+o COMPRESS-86: Tar archive entries holding the file name for names longer
+ than 100 characters in GNU longfile mode didn't properly
+ specify they'd be using the "oldgnu" extension.
+o COMPRESS-83: Delegate all read and write methods in GZip stream in order to
+ speed up operations.
+o The ar and cpio streams now properly read and write last
+ modified times.
+o COMPRESS-81: TarOutputStream can leave garbage at the end of the archive
+
+Changes:
+o COMPRESS-112: ArArchiveInputStream does not handle GNU extended filename records (//)
+o COMPRESS-105: Document that the name of a ZipArchiveEntry determines whether
+ an entry is considered a directory or not.
+ If you don't use the constructor with the File argument the entry's
+ name must end in a "/" in order for the entry to be known as a directory.
+o COMPRESS-79: Move DOS/Java time conversions into Zip utility class.
+o COMPRESS-75: ZipArchiveInputStream does not show location in file
+ where a problem occurred.
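
The directory-name convention documented in COMPRESS-105 (a trailing "/" marks an entry as a directory when the File-based constructor is not used) looks like this when writing a ZIP archive. A minimal sketch; the class name, archive name and entry names are illustrative only:

    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
    import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;

    public class DirectoryEntryExample {
        public static void main(String[] args) throws IOException {
            try (OutputStream out = Files.newOutputStream(Paths.get("example.zip"));
                 ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out)) {
                // The trailing "/" is what makes isDirectory() return true
                ZipArchiveEntry dir = new ZipArchiveEntry("docs/");
                zip.putArchiveEntry(dir);
                zip.closeArchiveEntry();

                ZipArchiveEntry file = new ZipArchiveEntry("docs/readme.txt");
                zip.putArchiveEntry(file);
                zip.write("hello".getBytes(StandardCharsets.UTF_8));
                zip.closeArchiveEntry();
            }
        }
    }
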
+ diff --git a/findbugs-exclude-filter.xml b/findbugs-exclude-filter.xml new file mode 100644 index 000000000..d63fe1521 --- /dev/null +++ b/findbugs-exclude-filter.xml @@ -0,0 +1,207 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!-- + This file contains some false positive bugs detected by findbugs. Their + false positive nature has been analyzed individually and they have been + put here to instruct findbugs it must ignore them. +--> +<FindBugsFilter> + + <!-- Reason: References to System.out/err == --> + <Match> + <Class name="org.apache.commons.compress.archivers.Lister" /> + <Or> + <Method name="main" /> + <Method name="usage" /> + </Or> + <Bug pattern="NP_ALWAYS_NULL" /> + </Match> + <Match> + <Class name="org.apache.commons.compress.archivers.zip.ZipFile" /> + <Method name="finalize" /> + <Bug pattern="NP_ALWAYS_NULL" /> + </Match> + <Match> + <Class name="org.apache.commons.compress.archivers.dump.DumpArchiveUtil" /> + <Method name="dumpBlock" /> + <Bug pattern="NP_ALWAYS_NULL" /> + </Match> + + <!-- Reason: fallthrough is intended --> + <Match> + <Class name="org.apache.commons.compress.archivers.zip.ExtraFieldUtils" /> + <Method name="parse" /> + <Bug pattern="SF_SWITCH_FALLTHROUGH" /> + </Match> + <Match> + <Class name="org.apache.commons.compress.compressors.lz4.BlockLZ4CompressorInputStream" /> + <Method name="read" /> + <Bug pattern="SF_SWITCH_FALLTHROUGH" /> + </Match> + + <!-- Reason: fields unused as documented --> + <Match> + <Class name="org.apache.commons.compress.archivers.jar.JarArchiveEntry" /> + <Or> + <Field name="certificates"/> + <Field name="manifestAttributes"/> + </Or> + <Bug pattern="UWF_NULL_FIELD" /> + </Match> + <Match> + <Class name="org.apache.commons.compress.archivers.dump.DumpArchiveEntry" /> + <Field name="summary"/> + <Bug pattern="UWF_NULL_FIELD" /> + </Match> + <Match> + <Class name="org.apache.commons.compress.archivers.sevenz.Folder" /> + <Or> + <Field name="totalInputStreams"/> + </Or> + <Bug pattern="URF_UNREAD_FIELD" /> + </Match> + + <!-- Reason: exception in close swallowed in order to re-throw original in caller --> + <Match> + <Class name="org.apache.commons.compress.utils.IOUtils" /> + <Method name="closeQuietly" /> + <Bug pattern="DE_MIGHT_IGNORE" /> + </Match> + + <!-- Reason: skip(Long.MAX_VALUE) called to drain stream completely, + the class overrides skip to ensure it reads the full amount + until EOF is reached --> + <Match> + <Class name="org.apache.commons.compress.archivers.zip.ZipArchiveInputStream" /> + <Method name="closeEntry" /> + <Bug pattern="SR_NOT_CHECKED" /> + </Match> + <Match> + <Class name="org.apache.commons.compress.archivers.tar.TarArchiveInputStream" /> + <Method name="getNextTarEntry" /> + <Bug pattern="SR_NOT_CHECKED" /> + </Match> + + <!-- Reason: trying 
to delete a temporary file that has deleteOnExit set + anyway --> + <Match> + <Class name="org.apache.commons.compress.compressors.pack200.Pack200Utils" /> + <Method name="normalize" /> + <Bug pattern="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE" /> + </Match> + <Match> + <Class name="org.apache.commons.compress.compressors.pack200.TempFileCachingStreamBridge$1" /> + <Method name="close" /> + <Bug pattern="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE" /> + </Match> + + <!-- Reason: It is the Pack200*Stream that's going to close it. --> + <Match> + <Class name="org.apache.commons.compress.compressors.pack200.TempFileCachingStreamBridge$1" /> + <Method name="<init>" /> + <Bug pattern="OBL_UNSATISFIED_OBLIGATION" /> + </Match> + + <!-- Reason: the contract is to use default encoding (unless told otherwise) --> + <Match> + <Class name="org.apache.commons.compress.archivers.arj.ArjArchiveInputStream" /> + <Method name="readString" /> + <Bug pattern="DM_DEFAULT_ENCODING" /> + </Match> + <Match> + <Class name="org.apache.commons.compress.archivers.zip.AsiExtraField" /> + <Or> + <Method name="getLocalFileDataData" /> + <Method name="getLocalFileDataLength" /> + <Method name="parseFromLocalFileData" /> + </Or> + <Bug pattern="DM_DEFAULT_ENCODING" /> + </Match> + <Match> + <Class name="org.apache.commons.compress.archivers.zip.FallbackZipEncoding" /> + <Or> + <Method name="decode" /> + <Method name="encode" /> + </Or> + <Bug pattern="DM_DEFAULT_ENCODING" /> + </Match> + + <!-- Reason: default encoding is good enough for exception message --> + <Match> + <Class name="org.apache.commons.compress.archivers.tar.TarUtils" /> + <Method name="exceptionMessage" /> + <Bug pattern="DM_DEFAULT_ENCODING" /> + </Match> + + <!-- Reason: unrolled loop, all possible cases are covered --> + <Match> + <Class name="org.apache.commons.compress.compressors.snappy.PureJavaCrc32C" /> + <Method name="update" /> + <Bug pattern="SF_SWITCH_NO_DEFAULT" /> + </Match> + + <!-- Reason: class only adds unused always-null fields and superclass' equals is sufficient --> + <Match> + <Class name="org.apache.commons.compress.archivers.jar.JarArchiveEntry" /> + <Method name="equals" /> + <Bug pattern="EQ_DOESNT_OVERRIDE_EQUALS" /> + </Match> + + <!-- Reason: failure to delete a file that may not exist. 
And we really don't care that much either --> + <Match> + <Class name="org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore" /> + <Method name="close" /> + <Bug pattern="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE" /> + </Match> + + <!-- Reason: nested code can throw IllegalArgumentException if + stream is not a TAR stream --> + <Match> + <Class name="org.apache.commons.compress.archivers.ArchiveStreamFactory" /> + <Method name="createArchiveInputStream" /> + <Bug pattern="REC_CATCH_EXCEPTION" /> + </Match> + + <!-- the class wants to allow outside access to the array, just like + ByteBuffer.wrap does --> + <Match> + <Class name="org.apache.commons.compress.utils.SeekableInMemoryByteChannel" /> + <Method name="array" /> + <Bug pattern="EI_EXPOSE_REP" /> + </Match> + <Match> + <Class name="org.apache.commons.compress.utils.SeekableInMemoryByteChannel" /> + <Method name="<init>" /> + <Bug pattern="EI_EXPOSE_REP2" /> + </Match> + + <!-- the array is exposed deliberately to improve performance and it + is documented that way --> + <Match> + <Class name="org.apache.commons.compress.compressors.lz77support.LZ77Compressor$LiteralBlock" /> + <Method name="getData" /> + <Bug pattern="EI_EXPOSE_REP" /> + </Match> + <Match> + <Class name="org.apache.commons.compress.compressors.lz77support.LZ77Compressor$LiteralBlock" /> + <Method name="<init>" /> + <Bug pattern="EI_EXPOSE_REP2" /> + </Match> + +</FindBugsFilter> @@ -0,0 +1,225 @@ +#!/bin/sh +# ---------------------------------------------------------------------------- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Maven2 Start Up Batch script +# +# Required ENV vars: +# ------------------ +# JAVA_HOME - location of a JDK home dir +# +# Optional ENV vars +# ----------------- +# M2_HOME - location of maven2's installed home dir +# MAVEN_OPTS - parameters passed to the Java VM when running Maven +# e.g. to debug Maven itself, use +# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 +# MAVEN_SKIP_RC - flag to disable loading of mavenrc files +# ---------------------------------------------------------------------------- + +if [ -z "$MAVEN_SKIP_RC" ] ; then + + if [ -f /etc/mavenrc ] ; then + . /etc/mavenrc + fi + + if [ -f "$HOME/.mavenrc" ] ; then + . "$HOME/.mavenrc" + fi + +fi + +# OS specific support. $var _must_ be set to either true or false. 
+cygwin=false; +darwin=false; +mingw=false +case "`uname`" in + CYGWIN*) cygwin=true ;; + MINGW*) mingw=true;; + Darwin*) darwin=true + # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home + # See https://developer.apple.com/library/mac/qa/qa1170/_index.html + if [ -z "$JAVA_HOME" ]; then + if [ -x "/usr/libexec/java_home" ]; then + export JAVA_HOME="`/usr/libexec/java_home`" + else + export JAVA_HOME="/Library/Java/Home" + fi + fi + ;; +esac + +if [ -z "$JAVA_HOME" ] ; then + if [ -r /etc/gentoo-release ] ; then + JAVA_HOME=`java-config --jre-home` + fi +fi + +if [ -z "$M2_HOME" ] ; then + ## resolve links - $0 may be a link to maven's home + PRG="$0" + + # need this for relative symlinks + while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG="`dirname "$PRG"`/$link" + fi + done + + saveddir=`pwd` + + M2_HOME=`dirname "$PRG"`/.. + + # make it fully qualified + M2_HOME=`cd "$M2_HOME" && pwd` + + cd "$saveddir" + # echo Using m2 at $M2_HOME +fi + +# For Cygwin, ensure paths are in UNIX format before anything is touched +if $cygwin ; then + [ -n "$M2_HOME" ] && + M2_HOME=`cygpath --unix "$M2_HOME"` + [ -n "$JAVA_HOME" ] && + JAVA_HOME=`cygpath --unix "$JAVA_HOME"` + [ -n "$CLASSPATH" ] && + CLASSPATH=`cygpath --path --unix "$CLASSPATH"` +fi + +# For Migwn, ensure paths are in UNIX format before anything is touched +if $mingw ; then + [ -n "$M2_HOME" ] && + M2_HOME="`(cd "$M2_HOME"; pwd)`" + [ -n "$JAVA_HOME" ] && + JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`" + # TODO classpath? +fi + +if [ -z "$JAVA_HOME" ]; then + javaExecutable="`which javac`" + if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then + # readlink(1) is not available as standard on Solaris 10. + readLink=`which readlink` + if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then + if $darwin ; then + javaHome="`dirname \"$javaExecutable\"`" + javaExecutable="`cd \"$javaHome\" && pwd -P`/javac" + else + javaExecutable="`readlink -f \"$javaExecutable\"`" + fi + javaHome="`dirname \"$javaExecutable\"`" + javaHome=`expr "$javaHome" : '\(.*\)/bin'` + JAVA_HOME="$javaHome" + export JAVA_HOME + fi + fi +fi + +if [ -z "$JAVACMD" ] ; then + if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + else + JAVACMD="`which java`" + fi +fi + +if [ ! -x "$JAVACMD" ] ; then + echo "Error: JAVA_HOME is not defined correctly." >&2 + echo " We cannot execute $JAVACMD" >&2 + exit 1 +fi + +if [ -z "$JAVA_HOME" ] ; then + echo "Warning: JAVA_HOME environment variable is not set." 
+fi + +CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher + +# traverses directory structure from process work directory to filesystem root +# first directory with .mvn subdirectory is considered project base directory +find_maven_basedir() { + + if [ -z "$1" ] + then + echo "Path not specified to find_maven_basedir" + return 1 + fi + + basedir="$1" + wdir="$1" + while [ "$wdir" != '/' ] ; do + if [ -d "$wdir"/.mvn ] ; then + basedir=$wdir + break + fi + # workaround for JBEAP-8937 (on Solaris 10/Sparc) + if [ -d "${wdir}" ]; then + wdir=`cd "$wdir/.."; pwd` + fi + # end of workaround + done + echo "${basedir}" +} + +# concatenates all lines of a file +concat_lines() { + if [ -f "$1" ]; then + echo "$(tr -s '\n' ' ' < "$1")" + fi +} + +BASE_DIR=`find_maven_basedir "$(pwd)"` +if [ -z "$BASE_DIR" ]; then + exit 1; +fi + +export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"} +echo $MAVEN_PROJECTBASEDIR +MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" + +# For Cygwin, switch paths to Windows format before running java +if $cygwin; then + [ -n "$M2_HOME" ] && + M2_HOME=`cygpath --path --windows "$M2_HOME"` + [ -n "$JAVA_HOME" ] && + JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"` + [ -n "$CLASSPATH" ] && + CLASSPATH=`cygpath --path --windows "$CLASSPATH"` + [ -n "$MAVEN_PROJECTBASEDIR" ] && + MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"` +fi + +WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain + +exec "$JAVACMD" \ + $MAVEN_OPTS \ + -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ + "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ + ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" diff --git a/mvnw.cmd b/mvnw.cmd new file mode 100644 index 000000000..019bd74d7 --- /dev/null +++ b/mvnw.cmd @@ -0,0 +1,143 @@ +@REM ---------------------------------------------------------------------------- +@REM Licensed to the Apache Software Foundation (ASF) under one +@REM or more contributor license agreements. See the NOTICE file +@REM distributed with this work for additional information +@REM regarding copyright ownership. The ASF licenses this file +@REM to you under the Apache License, Version 2.0 (the +@REM "License"); you may not use this file except in compliance +@REM with the License. You may obtain a copy of the License at +@REM +@REM http://www.apache.org/licenses/LICENSE-2.0 +@REM +@REM Unless required by applicable law or agreed to in writing, +@REM software distributed under the License is distributed on an +@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@REM KIND, either express or implied. See the License for the +@REM specific language governing permissions and limitations +@REM under the License. +@REM ---------------------------------------------------------------------------- + +@REM ---------------------------------------------------------------------------- +@REM Maven2 Start Up Batch script +@REM +@REM Required ENV vars: +@REM JAVA_HOME - location of a JDK home dir +@REM +@REM Optional ENV vars +@REM M2_HOME - location of maven2's installed home dir +@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands +@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a key stroke before ending +@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven +@REM e.g. 
to debug Maven itself, use +@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 +@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files +@REM ---------------------------------------------------------------------------- + +@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' +@echo off +@REM enable echoing my setting MAVEN_BATCH_ECHO to 'on' +@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% + +@REM set %HOME% to equivalent of $HOME +if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") + +@REM Execute a user defined script before this one +if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre +@REM check for pre script, once with legacy .bat ending and once with .cmd ending +if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat" +if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd" +:skipRcPre + +@setlocal + +set ERROR_CODE=0 + +@REM To isolate internal variables from possible post scripts, we use another setlocal +@setlocal + +@REM ==== START VALIDATION ==== +if not "%JAVA_HOME%" == "" goto OkJHome + +echo. +echo Error: JAVA_HOME not found in your environment. >&2 +echo Please set the JAVA_HOME variable in your environment to match the >&2 +echo location of your Java installation. >&2 +echo. +goto error + +:OkJHome +if exist "%JAVA_HOME%\bin\java.exe" goto init + +echo. +echo Error: JAVA_HOME is set to an invalid directory. >&2 +echo JAVA_HOME = "%JAVA_HOME%" >&2 +echo Please set the JAVA_HOME variable in your environment to match the >&2 +echo location of your Java installation. >&2 +echo. +goto error + +@REM ==== END VALIDATION ==== + +:init + +@REM Find the project base dir, i.e. the directory that contains the folder ".mvn". +@REM Fallback to current working directory if not found. + +set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% +IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir + +set EXEC_DIR=%CD% +set WDIR=%EXEC_DIR% +:findBaseDir +IF EXIST "%WDIR%"\.mvn goto baseDirFound +cd .. +IF "%WDIR%"=="%CD%" goto baseDirNotFound +set WDIR=%CD% +goto findBaseDir + +:baseDirFound +set MAVEN_PROJECTBASEDIR=%WDIR% +cd "%EXEC_DIR%" +goto endDetectBaseDir + +:baseDirNotFound +set MAVEN_PROJECTBASEDIR=%EXEC_DIR% +cd "%EXEC_DIR%" + +:endDetectBaseDir + +IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig + +@setlocal EnableExtensions EnableDelayedExpansion +for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! 
%%a +@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% + +:endReadAdditionalConfig + +SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" + +set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" +set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain + +%MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* +if ERRORLEVEL 1 goto error +goto end + +:error +set ERROR_CODE=1 + +:end +@endlocal & set ERROR_CODE=%ERROR_CODE% + +if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost +@REM check for post script, once with legacy .bat ending and once with .cmd ending +if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat" +if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd" +:skipRcPost + +@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' +if "%MAVEN_BATCH_PAUSE%" == "on" pause + +if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE% + +exit /B %ERROR_CODE% diff --git a/pmd-ruleset.xml b/pmd-ruleset.xml new file mode 100644 index 000000000..8ad115306 --- /dev/null +++ b/pmd-ruleset.xml @@ -0,0 +1,34 @@ +<?xml version="1.0"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> +<ruleset name="compress" + xmlns="http://pmd.sf.net/ruleset/1.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://pmd.sf.net/ruleset/1.0.0 http://pmd.sf.net/ruleset_xml_schema.xsd" + xsi:noNamespaceSchemaLocation="http://pmd.sf.net/ruleset_xml_schema.xsd"> + <description>Standard Ruleset but excluding the "no octal + constants" rule</description> + <rule ref="rulesets/java/basic.xml"> + <exclude name="AvoidUsingOctalValues"/> + </rule> + <rule ref="rulesets/java/finalizers.xml"/> + <rule ref="rulesets/java/imports.xml"/> + <rule ref="rulesets/java/unusedcode.xml"/> + <rule ref="rulesets/java/braces.xml"/> +</ruleset> diff --git a/pom.xml b/pom.xml new file mode 100644 index 000000000..47d7c442a --- /dev/null +++ b/pom.xml @@ -0,0 +1,541 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.commons</groupId> + <artifactId>commons-parent</artifactId> + <version>47</version> + </parent> + + <artifactId>commons-compress</artifactId> + <version>1.19-SNAPSHOT</version> + <name>Apache Commons Compress</name> + <url>https://commons.apache.org/proper/commons-compress/</url> + <!-- The description is not indented to make it look better in the release notes --> + <description> +Apache Commons Compress software defines an API for working with +compression and archive formats. These include: bzip2, gzip, pack200, +lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4, +Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj. + </description> + + <properties> + <maven.compiler.source>1.7</maven.compiler.source> + <maven.compiler.target>1.7</maven.compiler.target> + <commons.componentid>compress</commons.componentid> + <commons.module.name>org.apache.commons.compress</commons.module.name> + <commons.jira.id>COMPRESS</commons.jira.id> + <commons.jira.pid>12310904</commons.jira.pid> + <!-- configuration bits for cutting a release candidate --> + <commons.release.version>${project.version}</commons.release.version> + <commons.rc.version>RC1</commons.rc.version> + <powermock.version>1.7.3</powermock.version> + <commons.pmd-plugin.version>3.8</commons.pmd-plugin.version> + + <commons.manifestlocation>${project.build.outputDirectory}/META-INF</commons.manifestlocation> + <commons.manifestfile>${commons.manifestlocation}/MANIFEST.MF</commons.manifestfile> + <commons.osgi.import> + org.tukaani.xz;resolution:=optional, + org.brotli.dec;resolution:=optional, + com.github.luben.zstd;resolution:=optional, + javax.crypto.*;resolution:=optional, + * + </commons.osgi.import> + + <!-- only show issues of the current version --> + <commons.changes.onlyCurrentVersion>true</commons.changes.onlyCurrentVersion> + + <!-- generate report even if there are binary incompatible changes --> + <commons.japicmp.breakBuildOnBinaryIncompatibleModifications>false</commons.japicmp.breakBuildOnBinaryIncompatibleModifications> + <!-- 0.12.0 dies with a NullPointerException --> + <commons.japicmp.version>0.11.1</commons.japicmp.version> + + <!-- definition uses commons.componentId starting with parent 47, + this doesn't work for us --> + <commons.scmPubUrl>https://svn.apache.org/repos/infra/websites/production/commons/content/proper/${project.artifactId}</commons.scmPubUrl> + + <pax.exam.version>4.11.0</pax.exam.version> + <slf4j.version>1.7.21</slf4j.version> + </properties> + + <issueManagement> + <system>jira</system> + <url>https://issues.apache.org/jira/browse/COMPRESS</url> + </issueManagement> + + <dependencies> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.12</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>com.github.luben</groupId> + <artifactId>zstd-jni</artifactId> + <version>1.3.3-3</version> + <optional>true</optional> + </dependency> + <dependency> + <groupId>org.brotli</groupId> + <artifactId>dec</artifactId> + <version>0.1.2</version> + <optional>true</optional> + </dependency> + <dependency> + <groupId>org.tukaani</groupId> + 
<artifactId>xz</artifactId> + <version>1.8</version> + <optional>true</optional> + </dependency> + <dependency> + <groupId>org.powermock</groupId> + <artifactId>powermock-module-junit4</artifactId> + <version>${powermock.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.powermock</groupId> + <artifactId>powermock-api-mockito</artifactId> + <version>${powermock.version}</version> + <scope>test</scope> + </dependency> + + <!-- integration test verifiying OSGi bundle works --> + <dependency> + <groupId>org.ops4j.pax.exam</groupId> + <artifactId>pax-exam-container-native</artifactId> + <version>${pax.exam.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.ops4j.pax.exam</groupId> + <artifactId>pax-exam-junit4</artifactId> + <version>${pax.exam.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.ops4j.pax.exam</groupId> + <artifactId>pax-exam-cm</artifactId> + <version>${pax.exam.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.ops4j.pax.exam</groupId> + <artifactId>pax-exam-link-mvn</artifactId> + <version>${pax.exam.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.felix</groupId> + <artifactId>org.apache.felix.framework</artifactId> + <version>5.6.10</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>javax.inject</groupId> + <artifactId>javax.inject</artifactId> + <version>1</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + <version>${slf4j.version}</version> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>org.osgi</groupId> + <artifactId>org.osgi.core</artifactId> + <version>6.0.0</version> + <scope>provided</scope> + </dependency> + </dependencies> + + <developers> + <developer> + <name>Torsten Curdt</name> + <id>tcurdt</id> + <email>tcurdt at apache.org</email> + </developer> + <developer> + <name>Stefan Bodewig</name> + <id>bodewig</id> + <email>bodewig at apache.org</email> + </developer> + <developer> + <name>Sebastian Bazley</name> + <id>sebb</id> + <email>sebb at apache.org</email> + </developer> + <developer> + <name>Christian Grobmeier</name> + <id>grobmeier</id> + <email>grobmeier at apache.org</email> + </developer> + <developer> + <name>Julius Davies</name> + <id>julius</id> + <email>julius at apache.org</email> + </developer> + <developer> + <name>Damjan Jovanovic</name> + <id>damjan</id> + <email>damjan at apache.org</email> + </developer> + <developer> + <name>Emmanuel Bourg</name> + <id>ebourg</id> + <email>ebourg at apache.org</email> + </developer> + <developer> + <name>Gary Gregory</name> + <id>ggregory</id> + <email>ggregory at apache.org</email> + </developer> + <developer> + <name>Rob Tompkins</name> + <id>chtompki</id> + <email>chtompki at apache.org</email> + </developer> + </developers> + + <contributors> + <contributor> + <name>Wolfgang Glas</name> + <email>wolfgang.glas at ev-i.at</email> + </contributor> + <contributor> + <name>Christian Kohlschütte</name> + <email>ck@newsclub.de</email> + </contributor> + <contributor> + <name>Bear Giles</name> + <email>bgiles@coyotesong.com</email> + </contributor> + <contributor> + <name>Michael Kuss</name> + <email>mail at michael minus kuss.de</email> + </contributor> + <contributor> + <name>Lasse Collin</name> + <email>lasse.collin@tukaani.org</email> + </contributor> + <contributor> + <name>John Kodis</name> + 
</contributor> + <contributor> + <name>BELUGA BEHR</name> + </contributor> + <contributor> + <name>Simon Spero</name> + <email>sesuncedu@gmail.com</email> + </contributor> + <contributor> + <name>Michael Hausegger</name> + <email>hausegger.michael@googlemail.com</email> + </contributor> + </contributors> + + <scm> + <connection>scm:git:https://git-wip-us.apache.org/repos/asf/commons-compress.git</connection> + <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/commons-compress.git</developerConnection> + <url>https://git-wip-us.apache.org/repos/asf?p=commons-compress.git</url> + </scm> + + <build> + <pluginManagement> + <plugins> + <!-- Override Javadoc config in parent pom to add JCIP tags --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <version>${commons.javadoc.version}</version> + <configuration> + <quiet>true</quiet> + <source>${maven.compiler.source}</source> + <encoding>${commons.encoding}</encoding> + <docencoding>${commons.docEncoding}</docencoding> + <linksource>true</linksource> + <links> + <link>${commons.javadoc.java.link}</link> + <link>${commons.javadoc.javaee.link}</link> + </links> + <tags> + <tag> + <name>Immutable</name> + <placement>a</placement> + <head>This class is immutable</head> + </tag> + <tag> + <name>NotThreadSafe</name> + <placement>a</placement> + <head>This class is not thread-safe</head> + </tag> + <tag> + <name>ThreadSafe</name> + <placement>a</placement> + <head>This class is thread-safe</head> + </tag> + </tags> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + <version>${commons.rat.version}</version> + <configuration> + <excludes> + <!-- files used during tests --> + <exclude>src/test/resources/**</exclude> + <exclude>.pmd</exclude> + <exclude>.projectile</exclude> + <exclude>.mvn/**</exclude> + </excludes> + </configuration> + </plugin> + <plugin> + <groupId>org.eluder.coveralls</groupId> + <artifactId>coveralls-maven-plugin</artifactId> + <configuration> + <failOnServiceError>false</failOnServiceError> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.felix</groupId> + <artifactId>maven-bundle-plugin</artifactId> + <version>${commons.felix.version}</version> + </plugin> + <!-- override skip property of parent pom, can be removed once + we adopt parent 48+ --> + <plugin> + <groupId>com.github.siom79.japicmp</groupId> + <artifactId>japicmp-maven-plugin</artifactId> + <configuration> + <skip>false</skip> + </configuration> + </plugin> + </plugins> + </pluginManagement> + <plugins> + <plugin> + <!-- create the source and binary assemblies --> + <artifactId>maven-assembly-plugin</artifactId> + <configuration> + <descriptors> + <descriptor>src/assembly/bin.xml</descriptor> + <descriptor>src/assembly/src.xml</descriptor> + </descriptors> + <tarLongFileMode>gnu</tarLongFileMode> + </configuration> + </plugin> + <plugin> + <artifactId>maven-jar-plugin</artifactId> + <configuration> + <archive> + <manifestEntries> + <Main-Class>org.apache.commons.compress.archivers.Lister</Main-Class> + <Extension-Name>org.apache.commons.compress</Extension-Name> + <Automatic-Module-Name>${commons.module.name}</Automatic-Module-Name> + </manifestEntries> + </archive> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.felix</groupId> + <artifactId>maven-bundle-plugin</artifactId> + <configuration> + <manifestLocation>${commons.manifestlocation}</manifestLocation> + </configuration> + </plugin> + 
<plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-scm-publish-plugin</artifactId> + <configuration> + <ignorePathsToDelete> + <ignorePathToDelete>javadocs</ignorePathToDelete> + </ignorePathsToDelete> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-pmd-plugin</artifactId> + <version>${commons.pmd-plugin.version}</version> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-antrun-plugin</artifactId> + <executions> + <execution> + <phase>process-test-resources</phase> + <configuration> + <target> + <untar src="${basedir}/src/test/resources/zstd-tests.tar" + dest="${project.build.testOutputDirectory}" + /> + </target> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <artifactId>maven-surefire-plugin</artifactId> + <configuration> + <systemPropertyVariables> + <pax.exam.karaf.version>${karaf.version}</pax.exam.karaf.version> + <commons-compress.version>${project.version}</commons-compress.version> + </systemPropertyVariables> + </configuration> + </plugin> + </plugins> + </build> + + <reporting> + <plugins> + <plugin> + <!-- generate the PMD reports --> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-pmd-plugin</artifactId> + <version>${commons.pmd-plugin.version}</version> + <configuration> + <minimumTokens>200</minimumTokens> + <targetJdk>${maven.compiler.source}</targetJdk> + <rulesets> + <ruleset>${basedir}/pmd-ruleset.xml</ruleset> + </rulesets> + </configuration> + </plugin> + <!-- Override Javadoc config in parent pom to add JCIP tags --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <configuration> + <quiet>true</quiet> + <source>${maven.compiler.source}</source> + <encoding>${commons.encoding}</encoding> + <docencoding>${commons.docEncoding}</docencoding> + <linksource>true</linksource> + <links> + <link>${commons.javadoc.java.link}</link> + <link>${commons.javadoc.javaee.link}</link> + </links> + <tags> + <tag> + <name>Immutable</name> + <placement>a</placement> + <head>This class is immutable</head> + </tag> + <tag> + <name>NotThreadSafe</name> + <placement>a</placement> + <head>This class is not thread-safe</head> + </tag> + <tag> + <name>ThreadSafe</name> + <placement>a</placement> + <head>This class is thread-safe</head> + </tag> + </tags> + </configuration> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>findbugs-maven-plugin</artifactId> + <version>3.0.5</version> + <configuration> + <threshold>Normal</threshold> + <effort>Default</effort> + <excludeFilterFile>${basedir}/findbugs-exclude-filter.xml</excludeFilterFile> + </configuration> + </plugin> + </plugins> + </reporting> + + <profiles> + <!-- Add long running tests as **/*IT.java --> + <profile> + <id>run-zipit</id> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-antrun-plugin</artifactId> + <executions> + <execution> + <phase>process-test-resources</phase> + <configuration> + <target> + <untar src="${basedir}/src/test/resources/zip64support.tar.bz2" + dest="${project.build.testOutputDirectory}" + compression="bzip2"/> + </target> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <artifactId>maven-surefire-plugin</artifactId> + <configuration> + <includes> + <include>**/zip/*IT.java</include> + </includes> + 
</configuration> + </plugin> + </plugins> + </build> + </profile> + <profile> + <id>run-tarit</id> + <build> + <plugins> + <plugin> + <artifactId>maven-surefire-plugin</artifactId> + <configuration> + <includes> + <include>**/tar/*IT.java</include> + </includes> + </configuration> + </plugin> + </plugins> + </build> + </profile> + <profile> + <id>java9+</id> + <activation> + <jdk>[9,)</jdk> + </activation> + <properties> + <maven.compiler.release>9</maven.compiler.release> + <commons.jacoco.version>0.7.9</commons.jacoco.version> + <animal.sniffer.skip>true</animal.sniffer.skip> + <!-- coverall version 4.3.0 does not work with java 9, see https://github.com/trautonen/coveralls-maven-plugin/issues/112 --> + <coveralls.skip>true</coveralls.skip> + </properties> + </profile> + + </profiles> + +</project> diff --git a/src/assembly/bin.xml b/src/assembly/bin.xml new file mode 100644 index 000000000..a58c2e573 --- /dev/null +++ b/src/assembly/bin.xml @@ -0,0 +1,45 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<assembly> + <id>bin</id> + <formats> + <format>tar.gz</format> + <format>zip</format> + </formats> + <includeSiteDirectory>false</includeSiteDirectory> + <fileSets> + <fileSet> + <includes> + <include>LICENSE.txt</include> + <include>NOTICE.txt</include> + <include>README.txt</include> + <include>RELEASE-NOTES.txt</include> + </includes> + </fileSet> + <fileSet> + <directory>target</directory> + <outputDirectory></outputDirectory> + <includes> + <include>*.jar</include> + </includes> + </fileSet> + <fileSet> + <directory>target/site/apidocs</directory> + <outputDirectory>apidocs</outputDirectory> + </fileSet> + </fileSets> +</assembly> diff --git a/src/assembly/src.xml b/src/assembly/src.xml new file mode 100644 index 000000000..70fe14ac8 --- /dev/null +++ b/src/assembly/src.xml @@ -0,0 +1,40 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<assembly> + <id>src</id> + <formats> + <format>tar.gz</format> + <format>zip</format> + </formats> + <baseDirectory>${artifactId}-${commons.release.version}-src</baseDirectory> + <fileSets> + <fileSet> + <includes> + <include>LICENSE.txt</include> + <include>NOTICE.txt</include> + <include>README.txt</include> + <include>RELEASE-NOTES.txt</include> + <include>pom.xml</include> + <include>findbugs-exclude-filter.xml</include> + <include>pmd-ruleset.xml</include> + </includes> + </fileSet> + <fileSet> + <directory>src</directory> + </fileSet> + </fileSets> +</assembly> diff --git a/src/changes/changes.xml b/src/changes/changes.xml new file mode 100644 index 000000000..77be50af3 --- /dev/null +++ b/src/changes/changes.xml @@ -0,0 +1,1468 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<!-- +This file is also used by the maven-changes-plugin to generate the release notes. +Useful ways of finding items to add to this file are: + +1. Add items when you fix a bug or add a feature (this makes the +release process easy :-). + +2. Do a JIRA search for tickets closed since the previous release. + +3. Use the report generated by the maven-changelog-plugin to see all +SVN commits. TBA how to use this with SVN. + +To generate the release notes from this file: + +mvn changes:announcement-generate -Prelease-notes [-Dchanges.version=m.n] + +The <action> type attribute can be add,update,fix,remove. +--> + +<document> + <properties> + <title>commons-compress</title> + </properties> + <body> + <release version="1.19" date="not released, yet" + description="Release 1.19"> + <action type="fix" date="2018-09-07"> + ZipArchiveInputStream could forget the compression level has + changed under certain circumstances. + </action> + </release> + <release version="1.18" date="2018-08-16" + description="Release 1.18"> + <action type="fix" date="2018-06-15" due-to="DidierLoiseau"> + The example Expander class has been vulnerable to a path + traversal in the edge case that happens when the target + directory has a sibling directory and the name of the target + directory is a prefix of the sibling directory's name. + </action> + <action issue="COMPRESS-456" type="fix" date="2018-06-19"> + Changed the OSGi Import-Package to also optionally import + javax.crypto so encrypted archives can be read. + </action> + <action issue="COMPRESS-457" type="fix" date="2018-07-01"> + Changed various implementations of the close method to better + ensure all held resources get closed even if exceptions are + thrown during the closing the stream. + </action> + <action issue="COMPRESS-455" type="fix" date="2018-07-01"> + ZipArchiveInputStream can now detect the APK Signing Block + used in signed Android APK files and treats it as an "end of + archive" marker. 
+ </action> + <action issue="COMPRESS-459" type="fix" date="2018-07-11" + due-to="Jens Reimann"> + The cpio streams didn't handle archives using a multi-byte + encoding properly. + </action> + <action issue="COMPRESS-460" type="add" date="2018-07-28" + due-to="Carmi Grushko"> + It is now possible to specify the arguments of zstd-jni's + ZstdOutputStream constructors via Commons Compress as well. + </action> + <action issue="COMPRESS-463" type="fix" date="2018-08-09"> + ZipArchiveInputStream#read would silently return -1 on a + corrupted stored entry and even return > 0 after hitting the + end of the archive. + </action> + <action issue="COMPRESS-462" type="fix" date="2018-08-10"> + ArArchiveInputStream#read would allow to read from the stream + without opening an entry at all. + </action> + </release> + <release version="1.17" date="2018-06-03" + description="Release 1.17"> + <action type="fix" date="2018-02-06"> + Removed the objenesis dependency from the pom as it is not + needed at all. + </action> + <action issue="COMPRESS-446" type="fix" date="2018-03-29"> + Fixed resource leak in ParallelScatterZipCreator#writeTo. + </action> + <action type="update" date="2018-04-01" due-to="Marchenko Sergey"> + Fixed some code examples. + Github Pull Request #63. + </action> + <action issue="COMPRESS-447" type="fix" date="2018-04-22"> + Certain errors when parsing ZIP extra fields in corrupt + archives are now turned into ZipException, they used to + manifest as ArrayIndexOutOfBoundsException before. + </action> + <action issue="COMPRESS-445" type="update" date="2018-04-22" + due-to="Andreas Beeker"> + The streams returned by ZipFile and most other decompressing + streams now provide information about the number of compressed + and uncompressed bytes read so far. This may be used to detect + a ZipBomb if the compression ratio exceeds a certain + threshold, for example. + For SevenZFile a new method returns the statistics for the + current entry. + </action> + <action issue="COMPRESS-443" type="add" date="2018-04-25"> + Added a unit test that is supposed to fail if we break the + OSGi manifest entries again. + </action> + <action issue="COMPRESS-449" type="add" date="2018-05-02"> + Add a new SkipShieldingInputStream class that can be used with + streams that throw an IOException when skip is invoked. + </action> + <action issue="COMPRESS-451" type="fix" date="2018-05-04"> + IOUtils.copy now verifies the buffer size is bigger than 0. + </action> + <action issue="COMPRESS-452" type="add" date="2018-05-09"> + New constructors have been added to SevenZFile that accept + char[]s rather than byte[]s in order to avoid a common error + of using the wrong encoding when creating the byte[]. This + change may break source compatibility for client code that + uses one of the constructors expecting a password and passes + in null as password. We recommend to change the code to use a + constructor without password argument. + </action> + <action issue="COMPRESS-453" type="update" date="2018-05-24"> + Added a workaround for a bug in AdoptOpenJDK for S/390 to + BZip2CompressorInputStream. + </action> + <action issue="COMPRESS-454" type="fix" date="2018-05-30"> + ZipArchiveInputStream failed to read some files with stored + entries using a data descriptor. + </action> + </release> + <release version="1.16.1" date="2018-02-10" + description="Release 1.16.1"> + <action issue="COMPRESS-442" type="fix" date="2018-02-06"> + Fixed the OSGi manifest entry for imports that has been broken + in 1.16. 
+ </action> + </release> + <release version="1.16" date="2018-02-05" + description="Release 1.16"> + <action issue="COMPRESS-423" type="add" date="2017-10-17" + due-to="Andre F de Miranda"> + Add read-only support for Zstandard compression based on the + Zstd-jni project. + </action> + <action issue="COMPRESS-425" type="add" date="2017-10-22"> + Added auto-detection for Zstandard compressed streams. + </action> + <action issue="COMPRESS-430" type="fix" date="2017-11-25" + due-to="Bruno P. Kinoshita"> + Synchronized iteration over a synchronizedList in ParallelScatterZipCreator. + </action> + <action issue="COMPRESS-432" type="fix" date="2017-12-22"> + ZipFile could get stuck in an infinite loop when parsing ZIP + archives with certain strong encryption headers. + </action> + <action issue="COMPRESS-435" type="update" date="2017-12-27" + due-to="BELUGA BEHR"> + Replaces instanceof checks with a type marker in LZ77 support code. + </action> + <action issue="COMPRESS-426" type="add" date="2017-12-28"> + Added write-support for Zstandard compression. + </action> + <action issue="COMPRESS-424" type="fix" date="2017-12-30"> + Added improved checks to detect corrupted bzip2 streams and + throw the expected IOException rather than obscure + RuntimeExceptions. + </action> + <action type="update" date="2018-01-04"> + Updated XZ for Java dependency to 1.8 in order to pick up bug + fix to LZMA2InputStream's available method. + </action> + <action type="update" date="2018-01-05" issue="COMPRESS-429" + due-to="Damiano Albani"> + ZipArchiveEntry now exposes how the name or comment have been + determined when the entry was read. + </action> + <action issue="COMPRESS-380" type="add" date="2018-01-09" + due-to="Christian Marquez Grabia"> + Added read-only DEFLATE64 support to ZIP archives and as + stand-alone CompressorInputStream. + </action> + <action issue="COMPRESS-438" type="update" date="2018-01-10"> + ZipFile.getInputStream will now always buffer the stream + internally in order to improve read performance. + </action> + <action issue="COMPRESS-440" type="update" date="2018-01-12" + due-to="Dawid Weiss"> + Speed improvement for DEFLATE64 decompression. + </action> + <action issue="COMPRESS-437" type="add" date="2018-01-13"> + Added read-only DEFLATE64 support to 7z archives. + </action> + <action issue="COMPRESS-436" type="update" date="2018-01-14"> + Added a few extra sanity checks for the rarer compression + methods used in ZIP archives. + </action> + <action issue="COMPRESS-441" type="update" date="2018-01-14"> + Simplified the special handling for the dummy byte required by + zlib when using java.util.zip.Inflater. + </action> + <action type="update" date="2018-01-18" due-to="Shahab Kondri"> + Various code cleanups. + Github Pull Request #61. + </action> + <action type="update" date="2018-01-29"> + TarArchiveEntry's preserveLeadingSlashes constructor argument + has been renamed and can now also be used to preserve the + drive letter on Windows. + </action> + </release> + <release version="1.15" date="2017-10-17" + description="Release 1.15 +---------------------------------------- + +TarArchiveOutputStream now ensures record size is 512 and block size is +a multiple of 512 as any other value would create invalid tar +archives. This may break compatibility for code that deliberately +wanted to create such files."> + <action issue="COMPRESS-394" type="fix" date="2017-05-22"> + Make sure "version needed to extract" in local file header and + central directory of a ZIP archive agree with each other. 
+ Also ensure the version is set to 2.0 if DEFLATE is used. + </action> + <action issue="COMPRESS-395" type="fix" date="2017-05-22"> + Don't use a data descriptor in ZIP archives when copying a raw + entry that already knows its size and CRC information. + </action> + <action issue="COMPRESS-413" type="fix" date="2017-05-22" due-to="Simon Spero"> + Travis build redundantly repeats compilation and tests redundantly #43. + </action> + <action issue="COMPRESS-397" type="add" date="2017-05-22"> + Added magic MANIFEST entry Automatic-Module-Name so the module + name will be org.apache.commons.compress when the jar is used + as an automatic module in Java9. + </action> + <action issue="COMPRESS-396" type="fix" date="2017-05-23"> + The MANIFEST of 1.14 lacks an OSGi Import-Package for XZ for + Java. + </action> + <action issue="COMPRESS-406" type="fix" date="2017-06-12" + due-to="Simon Spero"> + BUILDING.md now passes the RAT check. + </action> + <action issue="COMPRESS-405" type="add" date="2017-06-15" + due-to="Simon Spero "> + Added a new utility class FixedLengthBlockOutputStream that + can be used to ensure writing always happens in blocks of a + given size. + </action> + <action issue="COMPRESS-412" type="fix" date="2017-06-17" + due-to="Michael Hausegger"> + Made sure ChecksumCalculatingInputStream receives valid + checksum and input stream instances via the constructor. + </action> + <action issue="COMPRESS-407" type="fix" date="2017-06-24" + due-to="Simon Spero "> + TarArchiveOutputStream now verifies the block and record sizes + specified at construction time are compatible with the tar + specification. In particular 512 is the only record size + accepted and the block size must be a multiple of 512. + At the same time the default block size in + TarArchiveOutputStream has been changed from 10240 to 512 + bytes. + </action> + <action issue="COMPRESS-400" type="add" date="2017-06-26" + due-to="Simon Spero "> + It is now possible to specify/read custom PAX headers when + writing/reading tar archives. + </action> + <action issue="COMPRESS-415" type="fix" date="2017-06-27"> + Fixed class names of CpioArchiveEntry and + CpioArchiveInputStream in various Javadocs. + </action> + <action issue="COMPRESS-416" type="fix" date="2017-07-04" + due-to="Simon Spero "> + The code of the extended timestamp zip extra field incorrectly + assumed the time was stored as unsigned 32-bit int and thus + created incorrect results for years after 2037. + </action> + <action issue="COMPRESS-410" type="fix" date="2017-07-05" + due-to="Simon Spero "> + Removed ZipEncoding code that became obsolete when we started + to require Java 5 as baseline long ago. + </action> + <action issue="COMPRESS-417" type="fix" date="2017-07-19"> + The tar package will no longer try to parse the major and + minor device numbers unless the entry represents a character + or block special file. + </action> + <action issue="COMPRESS-421" type="fix" date="2017-10-06" + due-to="Roel Spilker"> + When reading tar headers with name fields containing embedded + NULs, the name will now be terminated at the first NUL byte. + </action> + <action issue="COMPRESS-409" type="fix" date="2017-10-08"> + Simplified TarArchiveOutputStream by replacing the internal + buffering with new class FixedLengthBlockOutputStream. 
+ </action> + </release> + <release version="1.14" date="2017-05-14" + description="Release 1.14"> + <action issue="COMPRESS-378" type="fix" date="2017-01-09"> + SnappyCompressorInputStream slides the window too early + leading to ArrayIndexOutOfBoundsExceptions for some streams. + </action> + <action issue="COMPRESS-246" type="add" date="2017-01-10"> + Added write support for Snappy. + </action> + <action issue="COMPRESS-358" type="update" date="2017-01-10"> + The blocksize for FramedSnappyCompressorInputStream can now be + configured as some IWA files seem to be using blocks larger + than the default 32k. + </action> + <action issue="COMPRESS-379" type="fix" date="2017-01-15" + due-to="Guillaume Boué"> + ZipArchiveEntry#isUnixSymlink now only returns true if the + corresponding link flag is the only file-type flag set. + </action> + <action issue="COMPRESS-271" type="add" date="2017-02-07"> + Added support for LZ4 (block and frame format). + </action> + <action type="update" date="2017-02-15" due-to="Thomas Meyer"> + BZip2CompressorInputstream now uses BitInputStream internally. + Pull Request #13. + </action> + <action type="fix" date="2017-03-29" due-to="Daniel Collin"> + Fixed an integer overflow in CPIO's CRC calculation. + Pull Request #17. + </action> + <action issue="COMPRESS-385" type="add" date="2017-04-18"> + Add static detect(InputStream in) to CompressorStreamFactory + and ArchiveStreamFactory + </action> + <action issue="COMPRESS-387" type="fix" date="2017-04-18"> + Make unit tests work on Windows paths with spaces in their names. + </action> + <action issue="COMPRESS-388" type="update" date="2017-04-25" + due-to="Zbynek Vyskovsky"> + Improved performance for concurrent reads from ZipFile when + reading from a file. + </action> + <action issue="COMPRESS-382" type="add" date="2017-04-25" + due-to="Tim Allison"> + Added a way to limit amount of memory ZCompressorStream may + use. + </action> + <action issue="COMPRESS-386" type="add" date="2017-04-25" + due-to="Tim Allison"> + Added a way to limit amount of memory ZCompressorStream may + use. + </action> + <action issue="COMPRESS-382" type="add" date="2017-04-25" + due-to="Tim Allison"> + Added a way to limit amount of memory LZMACompressorStream and + XZCompressorInputStream may use. + </action> + <action issue="COMPRESS-389" type="fix" date="2017-04-26"> + Internal location pointer in ZipFile could get incremented + even if nothing had been read. + </action> + <action issue="COMPRESS-392" type="add" date="2017-05-02" due-to="Philippe Mouawad"> + Add Brotli decoder based on the Google Brotli library. + </action> + <action issue="COMPRESS-390" type="add" date="2017-05-04" + due-to="Zbynek Vyskovsky"> + ZipEntry now exposes its data offset. + </action> + <action issue="COMPRESS-393" type="fix" date="2017-05-07"> + LZMACompressorOutputStream#flush would throw an exception + rather than be the NOP it promised to be. + </action> + <action issue="COMPRESS-391" type="add" date="2017-05-11" + due-to="Zbynek Vyskovsky"> + Using ZipArchiveEntry's setAlignment it is now possible to + ensure the data offset of an entry starts at a file position + that at word or page boundaries. + A new extra field has been added for this purpose. + </action> + </release> + <release version="1.13" date="2016-12-29" + description="Release 1.13 - API compatible to 1.12 but requires Java 7 at runtime."> + <action issue="COMPRESS-360" type="update" date="2016-06-25" dev="ggregory"> + Update Java requirement from 6 to 7. 
+ </action> + <action issue="COMPRESS-363" type="fix" date="2016-07-01"> + BitInputStream could return bad results when overflowing + internally - if two consecutive reads tried to read more than + 64 bits. + </action> + <action issue="COMPRESS-366" type="update" date="2016-10-07"> + Clarified which TarArchiveEntry methods are useless for + entries read from an archive. + </action> + <action issue="COMPRESS-364" type="fix" date="2016-10-07" + due-to="Mike Mole"> + ZipArchiveInputStream.closeEntry does not properly advance to + next entry if there are junk bytes at end of data section + </action> + <action issue="COMPRESS-327" type="add" date="2016-10-12"> + SevenZFile, SevenZOutputFile, ZipFile and + ZipArchiveOutputStream can now work on non-file resources if + they can be accessed via SeekableByteChannel. + </action> + <action issue="COMPRESS-368" type="add" date="2016-11-15"> + Allow compressor extensions through a standard JRE ServiceLoader. + </action> + <action issue="COMPRESS-369" type="add" date="2016-11-15"> + Allow archive extensions through a standard JRE ServiceLoader. + </action> + <action issue="COMPRESS-373" type="add" date="2016-11-29"> + Add write support for the legacy LZMA format, this requires XZ + for Java 1.6. + </action> + <action issue="COMPRESS-374" type="add" date="2016-11-29"> + Add write support for the legacy LZMA stream to 7z, this + requires XZ for Java 1.6. + </action> + <action issue="COMPRESS-375" type="add" date="2016-12-04" + due-to="Plamen Totev"> + Allow the clients of ParallelScatterZipCreator to provide + ZipArchiveEntryRequestSupplier. + </action> + <action issue="COMPRESS-367" type="fix" date="2016-12-09" + due-to="Mike Mole"> + ZipArchiveInputStream now throws an Exception if it encounters + a broken ZIP archive rather than signaling end-of-archive. + </action> + <action issue="COMPRESS-377" type="fix" date="2016-12-20"> + ScatterZipOutputStream didn't close the StreamCompressor + causing a potential resource leak. + </action> + <action issue="COMPRESS-372" type="add" date="2016-12-20"> + Add a version-independent link to the API docs of the latest + release. + </action> + </release> + <release version="1.12" date="2016-06-21" + description="Release 1.12 - API compatible to 1.11 but requires Java 6 at runtime. +------------ + + + +Release 1.12 changes the behavior of BZip2CompressorOutputStream's +finalize method so that it no longer invokes finish. This is going to +break code that relied on the finalizer to clean up an unfinished +stream. The code will need to be changed to call finish or +close itself. Note that a finalizer is not guaranteed to run, so +the feature was not 100% effective in any case. +"> + <action issue="COMPRESS-349" type="update" date="2016-04-09" dev="ggregory"> + Update requirement from Java 5 to 6. + </action> + <action issue="COMPRESS-350" type="update" date="2016-04-09" dev="ggregory"> + TarArchiveEntry wastefully allocates empty arrays. + </action> + <action issue="COMPRESS-348" type="fix" date="2016-04-24"> + SevenZFile.read() throws an IllegalStateException for empty entries. + </action> + <action issue="COMPRESS-353" type="update" date="2016-05-06" dev="ggregory"> + Javadoc for BZip2CompressorInputStream(InputStream, boolean) should refer to IOEx, not NPE. + </action> + <action issue="COMPRESS-354" type="update" date="2016-05-16"> + PureJavaCrc32C in the snappy package is now final so it is now + safe to call a virtual method inside the constructor. 
+ </action> + <action issue="COMPRESS-355" type="fix" date="2016-05-20" + due-to="Jeremy Gustie"> + TarArchiveInputStream failed to parse PAX headers that + included blank lines. + </action> + <action issue="COMPRESS-356" type="fix" date="2016-05-20" + due-to="Jeremy Gustie"> + TarArchiveInputStream failed to parse PAX headers whose tar + entry name ended with a slash. + </action> + <action issue="COMPRESS-352" type="add" date="2016-05-22"> + FramedSnappyCompressorInputStream now supports the dialect of + Snappy used by the IWA files contained within the zip archives + used in Apple's iWork 13 files. + </action> + <action issue="COMPRESS-351" type="update" date="2016-06-07"> + ZipArchiveInputStream and CpioArchiveInputStream could throw + exceptions who's messages contained potentially corrupt entry + names read from a broken archive. They will now sanitize the + names by replacing unprintable characters and restricting the + length to 255 characters. + </action> + <action issue="COMPRESS-357" type="update" date="2016-06-15"> + BZip2CompressorOutputStream no longer tries to finish the + output stream in finalize. This is a breaking change for code + that relied on the finalizer. + </action> + </release> + <release version="1.11" date="2016-04-06" + description="Release 1.11"> + <action issue="COMPRESS-347" type="add" date="2016-03-23"> + TarArchiveInputStream now supports reading global PAX headers. + </action> + <action issue="COMPRESS-346" type="add" date="2016-03-23"> + The PAX headers for sparse entries written by star are now + applied. + </action> + <action issue="COMPRESS-345" type="add" date="2016-03-23"> + GNU sparse files using one of the PAX formats are now + detected, but cannot be extracted. + </action> + <action issue="COMPRESS-344" type="fix" date="2016-03-22"> + ArArchiveInputStream can now read GNU extended names that are + terminated with a NUL byte rather than a linefeed. + </action> + <action issue="COMPRESS-341" type="add" date="2016-03-20"> + New method SevenZFile.getEntries can be used to list the + contents of a 7z archive. + </action> + <action issue="COMPRESS-343" type="fix" date="2016-03-17" + due-to="Rene Preissel"> + Native Memory Leak in Sevenz-DeflateDecoder. + </action> + <action type="add" date="2016-03-05" + due-to="Matt Hovey"> + When using Zip64Mode.Always also use ZIP64 extensions inside + the central directory. + GitHub Pull Request #10 + </action> + <action issue="COMPRESS-340" type="fix" date="2016-02-24" + due-to="Dawid Weiss"> + SevenZFile will now only try to drain an entry's content when + moving on to the next entry if data is read from the next + entry. This should improve performance for applications that + try to skip over entries. + </action> + <action issue="COMPRESS-336" type="fix" date="2016-02-14"> + file names of tar archives using the xstar format are now + parsed properly. + </action> + <action issue="COMPRESS-335" type="fix" date="2016-02-05"> + checksums of tars that pad the checksum field to the left are + now calculated properly. + </action> + <action issue="COMPRESS-334" type="fix" date="2016-02-05" + due-to="Jeremy Gustie"> + ArArchiveInputStream failed to read past the first entry when + BSD long names have been used. + </action> + <action issue="COMPRESS-333" type="fix" date="2016-02-03" due-to="Dawid Weiss"> + Added buffering for random access which speeds up 7Z support. 
+ </action> + <action issue="COMPRESS-331" type="fix" date="2016-01-31"> + The checksum validation of TararchiveEntry is now as strict as + the validation of GNU tar, which eliminates a few cases of + false positives of ArchiveStreamFactory. + This behavior is a breaking change since the check has become + more strict but any archive that fails the checksum test now + would also fail it when extracted with other tools and must be + considered an invalid archive. + </action> + <action issue="COMPRESS-323" type="add" date="2016-01-29"> + ZipFile.getRawInputStream() is now part of the public API + </action> + <action issue="COMPRESS-332" type="fix" date="2016-01-29"> + SnappyCompressorInputStream and + FramedSnappyCompressorInputStream returned 0 at the end of the + stream under certain circumstances. + </action> + <action type="add" date="2016-01-27" due-to="Jason van Zyl"> + Allow byte-for-byte replication of Zip entries. + GitHub Pull Request #6. + </action> + <action issue="COMPRESS-328" type="add" date="2016-01-15"> + TarArchiveEntry's preserveLeadingSlashes is now a property and used + on later calls to setName, too. + This behavior is a breaking change. + </action> + <action issue="COMPRESS-326" type="fix" date="2015-10-24"> + Adjusted unit test to updates in Java8 and later that change + the logic of ZipEntry#getTime. + </action> + <action issue="COMPRESS-324" type="fix" date="2015-10-06"> + TarArchiveOutputStream will now recognize GNU long name and + link entries even if the special entry has a different name + than GNU tar uses itself. This seems to be the case for + archives created by star. + </action> + <action issue="COMPRESS-321" type="fix" date="2015-08-22"> + ArrayIndexOutOfBoundsException when InfoZIP type 7875 extra + fields are read from the central directory. + </action> + <action type="add" date="2015-11-11" due-to="Sören Glimm"> + Added read-only support for bzip2 compression used inside of + ZIP archives. + GitHub Pull Request #4. + </action> + </release> + + <release version="1.10" date="2015-08-18" + description="Release 1.10 +------------ + + + +Release 1.10 moves the former +org.apache.commons.compress.compressors.z._internal_ package which +breaks backwards compatibility for code which used the old package. +This also changes the superclass of ZCompressorInputStream. +"> + + <action issue="COMPRESS-317" type="fix" date="2015-06-09" + due-to="Lucas Werkmeister"> + ArrayIndexOutOfBoundsException when ZIP extra fields are read + and the entry contains an UnparseableExtraField. + </action> + <action issue="COMPRESS-316" type="add" date="2015-05-23" + due-to="Nick Burch"> + CompressorStreamFactory can now auto-detect DEFLATE streams + with ZLIB header. + </action> + <action issue="COMPRESS-314" type="fix" date="2015-05-08"> + TarArchiveInputStream can now read entries with group or + user ids > 0x80000000. + </action> + <action issue="COMPRESS-315" type="fix" date="2015-05-06"> + TarArchiveOutputStream can now write entries with group or + user ids > 0x80000000. + </action> + <action issue="COMPRESS-313" type="add" date="2015-03-30"> + CompressorStreamFactory can now auto-detect LZMA streams. + </action> + <action issue="COMPRESS-312" type="fix" date="2015-03-28"> + TarArchiveEntry's constructor with a File and a String arg + didn't normalize the name. + </action> + <action issue="COMPRESS-308" type="fix" date="2015-02-20"> + ZipEncodingHelper no longer reads system properties directly + to determine the default charset. 
+ </action> + <action issue="COMPRESS-309" type="fix" date="2015-02-20"> + BZip2CompressorInputStream#read would return -1 when asked to + read 0 bytes. + </action> + <action issue="COMPRESS-306" type="fix" date="2015-02-17"> + ArchiveStreamFactory fails to pass on the encoding when creating some streams. + * ArjArchiveInputStream + * CpioArchiveInputStream + * DumpArchiveInputStream + * JarArchiveInputStream + * TarArchiveInputStream + * JarArchiveOutputStream + </action> + <action issue="COMPRESS-302" type="fix" date="2015-02-16"> + Restore immutability/thread-safety to ArchiveStreamFactory. + The class is now immutable provided that the method setEntryEncoding is not used. + The class is thread-safe. + </action> + <action issue="COMPRESS-303" type="fix" date="2015-02-16"> + Restore immutability/thread-safety to CompressorStreamFactory. + The class is now immutable provided that the method setDecompressConcatenated is not used. + The class is thread-safe. + </action> + <action issue="COMPRESS-298" type="fix" date="2015-01-20"> + SevenZFile now throws the specific PasswordRequiredException + when it encounters an encrypted stream but no password has + been specified. + </action> + <action issue="COMPRESS-290" type="fix" date="2015-01-13" + due-to="Kristian Rosenvold"> + Improved error message when tar encounters a groupId that is + too big to write without using the STAR or POSIX format. + </action> + <action issue="COMPRESS-296" type="add" date="2015-01-10" + due-to="Kristian Rosenvold"> + Added support for parallel compression. This low-level API allows + a client to build a zip/jar file by using the class + org.apache.commons.compress.archivers.zip.ParallelScatterZipCreator. + + Zip documentation updated with further notes about parallel features. + + Please note that some aspects of jar creation need to be + handled by client code and is not part of commons-compress for this + release. + </action> + <action type="add" date="2014-12-24" + due-to="Kristian Rosenvold"> + Cut overall object instantiation in half by changing file + header generation algorithm, for a 10-15 percent performance + improvement. + + Also extracted two private methods createLocalFileHeader + and createCentralFileHeader in ZipArchiveOutputStream. + These may have some interesting additional usages in the + near future. + </action> + <action issue="COMPRESS-297" type="fix" date="2014-12-22"> + ZipFile logs a warning in its finalizer when its constructor + has thrown an exception reading the file - for example if the + file doesn't exist. + </action> + <action issue="COMPRESS-295" type="add" date="2014-12-18" + due-to="Kristian Rosenvold"> + New methods in ZipArchiveOutputStream and ZipFile allows + entries to be copied from one archive to another without + having to re-compress them. + </action> + <action type="update" date="2014-10-28" + due-to="Damjan Jovanovic"> + Moved the package + org.apache.commons.compress.compressors.z._internal_ to + org.apache.commons.compress.compressors.lzw and made it part + of the API that is officially supported. This will break + existing code that uses the old package. + </action> + </release> + + <release version="1.9" date="2014-10-09" + description="Release 1.9"> + <action type="add" date="2014-06-14" issue="COMPRESS-263" + due-to="Matthias Stevens"> + Added support for DEFLATE streams without any gzip framing. + </action> + <action type="fix" date="2014-08-14" issue="COMPRESS-287"> + When reading 7z files unknown file properties and properties + of type kDummy are now ignored. 
+ </action> + <action type="fix" date="2014-08-21" issue="COMPRESS-286"> + Expanding 7z archives using LZMA compression could cause an + EOFException. + </action> + <action type="update" date="2014-08-31" issue="COMPRESS-285"> + Checking for XZ for Java may be expensive. The result will + now be cached outside of an OSGi environment. You can use the + new XZUtils#setCacheXZAvailability to overrride this default + behavior. + </action> + <action type="fix" date="2014-09-19" issue="COMPRESS-289" + due-to="Bob Robertson"> + Long-Name and -link or PAX-header entries in TAR archives + always had the current time as last modfication time, creating + archives that are different at the byte level each time an + archive was built. + </action> + </release> + + <release version="1.8.1" date="2014-05-14" + description="Release 1.8.1"> + <action type="update" date="2014-03-19"> + The dependency on org.tukaani:xz is now marked as optional. + </action> + <action type="fix" date="2014-03-28" issue="COMPRESS-270"> + The snappy, ar and tar inputstreams might fail to read from a + non-buffered stream in certain cases. + </action> + <action type="add" date="2014-04-12" issue="COMPRESS-272"> + CompressorStreamFactory can now auto-detect Unix compress + (".Z") streams. + </action> + <action type="fix" date="2014-04-12" issue="COMPRESS-277"> + IOUtils#skip might skip fewer bytes than requested even though + more could be read from the stream. + </action> + <action type="fix" date="2014-04-13" issue="COMPRESS-276"> + ArchiveStreams now validate there is a current entry before + reading or writing entry data. + </action> + <action type="fix" date="2014-04-13"> + ArjArchiveInputStream#canReadEntryData tested the current + entry of the stream rather than its argument. + </action> + <action type="fix" date="2014-04-13" issue="COMPRESS-274"> + ChangeSet#delete and deleteDir now properly deal with unnamed + entries. + </action> + <action type="fix" date="2014-04-18" issue="COMPRESS-273"> + Added a few null checks to improve robustness. + </action> + <action type="fix" date="2014-04-19" issue="COMPRESS-278"> + TarArchiveInputStream failed to read archives with empty + gid/uid fields. + </action> + <action type="fix" date="2014-04-27" issue="COMPRESS-279"> + TarArchiveInputStream now again throws an exception when it + encounters a truncated archive while reading from the last + entry. + </action> + <action type="fix" date="2014-05-02" issue="COMPRESS-280" + due-to="BELUGA BEHR"> + Adapted TarArchiveInputStream#skip to the modified + IOUtils#skip method. + </action> + </release> + <release version="1.8" date="2014-03-12" + description="Release 1.8"> + <action issue="COMPRESS-253" type="fix" date="2014-01-20"> + BZip2CompressorInputStream read fewer bytes than possible from + a truncated stream. + </action> + <action issue="COMPRESS-253" type="fix" date="2014-01-22"> + SevenZFile failed claiming the dictionary was too large when + archives used LZMA compression for headers and content and + certain non-default dictionary sizes. + </action> + <action issue="COMPRESS-259" type="fix" date="2014-01-24"> + CompressorStreamFactory.createCompressorInputStream with + explicit compression did not honor decompressConcatenated + </action> + <action issue="COMPRESS-260" type="add" date="2014-02-20"> + GzipCompressorInputStream now provides access to the same + metadata that can be provided via GzipParameters when writing + a gzip stream. 
+ </action> + <action issue="COMPRESS-262" type="fix" date="2014-02-21"> + TarArchiveInputStream will now read archives created by tar + implementations that encode big numbers by not adding a + trailing NUL. + </action> + <action issue="COMPRESS-264" type="fix" date="2014-02-21"> + ZipArchiveInputStream would return NUL bytes for the first 512 + bytes of a STORED entry if it was the very first entry of the + archive. + </action> + <action issue="COMPRESS-265" type="fix" date="2014-02-22"> + When writing PAX/POSIX headers for TAR entries with + backslashes or certain non-ASCII characters in their name + TarArchiveOutputStream could fail. + </action> + <action issue="COMPRESS-267" type="fix" date="2014-02-22"> + ArchiveStreamFactory now throws a StreamingNotSupported - a + new subclass of ArchiveException - if it is asked to read from + or write to a stream and Commons Compress doesn't support + streaming for the format. This currently only applies to the + 7z format. + </action> + <action issue="COMPRESS-266" type="add" date="2014-02-25"> + SevenZOutputFile now supports chaining multiple + compression/encryption/filter methods and passing options to + the methods. + </action> + <action issue="COMPRESS-261" type="add" date="2014-02-26"> + The (compression) method(s) can now be specified per entry in + SevenZOutputFile. + </action> + <action issue="COMPRESS-258" type="add" date="2014-02-26"> + SevenZArchiveEntry "knows" which method(s) have been used to + write it to the archive. + </action> + <action type="add" date="2014-02-28"> + The 7z package now supports the delta filter as method. + </action> + <action issue="COMPRESS-257" type="add" date="2014-03-03"> + The 7z package now supports BCJ filters for several platforms. + You will need a version >= 1.5 of XZ for Java to read archives + using BCJ, though. + </action> + </release> + <release version="1.7" date="2014-01-20" + description="Release 1.7"> + <action issue="COMPRESS-241" type="fix" date="2013-10-27"> + SevenZOutputFile#closeArchiveEntry throws an exception when + using LZMA2 compression on Java8. + </action> + <action issue="COMPRESS-147" type="add" date="2013-11-07" + due-to="BELUGA BEHR"> + Read-Only support for Snappy compression. + </action> + <action issue="COMPRESS-244" type="fix" date="2013-11-27" + due-to="Nico Kruber"> + 7z reading of big 64bit values could be wrong. + </action> + <action issue="COMPRESS-243" type="add" date="2013-11-30" + due-to="Damjan Jovanovic"> + Read-Only support for .Z compressed files. + </action> + <action type="add" date="2013-12-06" due-to="Damjan Jovanovic"> + ZipFile and ZipArchiveInputStream now support reading entries compressed using the + SHRINKING method. + </action> + <action issue="COMPRESS-245" type="fix" date="2013-12-06"> + TarArchiveInputStream could fail to read an archive completely. + </action> + <action issue="COMPRESS-242" type="fix" date="2013-12-08"> + The time-setters in X5455_ExtendedTimestamp now set the + corresponding flags explicitly - i.e. they set the bit if the + valus is not-null and reset it otherwise. This may cause + incompatibilities if you use setFlags to unset a bit and later + set the time to a non-null value - the flag will now be set. 
+ </action> + <action issue="COMPRESS-250" type="add" date="2013-12-16" due-to="Emmanuel Bourg"> + GzipCompressorOutputStream now supports setting the compression level and the header metadata + (filename, comment, modification time, operating system and extra flags) + </action> + <action issue="COMPRESS-115" type="add" date="2013-12-19" due-to="Emmanuel Bourg"> + ZipFile and ZipArchiveInputStream now support reading entries compressed using the IMPLODE method. + </action> + <action issue="COMPRESS-252" type="fix" date="2013-12-20"> + SevenZOutputFile would create invalid archives if more than + six empty files or directories were included. + </action> + <action type="add" date="2013-12-20"> + ZipFile and the 7z file classes now implement Closeable and + can be used in try-with-resources constructs. + </action> + </release> + <release version="1.6" date="2013-10-26" + description="Release 1.6"> + <action type="fix" date="2013-04-25" issue="COMPRESS-223" + due-to="Jeremy Gustie"> + TarBuffer.tryToConsumeSecondEOFRecord could throw a + NullPointerException + </action> + <action type="add" date="2013-05-07" issue="COMPRESS-54" + due-to="Damjan Jovanovic"> + Added support for 7z archives. Most compression algorithms + can be read and written, LZMA and encryption are only + supported when reading. + </action> + <action type="add" date="2013-05-19" issue="COMPRESS-226" + due-to="Damjan Jovanovic"> + Added read-only support for ARJ archives that don't use + compression. + </action> + <action type="fix" date="2013-05-26" issue="COMPRESS-228"> + Parsing of zip64 extra fields has become more lenient in order + to be able to read archives created by DotNetZip and maybe + other archivers as well. + </action> + <action type="fix" date="2013-06-03" issue="COMPRESS-229" + due-to="Christoph Gysin"> + TAR will now properly read the names of symbolic links with + long names that use the GNU variant to specify the long file + name. + </action> + <action type="fix" date="2013-06-04" issue="COMPRESS-227"> + ZipFile#getInputStream could return null if the archive + contained duplicate entries. + The class now also provides two new methods to obtain all + entries of a given name rather than just the first one. + </action> + <action type="update" date="2013-07-08" issue="COMPRESS-232" + due-to="BELUGA BEHR"> + Readabilty patch to TarArchiveInputStream. + </action> + <action type="update" date="2013-08-08" issue="COMPRESS-234" + due-to="BELUGA BEHR"> + Performance improvements to TarArchiveInputStream, in + particular to the skip method. + </action> + <action type="fix" date="2013-08-08" issue="COMPRESS-236" + due-to="Andrew Duffy"> + CpioArchiveInputStream failed to read archives created by + Redline RPM. + </action> + <action type="fix" date="2013-08-09" issue="COMPRESS-237" + due-to="Emmanuel Bourg"> + TarArchiveOutputStream now properly handles link names that + are too long to fit into a traditional TAR header. + </action> + <action type="add" date="2013-08-10"> + DumpArchiveInputStream now supports an encoding parameter that + can be used to specify the encoding of file names. + </action> + <action type="add" date="2013-08-10"> + The CPIO streams now support an encoding parameter that can be + used to specify the encoding of file names. + </action> + <action type="add" date="2013-09-22" issue="COMPRESS-111"> + Read-only support for LZMA standalone compression has been added. 
+ </action> + <action type="fix" date="2013-10-04" issue="COMPRESS-239"> + The auto-detecting create*InputStream methods of Archive and + CompressorStreamFactory could fail to detect the format of + blocking input streams. + </action> + <action type="fix" date="2013-10-21" issue="COMPRESS-240" + due-to="Gary Gregory"> + ZipEncodingHelper.isUTF8(String) does not check all UTF-8 aliases. + </action> + </release> + <release version="1.5" date="2013-03-14" + description="Release 1.5"> + <action type="fix" date="2012-02-19" issue="COMPRESS-218" + due-to="Gili"> + Typo in CompressorStreamFactory Javadoc + </action> + <action type="update" date="2012-07-08" issue="COMPRESS-188" + due-to="Harald Kuhn"> + Improved exception message if a zip archive cannot be read + because of an unsupported compression method. + </action> + <action type="update" date="2012-07-07" issue="COMPRESS-192" + due-to="Jukka Zitting"> + ArchiveStreamFactory has a setting for file name encoding that + sets up encoding for ZIP and TAR streams. + </action> + <action type="fix" date="2012-07-07" issue="COMPRESS-191" + due-to="Jukka Zitting"> + ArchiveStreamFactory's tar stream detection created false + positives for AIFF files. + </action> + <action type="update" date="2012-07-07" issue="COMPRESS-191" + due-to="Jukka Zitting"> + TarArchiveEntry now has a method to verify its checksum. + </action> + <action type="fix" date="2012-12-27" issue="COMPRESS-199" + due-to="Jukka Zitting"> + XZ for Java didn't provide an OSGi bundle. Compress' + dependency on it has now been marked optional so Compress + itself can still be used in an OSGi context. + </action> + <action type="fix" date="2012-12-27" issue="COMPRESS-200" + due-to="Christian Schlichtherle"> + When specifying the encoding explicitly TarArchiveOutputStream + would write unreadable names in GNU mode or even cause errors + in POSIX mode for file names longer than 66 characters. + </action> + <action type="fix" date="2012-12-27" issue="COMPRESS-203"> + Writing TAR PAX headers failed if the generated entry name + ended with a "/". + </action> + <action type="fix" date="2012-12-28" issue="COMPRESS-189" + due-to="Daniel Lowe"> + ZipArchiveInputStream sometimes failed to provide input to the + Inflater when it needed it, leading to reads returning 0. + </action> + <action type="update" date="2013-01-01"> + Split/spanned ZIP archives are now properly detected by + ArchiveStreamFactory but will cause an + UnsupportedZipFeatureException when read. + </action> + <action type="update" date="2013-01-01" issue="COMPRESS-208"> + ZipArchiveInputStream now reads archives that start with a + "PK00" signature. Archives with this signatures are created + when the archiver was willing to split the archive but in the + end only needed a single segment - so didn't split anything. + </action> + <action type="update" date="2013-01-01" issue="COMPRESS-201"> + TarArchiveEntry has a new constructor that allows setting + linkFlag and preserveLeadingSlashes at the same time. + </action> + <action type="update" date="2013-01-01" issue="COMPRESS-159"> + ChangeSetPerformer has a new perform overload that uses a + ZipFile instance as input. + </action> + <action type="fix" date="2013-01-04" issue="COMPRESS-212"> + TarArchiveInputStream ignored the encoding for GNU long name + entries. + </action> + <action type="update" date="2013-01-05" issue="COMPRESS-172" + due-to="Thomas Mair"> + Garbage collection pressure has been reduced by reusing + temporary byte arrays in classes. 
+ </action> + <action type="update" date="2013-01-08" issue="COMPRESS-210" + due-to="Julius Davies"> + Can now handle zip extra field 0x5455 - Extended Timestamp. + </action> + <action type="update" date="2013-01-07" issue="COMPRESS-211" + due-to="Julius Davies"> + handle zip extra field 0x7875 - Info Zip New Unix Extra Field. + </action> + <action type="update" date="2013-01-07" issue="COMPRESS-213" + due-to="Julius Davies"> + ZipShort, ZipLong, ZipEightByteInteger should implement Serializable + </action> + <action type="update" date="2013-01-14" issue="COMPRESS-214" + due-to="Julius Davies"> + better support for unix symlinks in ZipFile entries + </action> + <action type="update" date="2013-01-19" issue="COMPRESS-215" + due-to="Robin Power"> + ZipFile's initialization has been improved for non-Zip64 + archives. + </action> + <action type="fix" date="2013-01-20" issue="COMPRESS-206" + due-to="Peter De Maeyer"> + TarArchiveInputStream could leave the second EOF record + inside the stream it had just finished reading. + </action> + <action type="fix" date="2013-01-20"> + DumpArchiveInputStream no longer implicitly closes the + original input stream when it reaches the end of the + archive. + </action> + <action type="fix" date="2013-01-22"> + ZipArchiveInputStream now consumes the remainder of the + archive when getNextZipEntry returns null. + </action> + <action type="fix" date="2013-01-27" issue="COMPRESS-205" + due-to="Daniel Lowe"> + Unit tests could fail if the source tree was checked out to + a directory tree containign spaces. + </action> + <action type="update" date="2013-01-31"> + Updated XZ for Java dependency to 1.2 as this version + provides proper OSGi manifest attributes. + </action> + <action type="fix" date="2013-02-20" issue="COMPRESS-219"> + Fixed a potential ArrayIndexOutOfBoundsException when + reading STORED entries from ZipArchiveInputStream. + </action> + <action type="fix" date="2013-03-07" issue="COMPRESS-221"> + CompressorStreamFactory can now be used without XZ for Java + being available. + </action> + <action type="add" date="2013-03-07" issue="COMPRESS-220"> + CompressorStreamFactory has an option to create + decompressing streams that decompress the full input for + formats that support multiple concatenated streams. + </action> + </release> + <release version="1.4.1" date="2012-05-23" + description="Release 1.4.1"> + <action type="fix" date="2012-05-20"> + Ported libbzip2's fallback sort algorithm to + BZip2CompressorOutputStream to speed up compression in certain + edge cases. + Using specially crafted inputs this can be used as a denial + of service attack. See the security reports page for details. + </action> + </release> + <release version="1.4" date="2012-04-11" + description="Release 1.4"> + <action issue="COMPRESS-183" type="fix" date="2012-03-24"> + The tar package now allows the encoding of file names to be + specified and can optionally use PAX extension headers to + write non-ASCII file names. + The stream classes now write (or expect to read) archives that + use the platform's native encoding for file names. Apache + Commons Compress 1.3 used to strip everything but the lower + eight bits of each character which effectively only worked for + ASCII and ISO-8859-1 file names. + This new default behavior is a breaking change. + </action> + <action issue="COMPRESS-184" type="fix" date="2012-03-23"> + TarArchiveInputStream failed to parse PAX headers that + contained non-ASCII characters. 
+ </action> + <action issue="COMPRESS-182" type="update" date="2012-03-02"> + The tar package can now write archives that use star/GNU/BSD + extensions or use the POSIX/PAX variant to store numeric + values that don't fit into the traditional header fields. + </action> + <action issue="COMPRESS-181" type="update" date="2012-03-02"> + Added a workaround for a Bug some tar implementations that add + a NUL byte as first byte in numeric header fields. + </action> + <action issue="COMPRESS-176" type="update" date="2012-02-28"> + Added a workaround for a Bug in WinZIP which uses backslashes + as path separators in Unicode Extra Fields. + </action> + <action issue="COMPRESS-131" type="update" date="2012-02-23"> + ArrayOutOfBounds while decompressing bz2. Added test case - code already seems to have been fixed. + </action> + <action issue="COMPRESS-178" type="fix" date="2012-02-23"> + TarArchiveInputStream throws IllegalArgumentException instead of IOException + </action> + <action issue="COMPRESS-179" type="fix" date="2012-02-23"> + TarUtils.formatLongOctalOrBinaryBytes() assumes the field will be 12 bytes long + </action> + <action issue="COMPRESS-175" type="fix" date="2012-02-22"> + GNU Tar sometimes uses binary encoding for UID and GID + </action> + <action issue="COMPRESS-171" type="fix" date="2012-01-29"> + ArchiveStreamFactory.createArchiveInputStream would claim + short text files were TAR archives. + </action> + <action issue="COMPRESS-156" type="add" date="2011-11-02"> + Support for the XZ format has been added. + </action> + <action issue="COMPRESS-146" type="update" date="2011-11-07"> + BZip2CompressorInputStream now optionally supports reading of + concatenated .bz2 files. + </action> + <action issue="COMPRESS-154" type="update" date="2011-11-09"> + GZipCompressorInputStream now optionally supports reading of + concatenated .gz files. + </action> + <action issue="COMPRESS-164" type="fix" date="2011-12-05"> + ZipFile didn't work properly for archives using unicode extra + fields rather than UTF-8 filenames and the EFS-Flag. + </action> + <action issue="COMPRESS-16" type="update" date="2011-12-07"> + The tar package can now read archives that use star/GNU/BSD + extensions for files that are longer than 8 GByte as well as + archives that use the POSIX/PAX variant. + </action> + <action issue="COMPRESS-165" type="update" date="2011-12-08"> + The tar package can now write archives that use star/GNU/BSD + extensions for files that are longer than 8 GByte as well as + archives that use the POSIX/PAX variant. + </action> + <action issue="COMPRESS-166" type="update" date="2011-12-08"> + The tar package can now use the POSIX/PAX variant for writing + entries with names longer than 100 characters. + </action> + <action issue="COMPRESS-169" type="fix" date="2011-12-22"> + For corrupt archives ZipFile would throw a RuntimeException in + some cases and an IOException in others. It will now + consistently throw an IOException. + </action> + </release> + <release version="1.3" date="2011-11-01" + description="Release 1.3 - API compatible to 1.2 but requires Java5 at runtime"> + <action issue="COMPRESS-142" type="add" date="2011-09-14"> + Support for the Pack200 format has been added. + </action> + <action issue="COMPRESS-132" type="add" date="2011-08-17"> + Read-only support for the format used by the Unix dump(8) tool + has been added. + </action> + <action issue="COMPRESS-36" type="update" date="2011-08-15"> + The ZIP package now supports Zip64 extensions. 
+ </action> + <action issue="COMPRESS-144" type="update" date="2011-08-08"> + The AR package now supports the BSD dialect of storing file + names longer than 16 chars (both reading and writing). + </action> + <action type="fix" date="2011-08-08"> + BZip2CompressorInputStream's getBytesRead method always + returned 0. + </action> + <action issue="COMPRESS-152" type="fix" date="2011-08-03"> + ZipArchiveInputStream and ZipArchiveOutputStream could leak + resources on some JDKs. + </action> + <action issue="COMPRESS-160" type="fix" date="2011-10-23"> + TarArchiveOutputStream's getBytesWritten method didn't count + correctly. + </action> + </release> + <release version="1.2" date="2011-07-31" + description="Release 1.2 - a bugfix release, the last release expected to be compatible with Java 1.4"> + <action issue="COMPRESS-129" type="fix" date="2011-07-26"> + ZipArchiveInputStream could fail with a "Truncated ZIP" error + message for entries between 2 GByte and 4 GByte in size. + </action> + <action issue="COMPRESS-145" type="fix" date="2011-07-23" + due-tue="Patrick Dreyer"> + TarArchiveInputStream now detects sparse entries using the + oldgnu format and properly reports it cannot extract their + contents. + </action> + <action issue="COMPRESS-123" type="add" date="2011-07-23"> + ZipArchiveEntry has a new method getRawName that provides the + original bytes that made up the name. This may allow user + code to detect the encoding. + </action> + <action issue="COMPRESS-130" type="fix" date="2011-07-20"> + The Javadoc for ZipArchiveInputStream#skip now matches the + implementation, the code has been made more defensive. + </action> + <action issue="COMPRESS-140" type="fix" date="2011-07-20" + due-to="Trejkaz"> + ArArchiveInputStream fails if entries contain only blanks for + userId or groupId. + </action> + <action issue="COMPRESS-139" type="fix" date="2011-07-13"> + ZipFile may leak resources on some JDKs. + </action> + <action type="update" date="2011-04-18"> + ZipFile now implements finalize which closes the underlying + file. + </action> + <action issue="COMPRESS-117" type="update" date="2011-03-23"> + Certain tar files not recognised by ArchiveStreamFactory. + </action> + <action issue="COMPRESS-125" type="fix" date="2011-03-23"> + BZip2CompressorInputStream throws IOException if underlying stream returns available() == 0. + Removed the check. + </action> + <action issue="COMPRESS-127" type="fix" date="2011-03-23"> + Calling close() on inputStream returned by CompressorStreamFactory.createCompressorInputStream() + does not close the underlying input stream. + </action> + <action issue="COMPRESS-122" type="add" date="2010-10-29"> + TarArchiveEntry provides access to the flags that determine + whether it is an archived symbolic link, pipe or other + "uncommon" file system object. + </action> + <action issue="COMPRESS-119" type="fix" date="2010-10-26"> + TarArchiveOutputStream#finish now writes all buffered data to the stream + </action> + </release> + <release version="1.1" date="2010-08-13" description="Release 1.1"> + <action issue="COMPRESS-72" type="fix" date="2010-06-02"> + Move acknowledgements from NOTICE to README + </action> + <action issue="COMPRESS-113" type="fix" date="2010-06-02"> + TarArchiveEntry.parseTarHeader() includes the trailing space/NUL when parsing the octal size + </action> + <action issue="COMPRESS-108" type="add" date="2010-05-23"> + Command-line interface to list archive contents. 
+ Usage: java -jar commons-compress-n.m.jar archive-name [zip|tar|etc] + </action> + <action issue="COMPRESS-118" type="fix" date="2010-05-17"> + TarUtils.parseName does not properly handle characters outside the range 0-127 + </action> + <action issue="COMPRESS-112" type="update" date="2010-05-13"> + ArArchiveInputStream does not handle GNU extended filename records (//) + </action> + <action issue="COMPRESS-109" type="add" date="2010-05-10"> + Tar implementation does not support Pax headers + Added support for reading pax headers. + Note: does not support global pax headers + </action> + <action issue="COMPRESS-107" type="fix" date="2010-05-10"> + ArchiveStreamFactory does not recognise tar files created by Ant + </action> + <action issue="COMPRESS-110" type="fix" date="2010-05-09"> + Support "ustar" prefix field, which is used when file paths are longer + than 100 characters. + </action> + <action type="update" date="2010-04-19" issue="COMPRESS-105"> + Document that the name of an ZipArchiveEntry determines whether + an entry is considered a directory or not. + If you don't use the constructor with the File argument the entry's + name must end in a "/" in order for the entry to be known as a directory. + </action> + <action type="add" date="2010-03-19" issue="COMPRESS-103"> + ZipArchiveInputStream can optionally extract data that used + the STORED compression method and a data descriptor. + Doing so in a stream is not safe in general, so you have to + explicitly enable the feature. By default the stream will + throw an exception if it encounters such an entry. + </action> + <action type="fix" date="2010-03-12" issue="COMPRESS-100"> + ZipArchiveInputStream will throw an exception if it detects an + entry that uses a data descriptor for a STORED entry since it + cannot reliably find the end of data for this "compression" + method. + </action> + <action type="fix" date="2010-03-12" issue="COMPRESS-101"> + ZipArchiveInputStream should now properly read archives that + use data descriptors but without the "unofficial" signature. + </action> + <action type="add" date="2010-03-09" issue="COMPRESS-98"> + The ZIP classes will throw specialized exceptions if any + attempt is made to read or write data that uses zip features + not supported (yet). + </action> + <action type="add" date="2010-03-08" issue="COMPRESS-99"> + ZipFile#getEntries returns entries in a predictable order - + the order they appear inside the central directory. + A new method getEntriesInPhysicalOrder returns entries in + order of the entry data, i.e. the order ZipArchiveInputStream + would see. + </action> + <action type="add" date="2010-02-19"> + The Archive*Stream and ZipFile classes now have + can(Read|Write)EntryData methods that can be used to check + whether a given entry's data can be read/written. + The method currently returns false for ZIP archives if an + entry uses an unsupported compression method or encryption. + </action> + <action type="add" date="2010-02-19" issue="COMPRESS-89"> + The ZIP classes now detect encrypted entries. + </action> + <action type="update" date="2010-02-18" issue="COMPRESS-79"> + Move DOS/Java time conversions into Zip utility class. + </action> + <action type="fix" date="2010-02-18" issue="COMPRESS-74"> + ZipArchiveInputStream failed to update the number of bytes + read properly. 
+ </action> + <action type="fix" date="2010-02-18"> + ArchiveInputStream has a new method getBytesRead that should + be preferred over getCount since the later may truncate the + number of bytes read for big archives. + </action> + <action type="fix" date="2010-02-18" issue="COMPRESS-85"> + The cpio archives created by CpioArchiveOutputStream couldn't + be read by many existing native implementations because the + archives contained multiple entries with the same inode/device + combinations and weren't padded to a blocksize of 512 bytes. + </action> + <action type="fix" date="2010-02-16" issue="COMPRESS-73"> + ZipArchiveEntry, ZipFile and ZipArchiveInputStream are now + more lenient when parsing extra fields. + </action> + <action issue="COMPRESS-75" type="update" date="2010-02-12"> + ZipArchiveInputStream does not show location in file where a problem occurred. + </action> + <action type="fix" date="2010-02-12" issue="COMPRESS-82"> + cpio is terribly slow. + Documented that buffered streams are needed for performance + </action> + <action type="add" date="2010-02-12" issue="COMPRESS-97"> + Added autodetection of compression format to + CompressorStreamFactory. + </action> + <action type="fix" date="2010-02-12"> + Improved exception message if the extra field data in ZIP + archives cannot be parsed. + </action> + <action type="fix" date="2010-02-11" issue="COMPRESS-17"> + Tar format unspecified - current support documented. + </action> + <action type="add" issue="COMPRESS-95" date="2010-01-29" + due-to="Joerg Bellmann"> + Improve ExceptionMessages in ArchiveStreamFactory + </action> + <action type="fix" issue="COMPRESS-94" date="2010-01-07" + due-to="Anon Devs"> + ZipArchiveEntry's equals method was broken for entries created + with the String-arg constructor. This lead to broken ZIP + archives if two different entries had the same hash code. + </action> + <action type="fix" issue="COMPRESS-87" date="2009-10-30" + due-to="Antoni Mylka"> + ZipArchiveInputStream could repeatedly return 0 on read() when + the archive was truncated. + </action> + <action type="fix" issue="COMPRESS-86" date="2009-10-08"> + Tar archive entries holding the file name for names longer + than 100 characters in GNU longfile mode didn't properly + specify they'd be using the "oldgnu" extension. + </action> + <action type="add" date="2009-08-25"> + A new constructor of TarArchiveEntry can create entries with + names that start with slashes - the default is to strip + leading slashes in order to create relative path names. + </action> + <action issue="COMPRESS-83" type="fix" date="2009-08-01"> + Delegate all read and write methods in GZip stream in order to + speed up operations. + </action> + <action type="add" date="2009-08-01"> + ArchiveEntry now has a getLastModifiedDate method. + </action> + <action type="fix" date="2009-08-01"> + The ar and cpio streams now properly read and write last + modified times. 
+ </action> + <action issue="COMPRESS-81" type="fix" date="2009-06-30"> + TarOutputStream can leave garbage at the end of the archive + </action> + <action issue="COMPRESS-78" type="add" date="2009-06-30" + due-to="Jukka Zitting"> + Add a BZip2Utils class modelled after GZipUtils + </action> + </release> + <release version="1.0" date="2009-05-21" description="First Public Release"> + <action dev="all" type="add" date="2009-05-21"> + Initial release + </action> + <action dev="sgoeschl" type="fix"> + Updating the pom.xml for preparing a move to commons-proper + </action> + </release> + </body> +</document> diff --git a/src/changes/release-notes.vm b/src/changes/release-notes.vm new file mode 100644 index 000000000..c796ae3d2 --- /dev/null +++ b/src/changes/release-notes.vm @@ -0,0 +1,688 @@ +## Licensed to the Apache Software Foundation (ASF) under one +## or more contributor license agreements. See the NOTICE file +## distributed with this work for additional information +## regarding copyright ownership. The ASF licenses this file +## to you under the Apache License, Version 2.0 (the +## "License"); you may not use this file except in compliance +## with the License. You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, +## software distributed under the License is distributed on an +## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +## KIND, either express or implied. See the License for the +## specific language governing permissions and limitations +## under the License. + ${project.name} RELEASE NOTES + +$introduction.replaceAll("(?<!\015)\012", " +") + +## N.B. the available variables are described here: +## http://maven.apache.org/plugins/maven-changes-plugin/examples/using-a-custom-announcement-template.html +## Hack to improve layout: replace all pairs of spaces with a single new-line +$release.description.replaceAll(" ", " +") + +## Fix up indentation for multi-line action descriptions +#macro ( indent ) +#set($action=$action.replaceAll("(?m)^ +"," ")) +#end +#if ($release.getActions().size() == 0) +No changes defined in this version. +#else +#if ($release.getActions('add').size() !=0) +New features: +#foreach($actionItem in $release.getActions('add')) +## Use replaceAll to fix up LF-only line ends on Windows. +#set($action=$actionItem.getAction().replaceAll("\n"," +")) +#indent() +#if ($actionItem.getIssue()) +#set($issue=$actionItem.getIssue()) +#else +#set($issue="") +#end +#if ($actionItem.getDueTo()) +#set($dueto=$actionItem.getDueTo()) +#else +#set($dueto="") +#end +o ${action}## +#if($!issue != "") + + Issue: $issue. #if($!dueto != "")Thanks to $dueto. #end +#else#if($!dueto != "") Thanks to $dueto. #end +#end + +#set($issue="") +#set($dueto="") +#end +#end + +#if ($release.getActions('fix').size() !=0) +Fixed Bugs: +#foreach($actionItem in $release.getActions('fix')) +## Use replaceAll to fix up LF-only line ends on Windows. +#set($action=$actionItem.getAction().replaceAll("\n"," +")) +#indent() +#if ($actionItem.getIssue()) +#set($issue=$actionItem.getIssue()) +#else +#set($issue="") +#end +#if ($actionItem.getDueTo()) +#set($dueto=$actionItem.getDueTo()) +#else +#set($dueto="") +#end +o ${action}## +#if($!issue != "") + + Issue: $issue. #if($!dueto != "")Thanks to $dueto. #end +#else#if($!dueto != "") Thanks to $dueto. 
#end +#end + +#set($issue="") +#set($dueto="") +#end +#end + +#if ($release.getActions('update').size() !=0) +Changes: +#foreach($actionItem in $release.getActions('update')) +## Use replaceAll to fix up LF-only line ends on Windows. +#set($action=$actionItem.getAction().replaceAll("\n"," +")) +#indent() +#if ($actionItem.getIssue()) +#set($issue=$actionItem.getIssue()) +#else +#set($issue="") +#end +#if ($actionItem.getDueTo()) +#set($dueto=$actionItem.getDueTo()) +#else +#set($dueto="") +#end +o ${action}## +#if($!issue != "") + + Issue: $issue. #if($!dueto != "")Thanks to $dueto. #end +#else#if($!dueto != "") Thanks to $dueto. #end +#end + +#set($issue="") +#set($dueto="") +#end +#end + +#if ($release.getActions('remove').size() !=0) +Removed: +#foreach($actionItem in $release.getActions('remove')) +## Use replaceAll to fix up LF-only line ends on Windows. +#set($action=$actionItem.getAction().replaceAll("\n"," +")) +#indent() +#if ($actionItem.getIssue()) +#set($issue=$actionItem.getIssue()) +#else +#set($issue="") +#end +#if ($actionItem.getDueTo()) +#set($dueto=$actionItem.getDueTo()) +#else +#set($dueto="") +#end +o ${action}## +#if($!issue != "") + + Issue: $issue. #if($!dueto != "")Thanks to $dueto. #end +#else#if($!dueto != "") Thanks to $dueto. #end +#end + +#set($issue="") +#set($dueto="") +#end +#end +## End of main loop +#end +#macro ( fixurl $url ) +$url.replaceAll("proper/commons-","") +#end +For complete information on ${project.name}, including instructions +on how to submit bug reports, patches, or suggestions for improvement, +see the ${project.name} website: + +#fixurl ( ${project.url} ) + +Old Release Notes +================= + +Release 1.9 +----------- + +New features: +o Added support for DEFLATE streams without any gzip framing. + Issue: COMPRESS-263. + Thanks to Matthias Stevens. + +Fixed Bugs: +o When reading 7z files unknown file properties and properties of type + kDummy are now ignored. + Issue: COMPRESS-287. +o Expanding 7z archives using LZMA compression could cause an + EOFException. + Issue: COMPRESS-286. +o Long-Name and -link or PAX-header entries in TAR archives always had + the current time as last modfication time, creating archives that + are different at the byte level each time an archive was built. + Issue: COMPRESS-289. + Thanks to Bob Robertson. + +Changes: +o Checking for XZ for Java may be expensive. The result will now be + cached outside of an OSGi environment. You can use the new + XZUtils#setCacheXZAvailability to overrride this default behavior. + Issue: COMPRESS-285. + +Release 1.8.1 +------------- + +New features: +o COMPRESS-272: CompressorStreamFactory can now auto-detect Unix compress + (".Z") streams. + +Fixed Bugs: +o COMPRESS-270: The snappy, ar and tar inputstreams might fail to read from a + non-buffered stream in certain cases. +o COMPRESS-277: IOUtils#skip might skip fewer bytes than requested even though + more could be read from the stream. +o COMPRESS-276: ArchiveStreams now validate there is a current entry before + reading or writing entry data. +o ArjArchiveInputStream#canReadEntryData tested the current + entry of the stream rather than its argument. +o COMPRESS-274: ChangeSet#delete and deleteDir now properly deal with unnamed + entries. +o COMPRESS-273: Added a few null checks to improve robustness. +o COMPRESS-278: TarArchiveInputStream failed to read archives with empty + gid/uid fields. 
+o COMPRESS-279: TarArchiveInputStream now again throws an exception when it + encounters a truncated archive while reading from the last + entry. +o COMPRESS-280: Adapted TarArchiveInputStream#skip to the modified + IOUtils#skip method. Thanks to BELUGA BEHR. + +Changes: +o The dependency on org.tukaani:xz is now marked as optional. + +Release 1.8 +----------- + +New features: +o GzipCompressorInputStream now provides access to the same + metadata that can be provided via GzipParameters when writing + a gzip stream. + Issue: COMPRESS-260. +o SevenZOutputFile now supports chaining multiple + compression/encryption/filter methods and passing options to + the methods. + Issue: COMPRESS-266. +o The (compression) method(s) can now be specified per entry in + SevenZOutputFile. + Issue: COMPRESS-261. +o SevenZArchiveEntry "knows" which method(s) have been used to + write it to the archive. + Issue: COMPRESS-258. +o The 7z package now supports the delta filter as method. +o The 7z package now supports BCJ filters for several platforms. + You will need a version >= 1.5 of XZ for Java to read archives + using BCJ, though. + Issue: COMPRESS-257. + +Fixed Bugs: +o BZip2CompressorInputStream read fewer bytes than possible from + a truncated stream. + Issue: COMPRESS-253. +o SevenZFile failed claiming the dictionary was too large when + archives used LZMA compression for headers and content and + certain non-default dictionary sizes. + Issue: COMPRESS-253. +o CompressorStreamFactory.createCompressorInputStream with + explicit compression did not honor decompressConcatenated + Issue: COMPRESS-259. +o TarArchiveInputStream will now read archives created by tar + implementations that encode big numbers by not adding a + trailing NUL. + Issue: COMPRESS-262. +o ZipArchiveInputStream would return NUL bytes for the first 512 + bytes of a STORED entry if it was the very first entry of the + archive. + Issue: COMPRESS-264. +o When writing PAX/POSIX headers for TAR entries with + backslashes or certain non-ASCII characters in their name + TarArchiveOutputStream could fail. + Issue: COMPRESS-265. +o ArchiveStreamFactory now throws a StreamingNotSupported - a + new subclass of ArchiveException - if it is asked to read from + or write to a stream and Commons Compress doesn't support + streaming for the format. This currently only applies to the + 7z format. + Issue: COMPRESS-267. + +Release 1.7 +----------- + +New features: +o Read-Only support for Snappy compression. + Issue: COMPRESS-147. Thanks to BELUGA BEHR. +o Read-Only support for .Z compressed files. + Issue: COMPRESS-243. Thanks to Damjan Jovanovic. +o ZipFile and ZipArchiveInputStream now support reading entries + compressed using the SHRINKING method. Thanks to Damjan Jovanovic. +o GzipCompressorOutputStream now supports setting the compression + level and the header metadata (filename, comment, modification time, + operating system and extra flags) + Issue: COMPRESS-250. Thanks to Emmanuel Bourg. +o ZipFile and ZipArchiveInputStream now support reading entries + compressed using the IMPLODE method. + Issue: COMPRESS-115. Thanks to Emmanuel Bourg. +o ZipFile and the 7z file classes now implement Closeable and can be + used in try-with-resources constructs. + +Fixed Bugs: +o SevenZOutputFile#closeArchiveEntry throws an exception when using + LZMA2 compression on Java8. Issue: COMPRESS-241. +o 7z reading of big 64bit values could be wrong. + Issue: COMPRESS-244. Thanks to Nico Kruber. +o TarArchiveInputStream could fail to read an archive completely. 
+ Issue: COMPRESS-245. +o The time-setters in X5455_ExtendedTimestamp now set the + corresponding flags explicitly - i.e. they set the bit if the valus + is not-null and reset it otherwise. This may cause + incompatibilities if you use setFlags to unset a bit and later set + the time to a non-null value - the flag will now be set. + Issue: COMPRESS-242. +o SevenZOutputFile would create invalid archives if more than six + empty files or directories were included. Issue: COMPRESS-252. + +Release 1.6 +----------- + +Version 1.6 introduces changes to the internal API of the tar package that +break backwards compatibility in the following rare cases. This version +removes the package private TarBuffer class along with the protected "buffer" +members in TarArchiveInputStream and TarArchiveOutputStream. This change will +only affect you if you have created a subclass of one of the stream classes +and accessed the buffer member or directly used the TarBuffer class. + +Changes in this version include: + +New features: +o Added support for 7z archives. Most compression algorithms + can be read and written, LZMA and encryption are only + supported when reading. Issue: COMPRESS-54. Thanks to Damjan Jovanovic. +o Added read-only support for ARJ archives that don't use + compression. Issue: COMPRESS-226. Thanks to Damjan Jovanovic. +o DumpArchiveInputStream now supports an encoding parameter that + can be used to specify the encoding of file names. +o The CPIO streams now support an encoding parameter that can be + used to specify the encoding of file names. +o Read-only support for LZMA standalone compression has been added. + Issue: COMPRESS-111. + +Fixed Bugs: +o TarBuffer.tryToConsumeSecondEOFRecord could throw a + NullPointerException Issue: COMPRESS-223. Thanks to Jeremy Gustie. +o Parsing of zip64 extra fields has become more lenient in order + to be able to read archives created by DotNetZip and maybe + other archivers as well. Issue: COMPRESS-228. +o TAR will now properly read the names of symbolic links with + long names that use the GNU variant to specify the long file + name. Issue: COMPRESS-229. Thanks to Christoph Gysin. +o ZipFile#getInputStream could return null if the archive + contained duplicate entries. + The class now also provides two new methods to obtain all + entries of a given name rather than just the first one. + Issue: COMPRESS-227. +o CpioArchiveInputStream failed to read archives created by + Redline RPM. Issue: COMPRESS-236. Thanks to Andrew Duffy. +o TarArchiveOutputStream now properly handles link names that + are too long to fit into a traditional TAR header. Issue: + COMPRESS-237. Thanks to Emmanuel Bourg. +o The auto-detecting create*InputStream methods of Archive and + CompressorStreamFactory could fail to detect the format of + blocking input streams. Issue: COMPRESS-239. + +Changes: +o Readabilty patch to TarArchiveInputStream. Issue: + COMPRESS-232. Thanks to BELUGA BEHR. +o Performance improvements to TarArchiveInputStream, in + particular to the skip method. Issue: COMPRESS-234. Thanks to + BELUGA BEHR. + +Release 1.5 +----------- + +New features: + +o CompressorStreamFactory has an option to create decompressing + streams that decompress the full input for formats that support + multiple concatenated streams. + Issue: COMPRESS-220. + +Fixed Bugs: + +o Typo in CompressorStreamFactory Javadoc + Issue: COMPRESS-218. + Thanks to Gili. +o ArchiveStreamFactory's tar stream detection created false positives + for AIFF files. + Issue: COMPRESS-191. 
+ Thanks to Jukka Zitting. +o XZ for Java didn't provide an OSGi bundle. Compress' dependency on + it has now been marked optional so Compress itself can still be used + in an OSGi context. + Issue: COMPRESS-199. + Thanks to Jukka Zitting. +o When specifying the encoding explicitly TarArchiveOutputStream would + write unreadable names in GNU mode or even cause errors in POSIX + mode for file names longer than 66 characters. + Issue: COMPRESS-200. + Thanks to Christian Schlichtherle. +o Writing TAR PAX headers failed if the generated entry name ended + with a "/". + Issue: COMPRESS-203. +o ZipArchiveInputStream sometimes failed to provide input to the + Inflater when it needed it, leading to reads returning 0. + Issue: COMPRESS-189. + Thanks to Daniel Lowe. +o TarArchiveInputStream ignored the encoding for GNU long name + entries. + Issue: COMPRESS-212. +o TarArchiveInputStream could leave the second EOF record inside the + stream it had just finished reading. + Issue: COMPRESS-206. + Thanks to Peter De Maeyer. +o DumpArchiveInputStream no longer implicitly closes the original + input stream when it reaches the end of the archive. +o ZipArchiveInputStream now consumes the remainder of the archive when + getNextZipEntry returns null. +o Unit tests could fail if the source tree was checked out to a + directory tree containign spaces. + Issue: COMPRESS-205. + Thanks to Daniel Lowe. +o Fixed a potential ArrayIndexOutOfBoundsException when reading STORED + entries from ZipArchiveInputStream. + Issue: COMPRESS-219. +o CompressorStreamFactory can now be used without XZ for Java being + available. + Issue: COMPRESS-221. + +Changes: + +o Improved exception message if a zip archive cannot be read because + of an unsupported compression method. + Issue: COMPRESS-188. + Thanks to Harald Kuhn. +o ArchiveStreamFactory has a setting for file name encoding that sets + up encoding for ZIP and TAR streams. + Issue: COMPRESS-192. + Thanks to Jukka Zitting. +o TarArchiveEntry now has a method to verify its checksum. + Issue: COMPRESS-191. + Thanks to Jukka Zitting. +o Split/spanned ZIP archives are now properly detected by + ArchiveStreamFactory but will cause an + UnsupportedZipFeatureException when read. +o ZipArchiveInputStream now reads archives that start with a "PK00" + signature. Archives with this signatures are created when the + archiver was willing to split the archive but in the end only needed + a single segment - so didn't split anything. + Issue: COMPRESS-208. +o TarArchiveEntry has a new constructor that allows setting linkFlag + and preserveLeadingSlashes at the same time. + Issue: COMPRESS-201. +o ChangeSetPerformer has a new perform overload that uses a ZipFile + instance as input. + Issue: COMPRESS-159. +o Garbage collection pressure has been reduced by reusing temporary + byte arrays in classes. + Issue: COMPRESS-172. + Thanks to Thomas Mair. +o Can now handle zip extra field 0x5455 - Extended Timestamp. + Issue: COMPRESS-210. + Thanks to Julius Davies. +o handle zip extra field 0x7875 - Info Zip New Unix Extra Field. + Issue: COMPRESS-211. + Thanks to Julius Davies. +o ZipShort, ZipLong, ZipEightByteInteger should implement Serializable + Issue: COMPRESS-213. + Thanks to Julius Davies. +o better support for unix symlinks in ZipFile entries. + Issue: COMPRESS-214. + Thanks to Julius Davies. +o ZipFile's initialization has been improved for non-Zip64 archives. + Issue: COMPRESS-215. + Thanks to Robin Power. 
+o Updated XZ for Java dependency to 1.2 as this version provides + proper OSGi manifest attributes. + +Release 1.4.1 +------------- + +This is a security bugfix release, see +https://commons.apache.org/proper/commons-compress/security.html#Fixed_in_Apache_Commons_Compress_1.4.1 + +Fixed Bugs: + +o Ported libbzip2's fallback sort algorithm to + BZip2CompressorOutputStream to speed up compression in certain + edge cases. + +Release 1.4 +----------- + +New features: +o COMPRESS-156: Support for the XZ format has been added. + +Fixed Bugs: +o COMPRESS-183: The tar package now allows the encoding of file names to be + specified and can optionally use PAX extension headers to + write non-ASCII file names. + The stream classes now write (or expect to read) archives that + use the platform's native encoding for file names. Apache + Commons Compress 1.3 used to strip everything but the lower + eight bits of each character which effectively only worked for + ASCII and ISO-8859-1 file names. + This new default behavior is a breaking change. +o COMPRESS-184: TarArchiveInputStream failed to parse PAX headers that + contained non-ASCII characters. +o COMPRESS-178: TarArchiveInputStream throws IllegalArgumentException instead of IOException +o COMPRESS-179: TarUtils.formatLongOctalOrBinaryBytes() assumes the field will be 12 bytes long +o COMPRESS-175: GNU Tar sometimes uses binary encoding for UID and GID +o COMPRESS-171: ArchiveStreamFactory.createArchiveInputStream would claim + short text files were TAR archives. +o COMPRESS-164: ZipFile didn't work properly for archives using unicode extra + fields rather than UTF-8 filenames and the EFS-Flag. +o COMPRESS-169: For corrupt archives ZipFile would throw a RuntimeException in + some cases and an IOException in others. It will now + consistently throw an IOException. + +Changes: +o COMPRESS-182: The tar package can now write archives that use star/GNU/BSD + extensions or use the POSIX/PAX variant to store numeric + values that don't fit into the traditional header fields. +o COMPRESS-181: Added a workaround for a Bug some tar implementations that add + a NUL byte as first byte in numeric header fields. +o COMPRESS-176: Added a workaround for a Bug in WinZIP which uses backslashes + as path separators in Unicode Extra Fields. +o COMPRESS-131: ArrayOutOfBounds while decompressing bz2. Added test case - code already seems to have been fixed. +o COMPRESS-146: BZip2CompressorInputStream now optionally supports reading of + concatenated .bz2 files. +o COMPRESS-154: GZipCompressorInputStream now optionally supports reading of + concatenated .gz files. +o COMPRESS-16: The tar package can now read archives that use star/GNU/BSD + extensions for files that are longer than 8 GByte as well as + archives that use the POSIX/PAX variant. +o COMPRESS-165: The tar package can now write archives that use star/GNU/BSD + extensions for files that are longer than 8 GByte as well as + archives that use the POSIX/PAX variant. +o COMPRESS-166: The tar package can now use the POSIX/PAX variant for writing + entries with names longer than 100 characters. + +Release 1.3 +----------- + +Commons Compress 1.3 is the first version to require Java5 at runtime. + +Changes in this version include: + +New features: +o Support for the Pack200 format has been added. Issue: COMPRESS-142. +o Read-only support for the format used by the Unix dump(8) tool + has been added. Issue: COMPRESS-132. + +Fixed Bugs: +o BZip2CompressorInputStream's getBytesRead method always + returned 0. 
+o ZipArchiveInputStream and ZipArchiveOutputStream could leak + resources on some JDKs. Issue: COMPRESS-152. +o TarArchiveOutputStream's getBytesWritten method didn't count + correctly. Issue: COMPRESS-160. + +Changes: +o The ZIP package now supports Zip64 extensions. Issue: COMPRESS-36. +o The AR package now supports the BSD dialect of storing file + names longer than 16 chars (both reading and writing). + Issue: COMPRESS-144. + +Release 1.2 +----------- + +New features: +o COMPRESS-123: ZipArchiveEntry has a new method getRawName that provides the + original bytes that made up the name. This may allow user + code to detect the encoding. +o COMPRESS-122: TarArchiveEntry provides access to the flags that determine + whether it is an archived symbolic link, pipe or other + "uncommon" file system object. + +Fixed Bugs: +o COMPRESS-129: ZipArchiveInputStream could fail with a "Truncated ZIP" error + message for entries between 2 GByte and 4 GByte in size. +o COMPRESS-145: TarArchiveInputStream now detects sparse entries using the + oldgnu format and properly reports it cannot extract their + contents. +o COMPRESS-130: The Javadoc for ZipArchiveInputStream#skip now matches the + implementation, the code has been made more defensive. +o COMPRESS-140: ArArchiveInputStream fails if entries contain only blanks for + userId or groupId. Thanks to Trejkaz. +o COMPRESS-139: ZipFile may leak resources on some JDKs. +o COMPRESS-125: BZip2CompressorInputStream throws IOException if + underlying stream returns available() == 0. + Removed the check. +o COMPRESS-127: Calling close() on inputStream returned by + CompressorStreamFactory.createCompressorInputStream() + does not close the underlying input stream. +o COMPRESS-119: TarArchiveOutputStream#finish now writes all buffered + data to the stream + +Changes: +o ZipFile now implements finalize which closes the underlying + file. +o COMPRESS-117: Certain tar files not recognised by + ArchiveStreamFactory. + +Release 1.1 +----------- + +New features: +o COMPRESS-108: Command-line interface to list archive contents. + Usage: java -jar commons-compress-n.m.jar archive-name [zip|tar|etc] +o COMPRESS-109: Tar implementation does not support Pax headers + Added support for reading pax headers. + Note: does not support global pax headers +o COMPRESS-103: ZipArchiveInputStream can optionally extract data that used + the STORED compression method and a data descriptor. + Doing so in a stream is not safe in general, so you have to + explicitly enable the feature. By default the stream will + throw an exception if it encounters such an entry. +o COMPRESS-98: The ZIP classes will throw specialized exceptions if any + attempt is made to read or write data that uses zip features + not supported (yet). +o COMPRESS-99: ZipFile#getEntries returns entries in a predictable order - + the order they appear inside the central directory. + A new method getEntriesInPhysicalOrder returns entries in + order of the entry data, i.e. the order ZipArchiveInputStream + would see. +o The Archive*Stream and ZipFile classes now have + can(Read|Write)EntryData methods that can be used to check + whether a given entry's data can be read/written. + The method currently returns false for ZIP archives if an + entry uses an unsupported compression method or encryption. +o COMPRESS-89: The ZIP classes now detect encrypted entries. +o COMPRESS-97: Added autodetection of compression format to + CompressorStreamFactory. 
+o COMPRESS-95: Improve ExceptionMessages in ArchiveStreamFactory Thanks to Joerg Bellmann. +o A new constructor of TarArchiveEntry can create entries with + names that start with slashes - the default is to strip + leading slashes in order to create relative path names. +o ArchiveEntry now has a getLastModifiedDate method. +o COMPRESS-78: Add a BZip2Utils class modelled after GZipUtils Thanks to Jukka Zitting. + +Fixed Bugs: +o COMPRESS-72: Move acknowledgements from NOTICE to README +o COMPRESS-113: TarArchiveEntry.parseTarHeader() includes the trailing space/NUL when parsing the octal size +o COMPRESS-118: TarUtils.parseName does not properly handle characters outside the range 0-127 +o COMPRESS-107: ArchiveStreamFactory does not recognise tar files created by Ant +o COMPRESS-110: Support "ustar" prefix field, which is used when file paths are longer + than 100 characters. +o COMPRESS-100: ZipArchiveInputStream will throw an exception if it detects an + entry that uses a data descriptor for a STORED entry since it + cannot reliably find the end of data for this "compression" + method. +o COMPRESS-101: ZipArchiveInputStream should now properly read archives that + use data descriptors but without the "unofficial" signature. +o COMPRESS-74: ZipArchiveInputStream failed to update the number of bytes + read properly. +o ArchiveInputStream has a new method getBytesRead that should + be preferred over getCount since the later may truncate the + number of bytes read for big archives. +o COMPRESS-85: The cpio archives created by CpioArchiveOutputStream couldn't + be read by many existing native implementations because the + archives contained multiple entries with the same inode/device + combinations and weren't padded to a blocksize of 512 bytes. +o COMPRESS-73: ZipArchiveEntry, ZipFile and ZipArchiveInputStream are now + more lenient when parsing extra fields. +o COMPRESS-82: cpio is terribly slow. + Documented that buffered streams are needed for performance +o Improved exception message if the extra field data in ZIP + archives cannot be parsed. +o COMPRESS-17: Tar format unspecified - current support documented. +o COMPRESS-94: ZipArchiveEntry's equals method was broken for entries created + with the String-arg constructor. This lead to broken ZIP + archives if two different entries had the same hash code. Thanks to Anon Devs. +o COMPRESS-87: ZipArchiveInputStream could repeatedly return 0 on read() when + the archive was truncated. Thanks to Antoni Mylka. +o COMPRESS-86: Tar archive entries holding the file name for names longer + than 100 characters in GNU longfile mode didn't properly + specify they'd be using the "oldgnu" extension. +o COMPRESS-83: Delegate all read and write methods in GZip stream in order to + speed up operations. +o The ar and cpio streams now properly read and write last + modified times. +o COMPRESS-81: TarOutputStream can leave garbage at the end of the archive + +Changes: +o COMPRESS-112: ArArchiveInputStream does not handle GNU extended filename records (//) +o COMPRESS-105: Document that the name of an ZipArchiveEntry determines whether + an entry is considered a directory or not. + If you don't use the constructor with the File argument the entry's + name must end in a "/" in order for the entry to be known as a directory. +o COMPRESS-79: Move DOS/Java time conversions into Zip utility class. +o COMPRESS-75: ZipArchiveInputStream does not show location in file + where a problem occurred. 
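The Release 1.1 notes above mention autodetection of the compression format in CompressorStreamFactory (COMPRESS-97). As a minimal sketch of how that factory is typically used - the file name data.txt.gz is a hypothetical example, and the stream is wrapped in a BufferedInputStream because the factory needs mark/reset support to peek at the leading bytes:

    import java.io.BufferedInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.commons.compress.compressors.CompressorException;
    import org.apache.commons.compress.compressors.CompressorInputStream;
    import org.apache.commons.compress.compressors.CompressorStreamFactory;

    public class DetectCompression {
        public static void main(String[] args) throws IOException, CompressorException {
            // Hypothetical input file; any supported format (gz, bz2, xz, ...) would be detected.
            try (InputStream raw = Files.newInputStream(Paths.get("data.txt.gz"));
                 // BufferedInputStream supplies the mark/reset the detection needs.
                 InputStream buffered = new BufferedInputStream(raw);
                 CompressorInputStream decompressed =
                         new CompressorStreamFactory().createCompressorInputStream(buffered)) {
                byte[] chunk = new byte[4096];
                int n;
                while ((n = decompressed.read(chunk)) != -1) {
                    System.out.write(chunk, 0, n);  // copy the decompressed bytes to stdout
                }
            }
        }
    }

This is only an illustration of the autodetecting entry point; production code would normally copy to a target file rather than standard output.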
+ diff --git a/src/main/java/org/apache/commons/compress/MemoryLimitException.java b/src/main/java/org/apache/commons/compress/MemoryLimitException.java new file mode 100644 index 000000000..d251fb3f7 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/MemoryLimitException.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress; + +import java.io.IOException; + +/** + * If a stream checks for estimated memory allocation, and the estimate + * goes above the memory limit, this is thrown. This can also be thrown + * if a stream tries to allocate a byte array that is larger than + * the allowable limit. + * + * @since 1.14 + */ +public class MemoryLimitException extends IOException { + + private static final long serialVersionUID = 1L; + + //long instead of int to account for overflow for corrupt files + private final long memoryNeededInKb; + private final int memoryLimitInKb; + + public MemoryLimitException(long memoryNeededInKb, int memoryLimitInKb) { + super(buildMessage(memoryNeededInKb, memoryLimitInKb)); + this.memoryNeededInKb = memoryNeededInKb; + this.memoryLimitInKb = memoryLimitInKb; + } + + public MemoryLimitException(long memoryNeededInKb, int memoryLimitInKb, Exception e) { + super(buildMessage(memoryNeededInKb, memoryLimitInKb), e); + this.memoryNeededInKb = memoryNeededInKb; + this.memoryLimitInKb = memoryLimitInKb; + } + + public long getMemoryNeededInKb() { + return memoryNeededInKb; + } + + public int getMemoryLimitInKb() { + return memoryLimitInKb; + } + + private static String buildMessage(long memoryNeededInKb, int memoryLimitInKb) { + return memoryNeededInKb + " kb of memory would be needed; limit was " + + memoryLimitInKb + " kb. " + + "If the file is not corrupt, consider increasing the memory limit."; + } +} diff --git a/src/main/java/org/apache/commons/compress/PasswordRequiredException.java b/src/main/java/org/apache/commons/compress/PasswordRequiredException.java new file mode 100644 index 000000000..d876b96b0 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/PasswordRequiredException.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress; + +import java.io.IOException; + +/** + * Exception thrown when trying to read an encrypted entry or file without + * configuring a password. + * @since 1.10 + */ +public class PasswordRequiredException extends IOException { + + private static final long serialVersionUID = 1391070005491684483L; + + /** + * Create a new exception. + * + * @param name name of the archive containing encrypted streams or + * the encrypted file. + */ + public PasswordRequiredException(final String name) { + super("Cannot read encrypted content from " + name + " without a password."); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/ArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/ArchiveEntry.java new file mode 100644 index 000000000..d5fa746a6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ArchiveEntry.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import java.util.Date; + +/** + * Represents an entry of an archive. + */ +public interface ArchiveEntry { + + /** + * Gets the name of the entry in this archive. May refer to a file or directory or other item. + * + * <p>This method returns the raw name as it is stored inside of the archive.</p> + * + * @return The name of this entry in the archive. + */ + String getName(); + + /** + * Gets the uncompressed size of this entry. May be -1 (SIZE_UNKNOWN) if the size is unknown + * + * @return the uncompressed size of this entry. + */ + long getSize(); + + /** Special value indicating that the size is unknown */ + long SIZE_UNKNOWN = -1; + + /** + * Returns true if this entry refers to a directory. + * + * @return true if this entry refers to a directory. + */ + boolean isDirectory(); + + /** + * Gets the last modified date of this entry. + * + * @return the last modified date of this entry. 
+ * @since 1.1 + */ + Date getLastModifiedDate(); +} diff --git a/src/main/java/org/apache/commons/compress/archivers/ArchiveException.java b/src/main/java/org/apache/commons/compress/archivers/ArchiveException.java new file mode 100644 index 000000000..bb577850c --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ArchiveException.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +/** + * Archiver related Exception + */ +public class ArchiveException extends Exception { + + /** Serial */ + private static final long serialVersionUID = 2772690708123267100L; + + /** + * Constructs a new exception with the specified detail message. The cause + * is not initialized. + * + * @param message + * the detail message + */ + public ArchiveException(final String message) { + super(message); + } + + /** + * Constructs a new exception with the specified detail message and cause. + * + * @param message + * the detail message + * @param cause + * the cause + */ + public ArchiveException(final String message, final Exception cause) { + super(message); + this.initCause(cause); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/ArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/ArchiveInputStream.java new file mode 100644 index 000000000..9c4e97821 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ArchiveInputStream.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Archive input streams <b>MUST</b> override the + * {@link #read(byte[], int, int)} - or {@link #read()} - + * method so that reading from the stream generates EOF for the end of + * data in each entry as well as at the end of the file proper. 
+ * <p> + * The {@link #getNextEntry()} method is used to reset the input stream + * ready for reading the data from the next entry. + * <p> + * The input stream classes must also implement a method with the signature: + * <pre> + * public static boolean matches(byte[] signature, int length) + * </pre> + * which is used by the {@link ArchiveStreamFactory} to autodetect + * the archive type from the first few bytes of a stream. + */ +public abstract class ArchiveInputStream extends InputStream { + + private final byte[] single = new byte[1]; + private static final int BYTE_MASK = 0xFF; + + /** holds the number of bytes read in this stream */ + private long bytesRead = 0; + + /** + * Returns the next Archive Entry in this Stream. + * + * @return the next entry, + * or {@code null} if there are no more entries + * @throws IOException if the next entry could not be read + */ + public abstract ArchiveEntry getNextEntry() throws IOException; + + /* + * Note that subclasses also implement specific get() methods which + * return the appropriate class without need for a cast. + * See SVN revision r743259 + * @return + * @throws IOException + */ + // public abstract XXXArchiveEntry getNextXXXEntry() throws IOException; + + /** + * Reads a byte of data. This method will block until enough input is + * available. + * + * Simply calls the {@link #read(byte[], int, int)} method. + * + * MUST be overridden if the {@link #read(byte[], int, int)} method + * is not overridden; may be overridden otherwise. + * + * @return the byte read, or -1 if end of input is reached + * @throws IOException + * if an I/O error has occurred + */ + @Override + public int read() throws IOException { + final int num = read(single, 0, 1); + return num == -1 ? -1 : single[0] & BYTE_MASK; + } + + /** + * Increments the counter of already read bytes. + * Doesn't increment if the EOF has been hit (read == -1) + * + * @param read the number of bytes read + */ + protected void count(final int read) { + count((long) read); + } + + /** + * Increments the counter of already read bytes. + * Doesn't increment if the EOF has been hit (read == -1) + * + * @param read the number of bytes read + * @since 1.1 + */ + protected void count(final long read) { + if (read != -1) { + bytesRead = bytesRead + read; + } + } + + /** + * Decrements the counter of already read bytes. + * + * @param pushedBack the number of bytes pushed back. + * @since 1.1 + */ + protected void pushedBackBytes(final long pushedBack) { + bytesRead -= pushedBack; + } + + /** + * Returns the current number of bytes read from this stream. + * @return the number of read bytes + * @deprecated this method may yield wrong results for large + * archives, use #getBytesRead instead + */ + @Deprecated + public int getCount() { + return (int) bytesRead; + } + + /** + * Returns the current number of bytes read from this stream. + * @return the number of read bytes + * @since 1.1 + */ + public long getBytesRead() { + return bytesRead; + } + + /** + * Whether this stream is able to read the given entry. + * + * <p> + * Some archive formats support variants or details that are not supported (yet). + * </p> + * + * @param archiveEntry + * the entry to test + * @return This implementation always returns true. 
+ * + * @since 1.1 + */ + public boolean canReadEntryData(final ArchiveEntry archiveEntry) { + return true; + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/ArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/ArchiveOutputStream.java new file mode 100644 index 000000000..4377b6dc4 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ArchiveOutputStream.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; + +/** + * Archive output stream implementations are expected to override the + * {@link #write(byte[], int, int)} method to improve performance. + * They should also override {@link #close()} to ensure that any necessary + * trailers are added. + * + * <p>The normal sequence of calls when working with ArchiveOutputStreams is:</p> + * <ul> + * <li>Create ArchiveOutputStream object,</li> + * <li>optionally write SFX header (Zip only),</li> + * <li>repeat as needed: + * <ul> + * <li>{@link #putArchiveEntry(ArchiveEntry)} (writes entry header), + * <li>{@link #write(byte[])} (writes entry data, as often as needed), + * <li>{@link #closeArchiveEntry()} (closes entry), + * </ul> + * </li> + * <li> {@link #finish()} (ends the addition of entries),</li> + * <li> optionally write additional data, provided format supports it,</li> + * <li>{@link #close()}.</li> + * </ul> + */ +public abstract class ArchiveOutputStream extends OutputStream { + + /** Temporary buffer used for the {@link #write(int)} method */ + private final byte[] oneByte = new byte[1]; + static final int BYTE_MASK = 0xFF; + + /** holds the number of bytes written to this stream */ + private long bytesWritten = 0; + // Methods specific to ArchiveOutputStream + + /** + * Writes the headers for an archive entry to the output stream. + * The caller must then write the content to the stream and call + * {@link #closeArchiveEntry()} to complete the process. + * + * @param entry describes the entry + * @throws IOException if an I/O error occurs + */ + public abstract void putArchiveEntry(ArchiveEntry entry) throws IOException; + + /** + * Closes the archive entry, writing any trailer information that may + * be required. + * @throws IOException if an I/O error occurs + */ + public abstract void closeArchiveEntry() throws IOException; + + /** + * Finishes the addition of entries to this stream, without closing it. + * Additional data can be written, if the format supports it. + * + * @throws IOException if the user forgets to close the entry. 
+ */ + public abstract void finish() throws IOException; + + /** + * Create an archive entry using the inputFile and entryName provided. + * + * @param inputFile the file to create the entry from + * @param entryName name to use for the entry + * @return the ArchiveEntry set up with details from the file + * + * @throws IOException if an I/O error occurs + */ + public abstract ArchiveEntry createArchiveEntry(File inputFile, String entryName) throws IOException; + + // Generic implementations of OutputStream methods that may be useful to sub-classes + + /** + * Writes a byte to the current archive entry. + * + * <p>This method simply calls {@code write( byte[], 0, 1 )}. + * + * <p>MUST be overridden if the {@link #write(byte[], int, int)} method + * is not overridden; may be overridden otherwise. + * + * @param b The byte to be written. + * @throws IOException on error + */ + @Override + public void write(final int b) throws IOException { + oneByte[0] = (byte) (b & BYTE_MASK); + write(oneByte, 0, 1); + } + + /** + * Increments the counter of already written bytes. + * Doesn't increment if EOF has been hit ({@code written == -1}). + * + * @param written the number of bytes written + */ + protected void count(final int written) { + count((long) written); + } + + /** + * Increments the counter of already written bytes. + * Doesn't increment if EOF has been hit ({@code written == -1}). + * + * @param written the number of bytes written + * @since 1.1 + */ + protected void count(final long written) { + if (written != -1) { + bytesWritten = bytesWritten + written; + } + } + + /** + * Returns the current number of bytes written to this stream. + * @return the number of written bytes + * @deprecated this method may yield wrong results for large + * archives, use #getBytesWritten instead + */ + @Deprecated + public int getCount() { + return (int) bytesWritten; + } + + /** + * Returns the current number of bytes written to this stream. + * @return the number of written bytes + * @since 1.1 + */ + public long getBytesWritten() { + return bytesWritten; + } + + /** + * Whether this stream is able to write the given entry. + * + * <p>Some archive formats support variants or details that are + * not supported (yet).</p> + * + * @param archiveEntry + * the entry to test + * @return This implementation always returns true. + * @since 1.1 + */ + public boolean canWriteEntryData(final ArchiveEntry archiveEntry) { + return true; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java b/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java new file mode 100644 index 000000000..3cd8ba763 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java @@ -0,0 +1,592 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.Locale; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; +import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; +import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; +import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; +import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream; +import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; +import org.apache.commons.compress.archivers.jar.JarArchiveInputStream; +import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream; +import org.apache.commons.compress.archivers.sevenz.SevenZFile; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.Lists; +import org.apache.commons.compress.utils.ServiceLoaderIterator; +import org.apache.commons.compress.utils.Sets; + +/** + * Factory to create Archive[In|Out]putStreams from names or the first bytes of + * the InputStream. In order to add other implementations, you should extend + * ArchiveStreamFactory and override the appropriate methods (and call their + * implementation from super of course). + * + * Compressing a ZIP-File: + * + * <pre> + * final OutputStream out = Files.newOutputStream(output.toPath()); + * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out); + * + * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); + * IOUtils.copy(Files.newInputStream(file1.toPath()), os); + * os.closeArchiveEntry(); + * + * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); + * IOUtils.copy(Files.newInputStream(file2.toPath()), os); + * os.closeArchiveEntry(); + * os.close(); + * </pre> + * + * Decompressing a ZIP-File: + * + * <pre> + * final InputStream is = Files.newInputStream(input.toPath()); + * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is); + * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry(); + * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName())); + * IOUtils.copy(in, out); + * out.close(); + * in.close(); + * </pre> + * @Immutable provided that the deprecated method setEntryEncoding is not used. 
+ * @ThreadSafe even if the deprecated method setEntryEncoding is used + */ +public class ArchiveStreamFactory implements ArchiveStreamProvider { + + private static final int TAR_HEADER_SIZE = 512; + + private static final int DUMP_SIGNATURE_SIZE = 32; + + private static final int SIGNATURE_SIZE = 12; + + private static final ArchiveStreamFactory SINGLETON = new ArchiveStreamFactory(); + + /** + * Constant (value {@value}) used to identify the AR archive format. + * @since 1.1 + */ + public static final String AR = "ar"; + + /** + * Constant (value {@value}) used to identify the ARJ archive format. + * Not supported as an output stream type. + * @since 1.6 + */ + public static final String ARJ = "arj"; + + /** + * Constant (value {@value}) used to identify the CPIO archive format. + * @since 1.1 + */ + public static final String CPIO = "cpio"; + + /** + * Constant (value {@value}) used to identify the Unix DUMP archive format. + * Not supported as an output stream type. + * @since 1.3 + */ + public static final String DUMP = "dump"; + + /** + * Constant (value {@value}) used to identify the JAR archive format. + * @since 1.1 + */ + public static final String JAR = "jar"; + + /** + * Constant used to identify the TAR archive format. + * @since 1.1 + */ + public static final String TAR = "tar"; + + /** + * Constant (value {@value}) used to identify the ZIP archive format. + * @since 1.1 + */ + public static final String ZIP = "zip"; + + /** + * Constant (value {@value}) used to identify the 7z archive format. + * @since 1.8 + */ + public static final String SEVEN_Z = "7z"; + + /** + * Entry encoding, null for the platform default. + */ + private final String encoding; + + /** + * Entry encoding, null for the default. + */ + private volatile String entryEncoding; + + private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders; + + private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders; + + private static ArrayList<ArchiveStreamProvider> findArchiveStreamProviders() { + return Lists.newArrayList(serviceLoaderIterator()); + } + + static void putAll(Set<String> names, ArchiveStreamProvider provider, + TreeMap<String, ArchiveStreamProvider> map) { + for (String name : names) { + map.put(toKey(name), provider); + } + } + + private static Iterator<ArchiveStreamProvider> serviceLoaderIterator() { + return new ServiceLoaderIterator<>(ArchiveStreamProvider.class); + } + + private static String toKey(final String name) { + return name.toUpperCase(Locale.ROOT); + } + + /** + * Constructs a new sorted map from input stream provider names to provider + * objects. + * + * <p> + * The map returned by this method will have one entry for each provider for + * which support is available in the current Java virtual machine. If two or + * more supported provider have the same name then the resulting map will + * contain just one of them; which one it will contain is not specified. + * </p> + * + * <p> + * The invocation of this method, and the subsequent use of the resulting + * map, may cause time-consuming disk or network I/O operations to occur. + * This method is provided for applications that need to enumerate all of + * the available providers, for example to allow user provider selection. + * </p> + * + * <p> + * This method may return different results at different times if new + * providers are dynamically made available to the current Java virtual + * machine. 
+ * </p> + * + * @return An immutable, map from names to provider objects + * @since 1.13 + */ + public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() { + return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() { + @Override + public SortedMap<String, ArchiveStreamProvider> run() { + TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); + putAll(SINGLETON.getInputStreamArchiveNames(), SINGLETON, map); + for (ArchiveStreamProvider provider : findArchiveStreamProviders()) { + putAll(provider.getInputStreamArchiveNames(), provider, map); + } + return map; + } + }); + } + + /** + * Constructs a new sorted map from output stream provider names to provider + * objects. + * + * <p> + * The map returned by this method will have one entry for each provider for + * which support is available in the current Java virtual machine. If two or + * more supported provider have the same name then the resulting map will + * contain just one of them; which one it will contain is not specified. + * </p> + * + * <p> + * The invocation of this method, and the subsequent use of the resulting + * map, may cause time-consuming disk or network I/O operations to occur. + * This method is provided for applications that need to enumerate all of + * the available providers, for example to allow user provider selection. + * </p> + * + * <p> + * This method may return different results at different times if new + * providers are dynamically made available to the current Java virtual + * machine. + * </p> + * + * @return An immutable, map from names to provider objects + * @since 1.13 + */ + public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() { + return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() { + @Override + public SortedMap<String, ArchiveStreamProvider> run() { + TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); + putAll(SINGLETON.getOutputStreamArchiveNames(), SINGLETON, map); + for (ArchiveStreamProvider provider : findArchiveStreamProviders()) { + putAll(provider.getOutputStreamArchiveNames(), provider, map); + } + return map; + } + }); + } + + /** + * Create an instance using the platform default encoding. + */ + public ArchiveStreamFactory() { + this(null); + } + + /** + * Create an instance using the specified encoding. + * + * @param encoding the encoding to be used. + * + * @since 1.10 + */ + public ArchiveStreamFactory(final String encoding) { + super(); + this.encoding = encoding; + // Also set the original field so can continue to use it. + this.entryEncoding = encoding; + } + + /** + * Returns the encoding to use for arj, jar, zip, dump, cpio and tar + * files, or null for the archiver default. + * + * @return entry encoding, or null for the archiver default + * @since 1.5 + */ + public String getEntryEncoding() { + return entryEncoding; + } + + /** + * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default. + * + * @param entryEncoding the entry encoding, null uses the archiver default. + * @since 1.5 + * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding + * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)} + * was used to specify the factory encoding. 
+ */ + @Deprecated + public void setEntryEncoding(final String entryEncoding) { + // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway + if (encoding != null) { + throw new IllegalStateException("Cannot overide encoding set by the constructor"); + } + this.entryEncoding = entryEncoding; + } + + /** + * Creates an archive input stream from an archiver name and an input stream. + * + * @param archiverName the archive name, + * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z} + * @param in the input stream + * @return the archive input stream + * @throws ArchiveException if the archiver name is not known + * @throws StreamingNotSupportedException if the format cannot be + * read from a stream + * @throws IllegalArgumentException if the archiver name or stream is null + */ + public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in) + throws ArchiveException { + return createArchiveInputStream(archiverName, in, entryEncoding); + } + + @Override + public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in, + final String actualEncoding) throws ArchiveException { + + if (archiverName == null) { + throw new IllegalArgumentException("Archivername must not be null."); + } + + if (in == null) { + throw new IllegalArgumentException("InputStream must not be null."); + } + + if (AR.equalsIgnoreCase(archiverName)) { + return new ArArchiveInputStream(in); + } + if (ARJ.equalsIgnoreCase(archiverName)) { + if (actualEncoding != null) { + return new ArjArchiveInputStream(in, actualEncoding); + } + return new ArjArchiveInputStream(in); + } + if (ZIP.equalsIgnoreCase(archiverName)) { + if (actualEncoding != null) { + return new ZipArchiveInputStream(in, actualEncoding); + } + return new ZipArchiveInputStream(in); + } + if (TAR.equalsIgnoreCase(archiverName)) { + if (actualEncoding != null) { + return new TarArchiveInputStream(in, actualEncoding); + } + return new TarArchiveInputStream(in); + } + if (JAR.equalsIgnoreCase(archiverName)) { + if (actualEncoding != null) { + return new JarArchiveInputStream(in, actualEncoding); + } + return new JarArchiveInputStream(in); + } + if (CPIO.equalsIgnoreCase(archiverName)) { + if (actualEncoding != null) { + return new CpioArchiveInputStream(in, actualEncoding); + } + return new CpioArchiveInputStream(in); + } + if (DUMP.equalsIgnoreCase(archiverName)) { + if (actualEncoding != null) { + return new DumpArchiveInputStream(in, actualEncoding); + } + return new DumpArchiveInputStream(in); + } + if (SEVEN_Z.equalsIgnoreCase(archiverName)) { + throw new StreamingNotSupportedException(SEVEN_Z); + } + + final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName)); + if (archiveStreamProvider != null) { + return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding); + } + + throw new ArchiveException("Archiver: " + archiverName + " not found."); + } + + /** + * Creates an archive output stream from an archiver name and an output stream. + * + * @param archiverName the archive name, + * i.e. 
{@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO} + * @param out the output stream + * @return the archive output stream + * @throws ArchiveException if the archiver name is not known + * @throws StreamingNotSupportedException if the format cannot be + * written to a stream + * @throws IllegalArgumentException if the archiver name or stream is null + */ + public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out) + throws ArchiveException { + return createArchiveOutputStream(archiverName, out, entryEncoding); + } + + @Override + public ArchiveOutputStream createArchiveOutputStream( + final String archiverName, final OutputStream out, final String actualEncoding) + throws ArchiveException { + if (archiverName == null) { + throw new IllegalArgumentException("Archivername must not be null."); + } + if (out == null) { + throw new IllegalArgumentException("OutputStream must not be null."); + } + + if (AR.equalsIgnoreCase(archiverName)) { + return new ArArchiveOutputStream(out); + } + if (ZIP.equalsIgnoreCase(archiverName)) { + final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out); + if (actualEncoding != null) { + zip.setEncoding(actualEncoding); + } + return zip; + } + if (TAR.equalsIgnoreCase(archiverName)) { + if (actualEncoding != null) { + return new TarArchiveOutputStream(out, actualEncoding); + } + return new TarArchiveOutputStream(out); + } + if (JAR.equalsIgnoreCase(archiverName)) { + if (actualEncoding != null) { + return new JarArchiveOutputStream(out, actualEncoding); + } + return new JarArchiveOutputStream(out); + } + if (CPIO.equalsIgnoreCase(archiverName)) { + if (actualEncoding != null) { + return new CpioArchiveOutputStream(out, actualEncoding); + } + return new CpioArchiveOutputStream(out); + } + if (SEVEN_Z.equalsIgnoreCase(archiverName)) { + throw new StreamingNotSupportedException(SEVEN_Z); + } + + final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName)); + if (archiveStreamProvider != null) { + return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding); + } + + throw new ArchiveException("Archiver: " + archiverName + " not found."); + } + + /** + * Create an archive input stream from an input stream, autodetecting + * the archive type from the first few bytes of the stream. The InputStream + * must support marks, like BufferedInputStream. 
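// [Editor's note] Illustrative sketch only, not part of this change set: autodetection as
// described above needs a mark-supporting stream such as BufferedInputStream. The path is
// hypothetical; ArchiveException/IOException handling is left to the caller.
try (java.io.InputStream in = new java.io.BufferedInputStream(
        java.nio.file.Files.newInputStream(java.nio.file.Paths.get("unknown.archive")))) {
    final String format = ArchiveStreamFactory.detect(in);   // e.g. "tar"; the stream is reset
    try (ArchiveInputStream ais =
            new ArchiveStreamFactory().createArchiveInputStream(format, in)) {
        // ... iterate ais.getNextEntry() ...
    }
}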
+ * + * @param in the input stream + * @return the archive input stream + * @throws ArchiveException if the archiver name is not known + * @throws StreamingNotSupportedException if the format cannot be + * read from a stream + * @throws IllegalArgumentException if the stream is null or does not support mark + */ + public ArchiveInputStream createArchiveInputStream(final InputStream in) + throws ArchiveException { + return createArchiveInputStream(detect(in), in); + } + + /** + * Try to determine the type of Archiver + * @param in input stream + * @return type of archiver if found + * @throws ArchiveException if an archiver cannot be detected in the stream + * @since 1.14 + */ + public static String detect(InputStream in) throws ArchiveException { + if (in == null) { + throw new IllegalArgumentException("Stream must not be null."); + } + + if (!in.markSupported()) { + throw new IllegalArgumentException("Mark is not supported."); + } + + final byte[] signature = new byte[SIGNATURE_SIZE]; + in.mark(signature.length); + int signatureLength = -1; + try { + signatureLength = IOUtils.readFully(in, signature); + in.reset(); + } catch (IOException e) { + throw new ArchiveException("IOException while reading signature.", e); + } + + if (ZipArchiveInputStream.matches(signature, signatureLength)) { + return ZIP; + } else if (JarArchiveInputStream.matches(signature, signatureLength)) { + return JAR; + } else if (ArArchiveInputStream.matches(signature, signatureLength)) { + return AR; + } else if (CpioArchiveInputStream.matches(signature, signatureLength)) { + return CPIO; + } else if (ArjArchiveInputStream.matches(signature, signatureLength)) { + return ARJ; + } else if (SevenZFile.matches(signature, signatureLength)) { + return SEVEN_Z; + } + + // Dump needs a bigger buffer to check the signature; + final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE]; + in.mark(dumpsig.length); + try { + signatureLength = IOUtils.readFully(in, dumpsig); + in.reset(); + } catch (IOException e) { + throw new ArchiveException("IOException while reading dump signature", e); + } + if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) { + return DUMP; + } + + // Tar needs an even bigger buffer to check the signature; read the first block + final byte[] tarHeader = new byte[TAR_HEADER_SIZE]; + in.mark(tarHeader.length); + try { + signatureLength = IOUtils.readFully(in, tarHeader); + in.reset(); + } catch (IOException e) { + throw new ArchiveException("IOException while reading tar signature", e); + } + if (TarArchiveInputStream.matches(tarHeader, signatureLength)) { + return TAR; + } + + // COMPRESS-117 - improve auto-recognition + if (signatureLength >= TAR_HEADER_SIZE) { + TarArchiveInputStream tais = null; + try { + tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader)); + // COMPRESS-191 - verify the header checksum + if (tais.getNextTarEntry().isCheckSumOK()) { + return TAR; + } + } catch (final Exception e) { // NOPMD // NOSONAR + // can generate IllegalArgumentException as well + // as IOException + // autodetection, simply not a TAR + // ignored + } finally { + IOUtils.closeQuietly(tais); + } + } + throw new ArchiveException("No Archiver found for the stream signature"); + } + + public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() { + if (archiveInputStreamProviders == null) { + archiveInputStreamProviders = Collections + .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders()); + } + return archiveInputStreamProviders; + } + + public SortedMap<String, 
ArchiveStreamProvider> getArchiveOutputStreamProviders() { + if (archiveOutputStreamProviders == null) { + archiveOutputStreamProviders = Collections + .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders()); + } + return archiveOutputStreamProviders; + } + + @Override + public Set<String> getInputStreamArchiveNames() { + return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z); + } + + @Override + public Set<String> getOutputStreamArchiveNames() { + return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamProvider.java b/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamProvider.java new file mode 100644 index 000000000..eb1862c9d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamProvider.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers; + +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Set; + +/** + * Creates Archive {@link ArchiveInputStream}s and {@link ArchiveOutputStream}s. + * + * @since 1.13 + */ +public interface ArchiveStreamProvider { + + /** + * Creates an archive input stream from an archiver name and an input + * stream. + * + * @param name + * the archive name, i.e. + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#AR}, + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#ARJ}, + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#ZIP}, + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#TAR}, + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#JAR}, + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#CPIO}, + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#DUMP} + * or + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#SEVEN_Z} + * @param in + * the input stream + * @param encoding + * encoding name or null for the default + * @return the archive input stream + * @throws ArchiveException + * if the archiver name is not known + * @throws StreamingNotSupportedException + * if the format cannot be read from a stream + * @throws IllegalArgumentException + * if the archiver name or stream is null + */ + ArchiveInputStream createArchiveInputStream(final String name, final InputStream in, final String encoding) + throws ArchiveException; + + /** + * Creates an archive output stream from an archiver name and an output + * stream. + * + * @param name + * the archive name, i.e. 
+ * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#AR}, + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#ZIP}, + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#TAR}, + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#JAR} + * or + * {@value org.apache.commons.compress.archivers.ArchiveStreamFactory#CPIO} + * @param out + * the output stream + * @param encoding + * encoding name or null for the default + * @return the archive output stream + * @throws ArchiveException + * if the archiver name is not known + * @throws StreamingNotSupportedException + * if the format cannot be written to a stream + * @throws IllegalArgumentException + * if the archiver name or stream is null + */ + ArchiveOutputStream createArchiveOutputStream(final String name, final OutputStream out, final String encoding) + throws ArchiveException; + + /** + * Gets all the input stream archive names for this provider + * + * @return all the input archive names for this provider + */ + Set<String> getInputStreamArchiveNames(); + + /** + * Gets all the output stream archive names for this provider + * + * @return all the output archive names for this provider + */ + Set<String> getOutputStreamArchiveNames(); + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/EntryStreamOffsets.java b/src/main/java/org/apache/commons/compress/archivers/EntryStreamOffsets.java new file mode 100644 index 000000000..a73d079e2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/EntryStreamOffsets.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + + +/** + * Provides information about ArchiveEntry stream offsets. + */ +public interface EntryStreamOffsets { + + /** Special value indicating that the offset is unknown. */ + long OFFSET_UNKNOWN = -1; + + /** + * Gets the offset of data stream within the archive file, + * + * @return + * the offset of entry data stream, {@code OFFSET_UNKNOWN} if not known. + */ + long getDataOffset(); + + /** + * Indicates whether the stream is contiguous, i.e. not split among + * several archive parts, interspersed with control blocks, etc. + * + * @return + * true if stream is contiguous, false otherwise. + */ + boolean isStreamContiguous(); +} diff --git a/src/main/java/org/apache/commons/compress/archivers/Lister.java b/src/main/java/org/apache/commons/compress/archivers/Lister.java new file mode 100644 index 000000000..07a8e9c07 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/Lister.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
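// [Editor's note] Illustrative sketch only, not part of this change set: consuming the
// EntryStreamOffsets interface defined above. It assumes "entry" is an ArchiveEntry whose
// concrete class also implements EntryStreamOffsets; not every entry type does.
if (entry instanceof EntryStreamOffsets) {
    final EntryStreamOffsets offsets = (EntryStreamOffsets) entry;
    if (offsets.isStreamContiguous()
            && offsets.getDataOffset() != EntryStreamOffsets.OFFSET_UNKNOWN) {
        // the entry's data could be read directly starting at offsets.getDataOffset()
    }
}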
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import org.apache.commons.compress.archivers.sevenz.SevenZFile; + +/** + * Simple command line application that lists the contents of an archive. + * + * <p>The name of the archive must be given as a command line argument.</p> + * <p>The optional second argument defines the archive type, in case the format is not recognized.</p> + * + * @since 1.1 + */ +public final class Lister { + private static final ArchiveStreamFactory factory = new ArchiveStreamFactory(); + + public static void main(final String[] args) throws Exception { + if (args.length == 0) { + usage(); + return; + } + System.out.println("Analysing " + args[0]); + final File f = new File(args[0]); + if (!f.isFile()) { + System.err.println(f + " doesn't exist or is a directory"); + } + String format = args.length > 1 ? args[1] : detectFormat(f); + if (ArchiveStreamFactory.SEVEN_Z.equalsIgnoreCase(format)) { + list7z(f); + } else { + listStream(f, args); + } + } + + private static void listStream(File f, String[] args) throws ArchiveException, IOException { + try (final InputStream fis = new BufferedInputStream(Files.newInputStream(f.toPath())); + final ArchiveInputStream ais = createArchiveInputStream(args, fis)) { + System.out.println("Created " + ais.toString()); + ArchiveEntry ae; + while ((ae = ais.getNextEntry()) != null) { + System.out.println(ae.getName()); + } + } + } + + private static ArchiveInputStream createArchiveInputStream(final String[] args, final InputStream fis) + throws ArchiveException { + if (args.length > 1) { + return factory.createArchiveInputStream(args[1], fis); + } + return factory.createArchiveInputStream(fis); + } + + private static String detectFormat(File f) throws ArchiveException, IOException { + try (final InputStream fis = new BufferedInputStream(Files.newInputStream(f.toPath()))) { + return factory.detect(fis); + } + } + + private static void list7z(File f) throws ArchiveException, IOException { + try (SevenZFile z = new SevenZFile(f)) { + System.out.println("Created " + z.toString()); + ArchiveEntry ae; + while ((ae = z.getNextEntry()) != null) { + System.out.println(ae.getName()); + } + } + } + + private static void usage() { + System.out.println("Parameters: archive-name [archive-type]"); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/StreamingNotSupportedException.java b/src/main/java/org/apache/commons/compress/archivers/StreamingNotSupportedException.java new file mode 100644 index 000000000..9f12a7aff --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/StreamingNotSupportedException.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +/** + * Exception thrown by ArchiveStreamFactory if a format is requested/detected that doesn't support streaming. + * + * @since 1.8 + */ +public class StreamingNotSupportedException extends ArchiveException { + + private static final long serialVersionUID = 1L; + + private final String format; + + /** + * Creates a new StreamingNotSupportedException. + * + * @param format the format that has been requested/detected. + */ + public StreamingNotSupportedException(final String format) { + super("The " + format + " doesn't support streaming."); + this.format = format; + } + + /** + * Returns the format that has been requested/detected. + * + * @return the format that has been requested/detected. + */ + public String getFormat() { + return format; + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/ar/ArArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/ar/ArArchiveEntry.java new file mode 100644 index 000000000..df9595ad2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ar/ArArchiveEntry.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.ar; + +import java.io.File; +import java.util.Date; + +import org.apache.commons.compress.archivers.ArchiveEntry; + +/** + * Represents an archive entry in the "ar" format. + * + * Each AR archive starts with "!<arch>" followed by a LF. After these 8 bytes + * the archive entries are listed. The format of an entry header is as it follows: + * + * <pre> + * START BYTE END BYTE NAME FORMAT LENGTH + * 0 15 File name ASCII 16 + * 16 27 Modification timestamp Decimal 12 + * 28 33 Owner ID Decimal 6 + * 34 39 Group ID Decimal 6 + * 40 47 File mode Octal 8 + * 48 57 File size (bytes) Decimal 10 + * 58 59 File magic \140\012 2 + * </pre> + * + * This specifies that an ar archive entry header contains 60 bytes. 
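// [Editor's note] Illustrative sketch only, not part of this change set: handling the
// StreamingNotSupportedException class defined above when 7z is requested from the factory.
// Assumes "in" is an InputStream and that ArchiveException is otherwise propagated.
try {
    new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.SEVEN_Z, in);
} catch (final StreamingNotSupportedException e) {
    // 7z cannot be streamed; fall back to the random-access SevenZFile(File) API instead
    System.err.println(e.getFormat() + " does not support streaming");
}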
+ * + * Due to the limitation of the file name length to 16 bytes GNU and + * BSD has their own variants of this format. Currently Commons + * Compress can read but not write the GNU variant. It fully supports + * the BSD variant. + * + * @see <a href="https://www.freebsd.org/cgi/man.cgi?query=ar&sektion=5">ar man page</a> + * + * @Immutable + */ +public class ArArchiveEntry implements ArchiveEntry { + + /** The header for each entry */ + public static final String HEADER = "!<arch>\n"; + + /** The trailer for each entry */ + public static final String TRAILER = "`\012"; + + /** + * SVR4/GNU adds a trailing / to names; BSD does not. + * They also vary in how names longer than 16 characters are represented. + * (Not yet fully supported by this implementation) + */ + private final String name; + private final int userId; + private final int groupId; + private final int mode; + private static final int DEFAULT_MODE = 33188; // = (octal) 0100644 + private final long lastModified; + private final long length; + + /** + * Create a new instance using a couple of default values. + * + * <p>Sets userId and groupId to 0, the octal file mode to 644 and + * the last modified time to the current time.</p> + * + * @param name name of the entry + * @param length length of the entry in bytes + */ + public ArArchiveEntry(final String name, final long length) { + this(name, length, 0, 0, DEFAULT_MODE, + System.currentTimeMillis() / 1000); + } + + /** + * Create a new instance. + * + * @param name name of the entry + * @param length length of the entry in bytes + * @param userId numeric user id + * @param groupId numeric group id + * @param mode file mode + * @param lastModified last modified time in seconds since the epoch + */ + public ArArchiveEntry(final String name, final long length, final int userId, final int groupId, + final int mode, final long lastModified) { + this.name = name; + this.length = length; + this.userId = userId; + this.groupId = groupId; + this.mode = mode; + this.lastModified = lastModified; + } + + /** + * Create a new instance using the attributes of the given file + * @param inputFile the file to create an entry from + * @param entryName the name of the entry + */ + public ArArchiveEntry(final File inputFile, final String entryName) { + // TODO sort out mode + this(entryName, inputFile.isFile() ? inputFile.length() : 0, + 0, 0, DEFAULT_MODE, inputFile.lastModified() / 1000); + } + + @Override + public long getSize() { + return this.getLength(); + } + + @Override + public String getName() { + return name; + } + + public int getUserId() { + return userId; + } + + public int getGroupId() { + return groupId; + } + + public int getMode() { + return mode; + } + + /** + * Last modified time in seconds since the epoch. + * @return the last modified date + */ + public long getLastModified() { + return lastModified; + } + + @Override + public Date getLastModifiedDate() { + return new Date(1000 * getLastModified()); + } + + public long getLength() { + return length; + } + + @Override + public boolean isDirectory() { + return false; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + (name == null ? 
0 : name.hashCode()); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + final ArArchiveEntry other = (ArArchiveEntry) obj; + if (name == null) { + return other.name == null; + } else { + return name.equals(other.name); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/ar/ArArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/ar/ArArchiveInputStream.java new file mode 100644 index 000000000..206d3881a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ar/ArArchiveInputStream.java @@ -0,0 +1,407 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.ar; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.utils.ArchiveUtils; +import org.apache.commons.compress.utils.IOUtils; + +/** + * Implements the "ar" archive format as an input stream. + * + * @NotThreadSafe + * + */ +public class ArArchiveInputStream extends ArchiveInputStream { + + private final InputStream input; + private long offset = 0; + private boolean closed; + + /* + * If getNextEnxtry has been called, the entry metadata is stored in + * currentEntry. + */ + private ArArchiveEntry currentEntry = null; + + // Storage area for extra long names (GNU ar) + private byte[] namebuffer = null; + + /* + * The offset where the current entry started. 
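// [Editor's note] Illustrative sketch only, not part of this change set: the two-argument
// ArArchiveEntry constructor shown earlier fills in defaults (uid/gid 0, mode 0100644, and
// "now" as the modification time). The name and size are hypothetical.
final ArArchiveEntry sample = new ArArchiveEntry("foo.txt", 4);
assert sample.getUserId() == 0 && sample.getGroupId() == 0;
assert sample.getMode() == 0100644;   // 33188 decimal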
-1 if no entry has been + * called + */ + private long entryOffset = -1; + + // offsets and length of meta data parts + private static final int NAME_OFFSET = 0; + private static final int NAME_LEN = 16; + private static final int LAST_MODIFIED_OFFSET = NAME_LEN; + private static final int LAST_MODIFIED_LEN = 12; + private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN; + private static final int USER_ID_LEN = 6; + private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN; + private static final int GROUP_ID_LEN = 6; + private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN; + private static final int FILE_MODE_LEN = 8; + private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN; + private static final int LENGTH_LEN = 10; + + // cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) + private final byte[] metaData = + new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN]; + + /** + * Constructs an Ar input stream with the referenced stream + * + * @param pInput + * the ar input stream + */ + public ArArchiveInputStream(final InputStream pInput) { + input = pInput; + closed = false; + } + + /** + * Returns the next AR entry in this stream. + * + * @return the next AR entry. + * @throws IOException + * if the entry could not be read + */ + public ArArchiveEntry getNextArEntry() throws IOException { + if (currentEntry != null) { + final long entryEnd = entryOffset + currentEntry.getLength(); + long skipped = IOUtils.skip(input, entryEnd - offset); + trackReadBytes(skipped); + currentEntry = null; + } + + if (offset == 0) { + final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER); + final byte[] realized = new byte[expected.length]; + final int read = IOUtils.readFully(input, realized); + trackReadBytes(read); + if (read != expected.length) { + throw new IOException("failed to read header. Occured at byte: " + getBytesRead()); + } + for (int i = 0; i < expected.length; i++) { + if (expected[i] != realized[i]) { + throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized)); + } + } + } + + if (offset % 2 != 0) { + if (input.read() < 0) { + // hit eof + return null; + } + trackReadBytes(1); + } + + if (input.available() == 0) { + return null; + } + + { + final int read = IOUtils.readFully(input, metaData); + trackReadBytes(read); + if (read < metaData.length) { + throw new IOException("truncated ar archive"); + } + } + + { + final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER); + final byte[] realized = new byte[expected.length]; + final int read = IOUtils.readFully(input, realized); + trackReadBytes(read); + if (read != expected.length) { + throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead()); + } + for (int i = 0; i < expected.length; i++) { + if (expected[i] != realized[i]) { + throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead()); + } + } + } + + entryOffset = offset; + +// GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename. 
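// [Editor's worked example, not part of this change set] A hypothetical 60-byte header as it
// would sit in the metaData buffer just read above, field by field (widths per the layout
// documented in ArArchiveEntry):
//     name (16):   "foo.txt         "
//     mtime (12):  "1357924680  "
//     uid (6):     "0     "
//     gid (6):     "0     "
//     mode (8):    "100644  "
//     size (10):   "4         "
//     trailer (2): "`" followed by LF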
+ + // entry name is stored as ASCII string + String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim(); + if (isGNUStringTable(temp)) { // GNU extended filenames entry + currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN); + return getNextArEntry(); + } + + long len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN); + if (temp.endsWith("/")) { // GNU terminator + temp = temp.substring(0, temp.length() - 1); + } else if (isGNULongName(temp)) { + final int off = Integer.parseInt(temp.substring(1));// get the offset + temp = getExtendedName(off); // convert to the long name + } else if (isBSDLongName(temp)) { + temp = getBSDLongName(temp); + // entry length contained the length of the file name in + // addition to the real length of the entry. + // assume file name was ASCII, there is no "standard" otherwise + final int nameLen = temp.length(); + len -= nameLen; + entryOffset += nameLen; + } + + currentEntry = new ArArchiveEntry(temp, len, + asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true), + asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true), + asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8), + asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN)); + return currentEntry; + } + + /** + * Get an extended name from the GNU extended name buffer. + * + * @param offset pointer to entry within the buffer + * @return the extended file name; without trailing "/" if present. + * @throws IOException if name not found or buffer not set up + */ + private String getExtendedName(final int offset) throws IOException { + if (namebuffer == null) { + throw new IOException("Cannot process GNU long filename as no // record was found"); + } + for (int i = offset; i < namebuffer.length; i++) { + if (namebuffer[i] == '\012' || namebuffer[i] == 0) { + if (namebuffer[i - 1] == '/') { + i--; // drop trailing / + } + return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset); + } + } + throw new IOException("Failed to read entry: " + offset); + } + + private long asLong(final byte[] byteArray, int offset, int len) { + return Long.parseLong(ArchiveUtils.toAsciiString(byteArray, offset, len).trim()); + } + + private int asInt(final byte[] byteArray, int offset, int len) { + return asInt(byteArray, offset, len, 10, false); + } + + private int asInt(final byte[] byteArray, int offset, int len, final boolean treatBlankAsZero) { + return asInt(byteArray, offset, len, 10, treatBlankAsZero); + } + + private int asInt(final byte[] byteArray, int offset, int len, final int base) { + return asInt(byteArray, offset, len, base, false); + } + + private int asInt(final byte[] byteArray, int offset, int len, final int base, final boolean treatBlankAsZero) { + final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim(); + if (string.length() == 0 && treatBlankAsZero) { + return 0; + } + return Integer.parseInt(string, base); + } + + /* + * (non-Javadoc) + * + * @see + * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry() + */ + @Override + public ArchiveEntry getNextEntry() throws IOException { + return getNextArEntry(); + } + + /* + * (non-Javadoc) + * + * @see java.io.InputStream#close() + */ + @Override + public void close() throws IOException { + if (!closed) { + closed = true; + input.close(); + } + currentEntry = null; + } + + /* + * (non-Javadoc) + * + * @see java.io.InputStream#read(byte[], int, int) + */ + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (currentEntry == null) { 
+ throw new IllegalStateException("No current ar entry"); + } + final long entryEnd = entryOffset + currentEntry.getLength(); + if (len < 0 || offset >= entryEnd) { + return -1; + } + final int toRead = (int) Math.min(len, entryEnd - offset); + final int ret = this.input.read(b, off, toRead); + trackReadBytes(ret); + return ret; + } + + /** + * Checks if the signature matches ASCII "!<arch>" followed by a single LF + * control character + * + * @param signature + * the bytes to check + * @param length + * the number of bytes to check + * @return true, if this stream is an Ar archive stream, false otherwise + */ + public static boolean matches(final byte[] signature, final int length) { + // 3c21 7261 6863 0a3e + + return length >= 8 && signature[0] == 0x21 && + signature[1] == 0x3c && signature[2] == 0x61 && + signature[3] == 0x72 && signature[4] == 0x63 && + signature[5] == 0x68 && signature[6] == 0x3e && + signature[7] == 0x0a; + } + + static final String BSD_LONGNAME_PREFIX = "#1/"; + private static final int BSD_LONGNAME_PREFIX_LEN = + BSD_LONGNAME_PREFIX.length(); + private static final String BSD_LONGNAME_PATTERN = + "^" + BSD_LONGNAME_PREFIX + "\\d+"; + + /** + * Does the name look like it is a long name (or a name containing + * spaces) as encoded by BSD ar? + * + * <p>From the FreeBSD ar(5) man page:</p> + * <pre> + * BSD In the BSD variant, names that are shorter than 16 + * characters and without embedded spaces are stored + * directly in this field. If a name has an embedded + * space, or if it is longer than 16 characters, then + * the string "#1/" followed by the decimal represen- + * tation of the length of the file name is placed in + * this field. The actual file name is stored immedi- + * ately after the archive header. The content of the + * archive member follows the file name. The ar_size + * field of the header (see below) will then hold the + * sum of the size of the file name and the size of + * the member. + * </pre> + * + * @since 1.3 + */ + private static boolean isBSDLongName(final String name) { + return name != null && name.matches(BSD_LONGNAME_PATTERN); + } + + /** + * Reads the real name from the current stream assuming the very + * first bytes to be read are the real file name. + * + * @see #isBSDLongName + * + * @since 1.3 + */ + private String getBSDLongName(final String bsdLongName) throws IOException { + final int nameLen = + Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN)); + final byte[] name = new byte[nameLen]; + final int read = IOUtils.readFully(input, name); + trackReadBytes(read); + if (read != nameLen) { + throw new EOFException(); + } + return ArchiveUtils.toAsciiString(name); + } + + private static final String GNU_STRING_TABLE_NAME = "//"; + + /** + * Is this the name of the "Archive String Table" as used by + * SVR4/GNU to store long file names? + * + * <p>GNU ar stores multiple extended filenames in the data section + * of a file with the name "//", this record is referred to by + * future headers.</p> + * + * <p>A header references an extended filename by storing a "/" + * followed by a decimal offset to the start of the filename in + * the extended filename data section.</p> + * + * <p>The format of the "//" file itself is simply a list of the + * long filenames, each separated by one or more LF + * characters. 
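// [Editor's worked example, not part of this change set] A hypothetical "//" string table and
// a header that references it:
//     offset 0:  "this_is_a_rather_long_name.o/" + LF   (30 bytes including '/' and LF)
//     offset 30: "another_long_member_name.o/" + LF
// A later entry whose 16-byte name field holds "/30" is resolved by getExtendedName(30) to
// "another_long_member_name.o"; the trailing '/' before the LF is dropped.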
Note that the decimal offsets are number of + * characters, not line or string number within the "//" file.</p> + */ + private static boolean isGNUStringTable(final String name) { + return GNU_STRING_TABLE_NAME.equals(name); + } + + private void trackReadBytes(final long read) { + count(read); + if (read > 0) { + offset += read; + } + } + + /** + * Reads the GNU archive String Table. + * + * @see #isGNUStringTable + */ + private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException { + final int bufflen = asInt(length, offset, len); // Assume length will fit in an int + namebuffer = new byte[bufflen]; + final int read = IOUtils.readFully(input, namebuffer, 0, bufflen); + trackReadBytes(read); + if (read != bufflen){ + throw new IOException("Failed to read complete // record: expected=" + + bufflen + " read=" + read); + } + return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen); + } + + private static final String GNU_LONGNAME_PATTERN = "^/\\d+"; + + /** + * Does the name look like it is a long name (or a name containing + * spaces) as encoded by SVR4/GNU ar? + * + * @see #isGNUStringTable + */ + private boolean isGNULongName(final String name) { + return name != null && name.matches(GNU_LONGNAME_PATTERN); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/ar/ArArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/ar/ArArchiveOutputStream.java new file mode 100644 index 000000000..57acb71d7 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ar/ArArchiveOutputStream.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.ar; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.utils.ArchiveUtils; + +/** + * Implements the "ar" archive format as an output stream. + * + * @NotThreadSafe + */ +public class ArArchiveOutputStream extends ArchiveOutputStream { + /** Fail if a long file name is required in the archive. */ + public static final int LONGFILE_ERROR = 0; + + /** BSD ar extensions are used to store long file names in the archive. 
*/ + public static final int LONGFILE_BSD = 1; + + private final OutputStream out; + private long entryOffset = 0; + private ArArchiveEntry prevEntry; + private boolean haveUnclosedEntry = false; + private int longFileMode = LONGFILE_ERROR; + + /** indicates if this archive is finished */ + private boolean finished = false; + + public ArArchiveOutputStream( final OutputStream pOut ) { + this.out = pOut; + } + + /** + * Set the long file mode. + * This can be LONGFILE_ERROR(0) or LONGFILE_BSD(1). + * This specifies the treatment of long file names (names >= 16). + * Default is LONGFILE_ERROR. + * @param longFileMode the mode to use + * @since 1.3 + */ + public void setLongFileMode(final int longFileMode) { + this.longFileMode = longFileMode; + } + + private long writeArchiveHeader() throws IOException { + final byte [] header = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER); + out.write(header); + return header.length; + } + + @Override + public void closeArchiveEntry() throws IOException { + if(finished) { + throw new IOException("Stream has already been finished"); + } + if (prevEntry == null || !haveUnclosedEntry){ + throw new IOException("No current entry to close"); + } + if (entryOffset % 2 != 0) { + out.write('\n'); // Pad byte + } + haveUnclosedEntry = false; + } + + @Override + public void putArchiveEntry( final ArchiveEntry pEntry ) throws IOException { + if(finished) { + throw new IOException("Stream has already been finished"); + } + + final ArArchiveEntry pArEntry = (ArArchiveEntry)pEntry; + if (prevEntry == null) { + writeArchiveHeader(); + } else { + if (prevEntry.getLength() != entryOffset) { + throw new IOException("length does not match entry (" + prevEntry.getLength() + " != " + entryOffset); + } + + if (haveUnclosedEntry) { + closeArchiveEntry(); + } + } + + prevEntry = pArEntry; + + writeEntryHeader(pArEntry); + + entryOffset = 0; + haveUnclosedEntry = true; + } + + private long fill( final long pOffset, final long pNewOffset, final char pFill ) throws IOException { + final long diff = pNewOffset - pOffset; + + if (diff > 0) { + for (int i = 0; i < diff; i++) { + write(pFill); + } + } + + return pNewOffset; + } + + private long write( final String data ) throws IOException { + final byte[] bytes = data.getBytes("ascii"); + write(bytes); + return bytes.length; + } + + private long writeEntryHeader( final ArArchiveEntry pEntry ) throws IOException { + + long offset = 0; + boolean mustAppendName = false; + + final String n = pEntry.getName(); + if (LONGFILE_ERROR == longFileMode && n.length() > 16) { + throw new IOException("filename too long, > 16 chars: "+n); + } + if (LONGFILE_BSD == longFileMode && + (n.length() > 16 || n.contains(" "))) { + mustAppendName = true; + offset += write(ArArchiveInputStream.BSD_LONGNAME_PREFIX + + String.valueOf(n.length())); + } else { + offset += write(n); + } + + offset = fill(offset, 16, ' '); + final String m = "" + pEntry.getLastModified(); + if (m.length() > 12) { + throw new IOException("modified too long"); + } + offset += write(m); + + offset = fill(offset, 28, ' '); + final String u = "" + pEntry.getUserId(); + if (u.length() > 6) { + throw new IOException("userid too long"); + } + offset += write(u); + + offset = fill(offset, 34, ' '); + final String g = "" + pEntry.getGroupId(); + if (g.length() > 6) { + throw new IOException("groupid too long"); + } + offset += write(g); + + offset = fill(offset, 40, ' '); + final String fm = "" + Integer.toString(pEntry.getMode(), 8); + if (fm.length() > 8) { + throw new 
IOException("filemode too long"); + } + offset += write(fm); + + offset = fill(offset, 48, ' '); + final String s = + String.valueOf(pEntry.getLength() + + (mustAppendName ? n.length() : 0)); + if (s.length() > 10) { + throw new IOException("size too long"); + } + offset += write(s); + + offset = fill(offset, 58, ' '); + + offset += write(ArArchiveEntry.TRAILER); + + if (mustAppendName) { + offset += write(n); + } + + return offset; + } + + @Override + public void write(final byte[] b, final int off, final int len) throws IOException { + out.write(b, off, len); + count(len); + entryOffset += len; + } + + /** + * Calls finish if necessary, and then closes the OutputStream + */ + @Override + public void close() throws IOException { + try { + if (!finished) { + finish(); + } + } finally { + out.close(); + prevEntry = null; + } + } + + @Override + public ArchiveEntry createArchiveEntry(final File inputFile, final String entryName) + throws IOException { + if(finished) { + throw new IOException("Stream has already been finished"); + } + return new ArArchiveEntry(inputFile, entryName); + } + + @Override + public void finish() throws IOException { + if(haveUnclosedEntry) { + throw new IOException("This archive contains unclosed entries."); + } else if(finished) { + throw new IOException("This archive has already been finished"); + } + finished = true; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/ar/package.html b/src/main/java/org/apache/commons/compress/archivers/ar/package.html new file mode 100644 index 000000000..9c80f96e1 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ar/package.html @@ -0,0 +1,24 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream classes for reading and writing archives using + the AR format.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/archivers/arj/ArjArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/arj/ArjArchiveEntry.java new file mode 100644 index 000000000..ab847db85 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/arj/ArjArchiveEntry.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
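// [Editor's note] Illustrative sketch only, not part of this change set: writing a single
// member with the output stream class completed above; LONGFILE_BSD permits the over-long
// name. Names and data are hypothetical; IOException handling is left to the caller
// (close() runs finish() automatically).
try (ArArchiveOutputStream ar =
        new ArArchiveOutputStream(new java.io.FileOutputStream("out.a"))) {
    ar.setLongFileMode(ArArchiveOutputStream.LONGFILE_BSD);
    final byte[] data = "hello\n".getBytes(java.nio.charset.StandardCharsets.US_ASCII);
    ar.putArchiveEntry(new ArArchiveEntry("a_name_longer_than_sixteen.txt", data.length));
    ar.write(data);
    ar.closeArchiveEntry();
}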
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.arj; + +import java.io.File; +import java.util.Date; +import java.util.regex.Matcher; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipUtil; + +/** + * An entry in an ARJ archive. + * + * @NotThreadSafe + * @since 1.6 + */ +public class ArjArchiveEntry implements ArchiveEntry { + private final LocalFileHeader localFileHeader; + + public ArjArchiveEntry() { + localFileHeader = new LocalFileHeader(); + } + + ArjArchiveEntry(final LocalFileHeader localFileHeader) { + this.localFileHeader = localFileHeader; + } + + /** + * Get this entry's name. + * + * <p>This method returns the raw name as it is stored inside of the archive.</p> + * + * @return This entry's name. + */ + @Override + public String getName() { + if ((localFileHeader.arjFlags & LocalFileHeader.Flags.PATHSYM) != 0) { + return localFileHeader.name.replaceAll("/", + Matcher.quoteReplacement(File.separator)); + } + return localFileHeader.name; + } + + /** + * Get this entry's file size. + * + * @return This entry's file size. + */ + @Override + public long getSize() { + return localFileHeader.originalSize; + } + + /** True if the entry refers to a directory. + * + * @return True if the entry refers to a directory + */ + @Override + public boolean isDirectory() { + return localFileHeader.fileType == LocalFileHeader.FileTypes.DIRECTORY; + } + + /** + * The last modified date of the entry. + * + * <p>Note the interpretation of time is different depending on + * the HostOS that has created the archive. While an OS that is + * {@link #isHostOsUnix considered to be Unix} stores time in a + * timezone independent manner, other platforms only use the local + * time. I.e. if an archive has been created at midnight UTC on a + * machine in timezone UTC this method will return midnight + * regardless of timezone if the archive has been created on a + * non-Unix system and a time taking the current timezone into + * account if the archive has beeen created on Unix.</p> + * + * @return the last modified date + */ + @Override + public Date getLastModifiedDate() { + final long ts = isHostOsUnix() ? localFileHeader.dateTimeModified * 1000L + : ZipUtil.dosToJavaTime(0xFFFFFFFFL & localFileHeader.dateTimeModified); + return new Date(ts); + } + + /** + * File mode of this entry. + * + * <p>The format depends on the host os that created the entry.</p> + * + * @return the file mode + */ + public int getMode() { + return localFileHeader.fileAccessMode; + } + + /** + * File mode of this entry as Unix stat value. + * + * <p>Will only be non-zero of the host os was UNIX. + * + * @return the Unix mode + */ + public int getUnixMode() { + return isHostOsUnix() ? getMode() : 0; + } + + /** + * The operating system the archive has been created on. + * @see HostOs + * @return the host OS code + */ + public int getHostOs() { + return localFileHeader.hostOS; + } + + /** + * Is the operating system the archive has been created on one + * that is considered a UNIX OS by arj? 
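// [Editor's note] Illustrative sketch only, not part of this change set: interpreting
// ArjArchiveEntry metadata per the notes above; mode and timestamp semantics depend on the
// host OS that created the archive. "entry" is assumed to come from an ArjArchiveInputStream.
if (entry.isHostOsUnix()) {
    System.out.printf("%s mode=%o mtime=%s%n",
            entry.getName(), entry.getUnixMode(), entry.getLastModifiedDate());
} else {
    // DOS-style timestamp, interpreted in the local time zone
    System.out.println(entry.getName() + " mtime=" + entry.getLastModifiedDate());
}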
+ * @return whether the operating system the archive has been + * created on is considered a UNIX OS by arj + */ + public boolean isHostOsUnix() { + return getHostOs() == HostOs.UNIX || getHostOs() == HostOs.NEXT; + } + + int getMethod() { + return localFileHeader.method; + } + + /** + * The known values for HostOs. + */ + public static class HostOs { + public static final int DOS = 0; + public static final int PRIMOS = 1; + public static final int UNIX = 2; + public static final int AMIGA = 3; + public static final int MAC_OS = 4; + public static final int OS_2 = 5; + public static final int APPLE_GS = 6; + public static final int ATARI_ST = 7; + public static final int NEXT = 8; + public static final int VAX_VMS = 9; + public static final int WIN95 = 10; + public static final int WIN32 = 11; + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/arj/ArjArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/arj/ArjArchiveInputStream.java new file mode 100644 index 000000000..c22d4c047 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/arj/ArjArchiveInputStream.java @@ -0,0 +1,353 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.arj; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.zip.CRC32; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.utils.BoundedInputStream; +import org.apache.commons.compress.utils.CRC32VerifyingInputStream; +import org.apache.commons.compress.utils.IOUtils; + +/** + * Implements the "arj" archive format as an InputStream. + * <p> + * <a href="http://farmanager.com/svn/trunk/plugins/multiarc/arc.doc/arj.txt">Reference</a> + * @NotThreadSafe + * @since 1.6 + */ +public class ArjArchiveInputStream extends ArchiveInputStream { + private static final int ARJ_MAGIC_1 = 0x60; + private static final int ARJ_MAGIC_2 = 0xEA; + private final DataInputStream in; + private final String charsetName; + private final MainHeader mainHeader; + private LocalFileHeader currentLocalFileHeader = null; + private InputStream currentInputStream = null; + + /** + * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in. + * @param inputStream the underlying stream, whose ownership is taken + * @param charsetName the charset used for file names and comments + * in the archive. May be {@code null} to use the platform default. 
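// [Editor's note] Illustrative sketch only, not part of this change set: reading an ARJ
// archive with an explicit charset for names and comments ("CP437" matches the default used
// by the single-argument constructor below). The file name is hypothetical;
// ArchiveException/IOException handling is left to the caller.
try (ArjArchiveInputStream arj = new ArjArchiveInputStream(
        new java.io.BufferedInputStream(new java.io.FileInputStream("archive.arj")), "CP437")) {
    System.out.println("Archive name: " + arj.getArchiveName());
    ArchiveEntry e;
    while ((e = arj.getNextEntry()) != null) {
        System.out.println(e.getName() + "\t" + e.getSize());
    }
}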
+ * @throws ArchiveException if an exception occurs while reading + */ + public ArjArchiveInputStream(final InputStream inputStream, + final String charsetName) throws ArchiveException { + in = new DataInputStream(inputStream); + this.charsetName = charsetName; + try { + mainHeader = readMainHeader(); + if ((mainHeader.arjFlags & MainHeader.Flags.GARBLED) != 0) { + throw new ArchiveException("Encrypted ARJ files are unsupported"); + } + if ((mainHeader.arjFlags & MainHeader.Flags.VOLUME) != 0) { + throw new ArchiveException("Multi-volume ARJ files are unsupported"); + } + } catch (final IOException ioException) { + throw new ArchiveException(ioException.getMessage(), ioException); + } + } + + /** + * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in, + * and using the CP437 character encoding. + * @param inputStream the underlying stream, whose ownership is taken + * @throws ArchiveException if an exception occurs while reading + */ + public ArjArchiveInputStream(final InputStream inputStream) + throws ArchiveException { + this(inputStream, "CP437"); + } + + @Override + public void close() throws IOException { + in.close(); + } + + private int read8(final DataInputStream dataIn) throws IOException { + final int value = dataIn.readUnsignedByte(); + count(1); + return value; + } + + private int read16(final DataInputStream dataIn) throws IOException { + final int value = dataIn.readUnsignedShort(); + count(2); + return Integer.reverseBytes(value) >>> 16; + } + + private int read32(final DataInputStream dataIn) throws IOException { + final int value = dataIn.readInt(); + count(4); + return Integer.reverseBytes(value); + } + + private String readString(final DataInputStream dataIn) throws IOException { + final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + int nextByte; + while ((nextByte = dataIn.readUnsignedByte()) != 0) { + buffer.write(nextByte); + } + if (charsetName != null) { + return new String(buffer.toByteArray(), charsetName); + } + // intentionally using the default encoding as that's the contract for a null charsetName + return new String(buffer.toByteArray()); + } + + private void readFully(final DataInputStream dataIn, final byte[] b) + throws IOException { + dataIn.readFully(b); + count(b.length); + } + + private byte[] readHeader() throws IOException { + boolean found = false; + byte[] basicHeaderBytes = null; + do { + int first = 0; + int second = read8(in); + do { + first = second; + second = read8(in); + } while (first != ARJ_MAGIC_1 && second != ARJ_MAGIC_2); + final int basicHeaderSize = read16(in); + if (basicHeaderSize == 0) { + // end of archive + return null; + } + if (basicHeaderSize <= 2600) { + basicHeaderBytes = new byte[basicHeaderSize]; + readFully(in, basicHeaderBytes); + final long basicHeaderCrc32 = read32(in) & 0xFFFFFFFFL; + final CRC32 crc32 = new CRC32(); + crc32.update(basicHeaderBytes); + if (basicHeaderCrc32 == crc32.getValue()) { + found = true; + } + } + } while (!found); + return basicHeaderBytes; + } + + private MainHeader readMainHeader() throws IOException { + final byte[] basicHeaderBytes = readHeader(); + if (basicHeaderBytes == null) { + throw new IOException("Archive ends without any headers"); + } + final DataInputStream basicHeader = new DataInputStream( + new ByteArrayInputStream(basicHeaderBytes)); + + final int firstHeaderSize = basicHeader.readUnsignedByte(); + final byte[] firstHeaderBytes = new byte[firstHeaderSize - 1]; + basicHeader.readFully(firstHeaderBytes); + final 
DataInputStream firstHeader = new DataInputStream( + new ByteArrayInputStream(firstHeaderBytes)); + + final MainHeader hdr = new MainHeader(); + hdr.archiverVersionNumber = firstHeader.readUnsignedByte(); + hdr.minVersionToExtract = firstHeader.readUnsignedByte(); + hdr.hostOS = firstHeader.readUnsignedByte(); + hdr.arjFlags = firstHeader.readUnsignedByte(); + hdr.securityVersion = firstHeader.readUnsignedByte(); + hdr.fileType = firstHeader.readUnsignedByte(); + hdr.reserved = firstHeader.readUnsignedByte(); + hdr.dateTimeCreated = read32(firstHeader); + hdr.dateTimeModified = read32(firstHeader); + hdr.archiveSize = 0xffffFFFFL & read32(firstHeader); + hdr.securityEnvelopeFilePosition = read32(firstHeader); + hdr.fileSpecPosition = read16(firstHeader); + hdr.securityEnvelopeLength = read16(firstHeader); + pushedBackBytes(20); // count has already counted them via readFully + hdr.encryptionVersion = firstHeader.readUnsignedByte(); + hdr.lastChapter = firstHeader.readUnsignedByte(); + + if (firstHeaderSize >= 33) { + hdr.arjProtectionFactor = firstHeader.readUnsignedByte(); + hdr.arjFlags2 = firstHeader.readUnsignedByte(); + firstHeader.readUnsignedByte(); + firstHeader.readUnsignedByte(); + } + + hdr.name = readString(basicHeader); + hdr.comment = readString(basicHeader); + + final int extendedHeaderSize = read16(in); + if (extendedHeaderSize > 0) { + hdr.extendedHeaderBytes = new byte[extendedHeaderSize]; + readFully(in, hdr.extendedHeaderBytes); + final long extendedHeaderCrc32 = 0xffffFFFFL & read32(in); + final CRC32 crc32 = new CRC32(); + crc32.update(hdr.extendedHeaderBytes); + if (extendedHeaderCrc32 != crc32.getValue()) { + throw new IOException("Extended header CRC32 verification failure"); + } + } + + return hdr; + } + + private LocalFileHeader readLocalFileHeader() throws IOException { + final byte[] basicHeaderBytes = readHeader(); + if (basicHeaderBytes == null) { + return null; + } + try (final DataInputStream basicHeader = new DataInputStream(new ByteArrayInputStream(basicHeaderBytes))) { + + final int firstHeaderSize = basicHeader.readUnsignedByte(); + final byte[] firstHeaderBytes = new byte[firstHeaderSize - 1]; + basicHeader.readFully(firstHeaderBytes); + try (final DataInputStream firstHeader = new DataInputStream(new ByteArrayInputStream(firstHeaderBytes))) { + + final LocalFileHeader localFileHeader = new LocalFileHeader(); + localFileHeader.archiverVersionNumber = firstHeader.readUnsignedByte(); + localFileHeader.minVersionToExtract = firstHeader.readUnsignedByte(); + localFileHeader.hostOS = firstHeader.readUnsignedByte(); + localFileHeader.arjFlags = firstHeader.readUnsignedByte(); + localFileHeader.method = firstHeader.readUnsignedByte(); + localFileHeader.fileType = firstHeader.readUnsignedByte(); + localFileHeader.reserved = firstHeader.readUnsignedByte(); + localFileHeader.dateTimeModified = read32(firstHeader); + localFileHeader.compressedSize = 0xffffFFFFL & read32(firstHeader); + localFileHeader.originalSize = 0xffffFFFFL & read32(firstHeader); + localFileHeader.originalCrc32 = 0xffffFFFFL & read32(firstHeader); + localFileHeader.fileSpecPosition = read16(firstHeader); + localFileHeader.fileAccessMode = read16(firstHeader); + pushedBackBytes(20); + localFileHeader.firstChapter = firstHeader.readUnsignedByte(); + localFileHeader.lastChapter = firstHeader.readUnsignedByte(); + + readExtraData(firstHeaderSize, firstHeader, localFileHeader); + + localFileHeader.name = readString(basicHeader); + localFileHeader.comment = readString(basicHeader); + + final 
ArrayList<byte[]> extendedHeaders = new ArrayList<>(); + int extendedHeaderSize; + while ((extendedHeaderSize = read16(in)) > 0) { + final byte[] extendedHeaderBytes = new byte[extendedHeaderSize]; + readFully(in, extendedHeaderBytes); + final long extendedHeaderCrc32 = 0xffffFFFFL & read32(in); + final CRC32 crc32 = new CRC32(); + crc32.update(extendedHeaderBytes); + if (extendedHeaderCrc32 != crc32.getValue()) { + throw new IOException("Extended header CRC32 verification failure"); + } + extendedHeaders.add(extendedHeaderBytes); + } + localFileHeader.extendedHeaders = extendedHeaders.toArray(new byte[extendedHeaders.size()][]); + + return localFileHeader; + } + } + } + + private void readExtraData(final int firstHeaderSize, final DataInputStream firstHeader, + final LocalFileHeader localFileHeader) throws IOException { + if (firstHeaderSize >= 33) { + localFileHeader.extendedFilePosition = read32(firstHeader); + if (firstHeaderSize >= 45) { + localFileHeader.dateTimeAccessed = read32(firstHeader); + localFileHeader.dateTimeCreated = read32(firstHeader); + localFileHeader.originalSizeEvenForVolumes = read32(firstHeader); + pushedBackBytes(12); + } + pushedBackBytes(4); + } + } + + /** + * Checks if the signature matches what is expected for an arj file. + * + * @param signature + * the bytes to check + * @param length + * the number of bytes to check + * @return true, if this stream is an arj archive stream, false otherwise + */ + public static boolean matches(final byte[] signature, final int length) { + return length >= 2 && + (0xff & signature[0]) == ARJ_MAGIC_1 && + (0xff & signature[1]) == ARJ_MAGIC_2; + } + + /** + * Gets the archive's recorded name. + * @return the archive's name + */ + public String getArchiveName() { + return mainHeader.name; + } + + /** + * Gets the archive's comment. 
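+ * <p>Both the archive name and the comment are taken from the main header,
+ * which is read when the stream is constructed, so they are available
+ * before the first entry has been read.</p>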
+ * @return the archive's comment + */ + public String getArchiveComment() { + return mainHeader.comment; + } + + @Override + public ArjArchiveEntry getNextEntry() throws IOException { + if (currentInputStream != null) { + // return value ignored as IOUtils.skip ensures the stream is drained completely + IOUtils.skip(currentInputStream, Long.MAX_VALUE); + currentInputStream.close(); + currentLocalFileHeader = null; + currentInputStream = null; + } + + currentLocalFileHeader = readLocalFileHeader(); + if (currentLocalFileHeader != null) { + currentInputStream = new BoundedInputStream(in, currentLocalFileHeader.compressedSize); + if (currentLocalFileHeader.method == LocalFileHeader.Methods.STORED) { + currentInputStream = new CRC32VerifyingInputStream(currentInputStream, + currentLocalFileHeader.originalSize, currentLocalFileHeader.originalCrc32); + } + return new ArjArchiveEntry(currentLocalFileHeader); + } + currentInputStream = null; + return null; + } + + @Override + public boolean canReadEntryData(final ArchiveEntry ae) { + return ae instanceof ArjArchiveEntry + && ((ArjArchiveEntry) ae).getMethod() == LocalFileHeader.Methods.STORED; + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (currentLocalFileHeader == null) { + throw new IllegalStateException("No current arj entry"); + } + if (currentLocalFileHeader.method != LocalFileHeader.Methods.STORED) { + throw new IOException("Unsupported compression method " + currentLocalFileHeader.method); + } + return currentInputStream.read(b, off, len); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/arj/LocalFileHeader.java b/src/main/java/org/apache/commons/compress/archivers/arj/LocalFileHeader.java new file mode 100644 index 000000000..6ecb297be --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/arj/LocalFileHeader.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.arj; + +import java.util.Arrays; + +class LocalFileHeader { + int archiverVersionNumber; + int minVersionToExtract; + int hostOS; + int arjFlags; + int method; + int fileType; + int reserved; + int dateTimeModified; + long compressedSize; + long originalSize; + long originalCrc32; + int fileSpecPosition; + int fileAccessMode; + int firstChapter; + int lastChapter; + + int extendedFilePosition; + int dateTimeAccessed; + int dateTimeCreated; + int originalSizeEvenForVolumes; + + String name; + String comment; + + byte[][] extendedHeaders = null; + + static class Flags { + static final int GARBLED = 0x01; + static final int VOLUME = 0x04; + static final int EXTFILE = 0x08; + static final int PATHSYM = 0x10; + static final int BACKUP = 0x20; + } + + static class FileTypes { + static final int BINARY = 0; + static final int SEVEN_BIT_TEXT = 1; + static final int DIRECTORY = 3; + static final int VOLUME_LABEL = 4; + static final int CHAPTER_LABEL = 5; + } + + static class Methods { + static final int STORED = 0; + static final int COMPRESSED_MOST = 1; + static final int COMPRESSED_FASTEST = 4; + static final int NO_DATA_NO_CRC = 8; + static final int NO_DATA = 9; + } + + @Override + public String toString() { + final StringBuilder builder = new StringBuilder(); + builder.append("LocalFileHeader [archiverVersionNumber="); + builder.append(archiverVersionNumber); + builder.append(", minVersionToExtract="); + builder.append(minVersionToExtract); + builder.append(", hostOS="); + builder.append(hostOS); + builder.append(", arjFlags="); + builder.append(arjFlags); + builder.append(", method="); + builder.append(method); + builder.append(", fileType="); + builder.append(fileType); + builder.append(", reserved="); + builder.append(reserved); + builder.append(", dateTimeModified="); + builder.append(dateTimeModified); + builder.append(", compressedSize="); + builder.append(compressedSize); + builder.append(", originalSize="); + builder.append(originalSize); + builder.append(", originalCrc32="); + builder.append(originalCrc32); + builder.append(", fileSpecPosition="); + builder.append(fileSpecPosition); + builder.append(", fileAccessMode="); + builder.append(fileAccessMode); + builder.append(", firstChapter="); + builder.append(firstChapter); + builder.append(", lastChapter="); + builder.append(lastChapter); + builder.append(", extendedFilePosition="); + builder.append(extendedFilePosition); + builder.append(", dateTimeAccessed="); + builder.append(dateTimeAccessed); + builder.append(", dateTimeCreated="); + builder.append(dateTimeCreated); + builder.append(", originalSizeEvenForVolumes="); + builder.append(originalSizeEvenForVolumes); + builder.append(", name="); + builder.append(name); + builder.append(", comment="); + builder.append(comment); + builder.append(", extendedHeaders="); + builder.append(Arrays.toString(extendedHeaders)); + builder.append("]"); + return builder.toString(); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/arj/MainHeader.java b/src/main/java/org/apache/commons/compress/archivers/arj/MainHeader.java new file mode 100644 index 000000000..7a9f212a2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/arj/MainHeader.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.arj; + +import java.util.Arrays; + +class MainHeader { + int archiverVersionNumber; + int minVersionToExtract; + int hostOS; + int arjFlags; + int securityVersion; + int fileType; + int reserved; + int dateTimeCreated; + int dateTimeModified; + long archiveSize; + int securityEnvelopeFilePosition; + int fileSpecPosition; + int securityEnvelopeLength; + int encryptionVersion; + int lastChapter; + int arjProtectionFactor; + int arjFlags2; + String name; + String comment; + byte[] extendedHeaderBytes = null; + + static class Flags { + static final int GARBLED = 0x01; + static final int OLD_SECURED_NEW_ANSI_PAGE = 0x02; + static final int VOLUME = 0x04; + static final int ARJPROT = 0x08; + static final int PATHSYM = 0x10; + static final int BACKUP = 0x20; + static final int SECURED = 0x40; + static final int ALTNAME = 0x80; + } + + + @Override + public String toString() { + final StringBuilder builder = new StringBuilder(); + builder.append("MainHeader [archiverVersionNumber="); + builder.append(archiverVersionNumber); + builder.append(", minVersionToExtract="); + builder.append(minVersionToExtract); + builder.append(", hostOS="); + builder.append(hostOS); + builder.append(", arjFlags="); + builder.append(arjFlags); + builder.append(", securityVersion="); + builder.append(securityVersion); + builder.append(", fileType="); + builder.append(fileType); + builder.append(", reserved="); + builder.append(reserved); + builder.append(", dateTimeCreated="); + builder.append(dateTimeCreated); + builder.append(", dateTimeModified="); + builder.append(dateTimeModified); + builder.append(", archiveSize="); + builder.append(archiveSize); + builder.append(", securityEnvelopeFilePosition="); + builder.append(securityEnvelopeFilePosition); + builder.append(", fileSpecPosition="); + builder.append(fileSpecPosition); + builder.append(", securityEnvelopeLength="); + builder.append(securityEnvelopeLength); + builder.append(", encryptionVersion="); + builder.append(encryptionVersion); + builder.append(", lastChapter="); + builder.append(lastChapter); + builder.append(", arjProtectionFactor="); + builder.append(arjProtectionFactor); + builder.append(", arjFlags2="); + builder.append(arjFlags2); + builder.append(", name="); + builder.append(name); + builder.append(", comment="); + builder.append(comment); + builder.append(", extendedHeaderBytes="); + builder.append(Arrays.toString(extendedHeaderBytes)); + builder.append("]"); + return builder.toString(); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/arj/package.html b/src/main/java/org/apache/commons/compress/archivers/arj/package.html new file mode 100644 index 000000000..de18f61d8 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/arj/package.html @@ -0,0 +1,24 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream classes for reading archives using + the ARJ format.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java new file mode 100644 index 000000000..79e7542df --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java @@ -0,0 +1,933 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.cpio; + +import java.io.File; +import java.nio.charset.Charset; +import java.util.Date; + +import org.apache.commons.compress.archivers.ArchiveEntry; + +/** + * A cpio archive consists of a sequence of files. There are several types of + * headers defided in two categories of new and old format. The headers are + * recognized by magic numbers: + * + * <ul> + * <li>"070701" ASCII for new portable format</li> + * <li>"070702" ASCII for new portable format with CRC</li> + * <li>"070707" ASCII for old ascii (also known as Portable ASCII, odc or old + * character format</li> + * <li>070707 binary for old binary</li> + * </ul> + * + * <p>The old binary format is limited to 16 bits for user id, group + * id, device, and inode numbers. It is limited to 4 gigabyte file + * sizes. + * + * The old ASCII format is limited to 18 bits for the user id, group + * id, device, and inode numbers. It is limited to 8 gigabyte file + * sizes. + * + * The new ASCII format is limited to 4 gigabyte file sizes. + * + * CPIO 2.5 knows also about tar, but it is not recognized here.</p> + * + * + * <h3>OLD FORMAT</h3> + * + * <p>Each file has a 76 (ascii) / 26 (binary) byte header, a variable + * length, NUL terminated filename, and variable length file data. A + * header for a filename "TRAILER!!!" indicates the end of the + * archive.</p> + * + * <p>All the fields in the header are ISO 646 (approximately ASCII) + * strings of octal numbers, left padded, not NUL terminated.</p> + * + * <pre> + * FIELDNAME NOTES + * c_magic The integer value octal 070707. 
This value can be used to deter- + * mine whether this archive is written with little-endian or big- + * endian integers. + * c_dev Device that contains a directory entry for this file + * c_ino I-node number that identifies the input file to the file system + * c_mode The mode specifies both the regular permissions and the file type. + * c_uid Numeric User ID of the owner of the input file + * c_gid Numeric Group ID of the owner of the input file + * c_nlink Number of links that are connected to the input file + * c_rdev For block special and character special entries, this field + * contains the associated device number. For all other entry types, + * it should be set to zero by writers and ignored by readers. + * c_mtime[2] Modification time of the file, indicated as the number of seconds + * since the start of the epoch, 00:00:00 UTC January 1, 1970. The + * four-byte integer is stored with the most-significant 16 bits + * first followed by the least-significant 16 bits. Each of the two + * 16 bit values are stored in machine-native byte order. + * c_namesize Length of the path name, including the terminating null byte + * c_filesize[2] Length of the file in bytes. This is the length of the data + * section that follows the header structure. Must be 0 for + * FIFOs and directories + * + * All fields are unsigned short fields with 16-bit integer values + * apart from c_mtime and c_filesize which are 32-bit integer values + * </pre> + * + * <p>If necessary, the filename and file data are padded with a NUL byte to an even length</p> + * + * <p>Special files, directories, and the trailer are recorded with + * the h_filesize field equal to 0.</p> + * + * <p>In the ASCII version of this format, the 16-bit entries are represented as 6-byte octal numbers, + * and the 32-bit entries are represented as 11-byte octal numbers. No padding is added.</p> + * + * <h3>NEW FORMAT</h3> + * + * <p>Each file has a 110 byte header, a variable length, NUL + * terminated filename, and variable length file data. A header for a + * filename "TRAILER!!!" indicates the end of the archive. All the + * fields in the header are ISO 646 (approximately ASCII) strings of + * hexadecimal numbers, left padded, not NUL terminated.</p> + * + * <pre> + * FIELDNAME NOTES + * c_magic[6] The string 070701 for new ASCII, the string 070702 for new ASCII with CRC + * c_ino[8] + * c_mode[8] + * c_uid[8] + * c_gid[8] + * c_nlink[8] + * c_mtim[8] + * c_filesize[8] must be 0 for FIFOs and directories + * c_maj[8] + * c_min[8] + * c_rmaj[8] only valid for chr and blk special files + * c_rmin[8] only valid for chr and blk special files + * c_namesize[8] count includes terminating NUL in pathname + * c_check[8] 0 for "new" portable format; for CRC format + * the sum of all the bytes in the file + * </pre> + * + * <p>New ASCII Format The "new" ASCII format uses 8-byte hexadecimal + * fields for all numbers and separates device numbers into separate + * fields for major and minor numbers.</p> + * + * <p>The pathname is followed by NUL bytes so that the total size of + * the fixed header plus pathname is a multiple of four. Likewise, the + * file data is padded to a multiple of four bytes.</p> + * + * <p>This class uses mutable fields and is not considered to be + * threadsafe.</p> + * + * <p>Based on code from the jRPM project (http://jrpm.sourceforge.net).</p> + * + * <p>The MAGIC numbers and other constants are defined in {@link CpioConstants}</p> + * + * <p> + * N.B. 
does not handle the cpio "tar" format + * </p> + * @NotThreadSafe + * @see <a href="https://people.freebsd.org/~kientzle/libarchive/man/cpio.5.txt">https://people.freebsd.org/~kientzle/libarchive/man/cpio.5.txt</a> + */ +public class CpioArchiveEntry implements CpioConstants, ArchiveEntry { + + // Header description fields - should be same throughout an archive + + /** + * See constructor documenation for possible values. + */ + private final short fileFormat; + + /** The number of bytes in each header record; depends on the file format */ + private final int headerSize; + + /** The boundary to which the header and data elements are aligned: 0, 2 or 4 bytes */ + private final int alignmentBoundary; + + // Header fields + + private long chksum = 0; + + /** Number of bytes in the file */ + private long filesize = 0; + + private long gid = 0; + + private long inode = 0; + + private long maj = 0; + + private long min = 0; + + private long mode = 0; + + private long mtime = 0; + + private String name; + + private long nlink = 0; + + private long rmaj = 0; + + private long rmin = 0; + + private long uid = 0; + + /** + * Creates a CpioArchiveEntry with a specified format. + * + * @param format + * The cpio format for this entry. + * <p> + * Possible format values are: + * <pre> + * CpioConstants.FORMAT_NEW + * CpioConstants.FORMAT_NEW_CRC + * CpioConstants.FORMAT_OLD_BINARY + * CpioConstants.FORMAT_OLD_ASCII + * </pre> + */ + public CpioArchiveEntry(final short format) { + switch (format) { + case FORMAT_NEW: + this.headerSize = 110; + this.alignmentBoundary = 4; + break; + case FORMAT_NEW_CRC: + this.headerSize = 110; + this.alignmentBoundary = 4; + break; + case FORMAT_OLD_ASCII: + this.headerSize = 76; + this.alignmentBoundary = 0; + break; + case FORMAT_OLD_BINARY: + this.headerSize = 26; + this.alignmentBoundary = 2; + break; + default: + throw new IllegalArgumentException("Unknown header type"); + } + this.fileFormat = format; + } + + /** + * Creates a CpioArchiveEntry with a specified name. The format of + * this entry will be the new format. + * + * @param name + * The name of this entry. + */ + public CpioArchiveEntry(final String name) { + this(FORMAT_NEW, name); + } + + /** + * Creates a CpioArchiveEntry with a specified name. + * + * @param format + * The cpio format for this entry. + * @param name + * The name of this entry. + * <p> + * Possible format values are: + * <pre> + * CpioConstants.FORMAT_NEW + * CpioConstants.FORMAT_NEW_CRC + * CpioConstants.FORMAT_OLD_BINARY + * CpioConstants.FORMAT_OLD_ASCII + * </pre> + * + * @since 1.1 + */ + public CpioArchiveEntry(final short format, final String name) { + this(format); + this.name = name; + } + + /** + * Creates a CpioArchiveEntry with a specified name. The format of + * this entry will be the new format. + * + * @param name + * The name of this entry. + * @param size + * The size of this entry + */ + public CpioArchiveEntry(final String name, final long size) { + this(name); + this.setSize(size); + } + + /** + * Creates a CpioArchiveEntry with a specified name. + * + * @param format + * The cpio format for this entry. + * @param name + * The name of this entry. 
+ * @param size + * The size of this entry + * <p> + * Possible format values are: + * <pre> + * CpioConstants.FORMAT_NEW + * CpioConstants.FORMAT_NEW_CRC + * CpioConstants.FORMAT_OLD_BINARY + * CpioConstants.FORMAT_OLD_ASCII + * </pre> + * + * @since 1.1 + */ + public CpioArchiveEntry(final short format, final String name, + final long size) { + this(format, name); + this.setSize(size); + } + + /** + * Creates a CpioArchiveEntry with a specified name for a + * specified file. The format of this entry will be the new + * format. + * + * @param inputFile + * The file to gather information from. + * @param entryName + * The name of this entry. + */ + public CpioArchiveEntry(final File inputFile, final String entryName) { + this(FORMAT_NEW, inputFile, entryName); + } + + /** + * Creates a CpioArchiveEntry with a specified name for a + * specified file. + * + * @param format + * The cpio format for this entry. + * @param inputFile + * The file to gather information from. + * @param entryName + * The name of this entry. + * <p> + * Possible format values are: + * <pre> + * CpioConstants.FORMAT_NEW + * CpioConstants.FORMAT_NEW_CRC + * CpioConstants.FORMAT_OLD_BINARY + * CpioConstants.FORMAT_OLD_ASCII + * </pre> + * + * @since 1.1 + */ + public CpioArchiveEntry(final short format, final File inputFile, + final String entryName) { + this(format, entryName, inputFile.isFile() ? inputFile.length() : 0); + if (inputFile.isDirectory()){ + setMode(C_ISDIR); + } else if (inputFile.isFile()){ + setMode(C_ISREG); + } else { + throw new IllegalArgumentException("Cannot determine type of file " + + inputFile.getName()); + } + // TODO set other fields as needed + setTime(inputFile.lastModified() / 1000); + } + + /** + * Check if the method is allowed for the defined format. + */ + private void checkNewFormat() { + if ((this.fileFormat & FORMAT_NEW_MASK) == 0) { + throw new UnsupportedOperationException(); + } + } + + /** + * Check if the method is allowed for the defined format. + */ + private void checkOldFormat() { + if ((this.fileFormat & FORMAT_OLD_MASK) == 0) { + throw new UnsupportedOperationException(); + } + } + + /** + * Get the checksum. + * Only supported for the new formats. + * + * @return Returns the checksum. + * @throws UnsupportedOperationException if the format is not a new format + */ + public long getChksum() { + checkNewFormat(); + return this.chksum & 0xFFFFFFFFL; + } + + /** + * Get the device id. + * + * @return Returns the device id. + * @throws UnsupportedOperationException + * if this method is called for a CpioArchiveEntry with a new + * format. + */ + public long getDevice() { + checkOldFormat(); + return this.min; + } + + /** + * Get the major device id. + * + * @return Returns the major device id. + * @throws UnsupportedOperationException + * if this method is called for a CpioArchiveEntry with an old + * format. + */ + public long getDeviceMaj() { + checkNewFormat(); + return this.maj; + } + + /** + * Get the minor device id + * + * @return Returns the minor device id. + * @throws UnsupportedOperationException if format is not a new format + */ + public long getDeviceMin() { + checkNewFormat(); + return this.min; + } + + /** + * Get the filesize. + * + * @return Returns the filesize. + * @see org.apache.commons.compress.archivers.ArchiveEntry#getSize() + */ + @Override + public long getSize() { + return this.filesize; + } + + /** + * Get the format for this entry. + * + * @return Returns the format. 
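+ * One of {@code CpioConstants.FORMAT_NEW}, {@code FORMAT_NEW_CRC},
+ * {@code FORMAT_OLD_ASCII} or {@code FORMAT_OLD_BINARY}, as passed
+ * to the constructor.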
+ */ + public short getFormat() { + return this.fileFormat; + } + + /** + * Get the group id. + * + * @return Returns the group id. + */ + public long getGID() { + return this.gid; + } + + /** + * Get the header size for this CPIO format + * + * @return Returns the header size in bytes. + */ + public int getHeaderSize() { + return this.headerSize; + } + + /** + * Get the alignment boundary for this CPIO format + * + * @return Returns the aligment boundary (0, 2, 4) in bytes + */ + public int getAlignmentBoundary() { + return this.alignmentBoundary; + } + + /** + * Get the number of bytes needed to pad the header to the alignment boundary. + * + * @deprecated This method doesn't properly work for multi-byte encodings. And + * creates corrupt archives. Use {@link #getHeaderPadCount(Charset)} + * or {@link #getHeaderPadCount(long)} in any case. + * @return the number of bytes needed to pad the header (0,1,2,3) + */ + @Deprecated + public int getHeaderPadCount(){ + return getHeaderPadCount(null); + } + + /** + * Get the number of bytes needed to pad the header to the alignment boundary. + * + * @param charset + * The character set used to encode the entry name in the stream. + * @return the number of bytes needed to pad the header (0,1,2,3) + * @since 1.18 + */ + public int getHeaderPadCount(Charset charset) { + if (name == null) { + return 0; + } + if (charset == null) { + return getHeaderPadCount(name.length()); + } + return getHeaderPadCount(name.getBytes(charset).length); + } + + /** + * Get the number of bytes needed to pad the header to the alignment boundary. + * + * @param namesize + * The length of the name in bytes, as read in the stream. + * Without the trailing zero byte. + * @return the number of bytes needed to pad the header (0,1,2,3) + * + * @since 1.18 + */ + public int getHeaderPadCount(long namesize) { + if (this.alignmentBoundary == 0) { return 0; } + int size = this.headerSize + 1; // Name has terminating null + if (name != null) { + size += namesize; + } + final int remain = size % this.alignmentBoundary; + if (remain > 0) { + return this.alignmentBoundary - remain; + } + return 0; + } + + /** + * Get the number of bytes needed to pad the data to the alignment boundary. + * + * @return the number of bytes needed to pad the data (0,1,2,3) + */ + public int getDataPadCount(){ + if (this.alignmentBoundary == 0) { return 0; } + final long size = this.filesize; + final int remain = (int) (size % this.alignmentBoundary); + if (remain > 0){ + return this.alignmentBoundary - remain; + } + return 0; + } + + /** + * Set the inode. + * + * @return Returns the inode. + */ + public long getInode() { + return this.inode; + } + + /** + * Get the mode of this entry (e.g. directory, regular file). + * + * @return Returns the mode. + */ + public long getMode() { + return mode == 0 && !CPIO_TRAILER.equals(name) ? C_ISREG : mode; + } + + /** + * Get the name. + * + * <p>This method returns the raw name as it is stored inside of the archive.</p> + * + * @return Returns the name. + */ + @Override + public String getName() { + return this.name; + } + + /** + * Get the number of links. + * + * @return Returns the number of links. + */ + public long getNumberOfLinks() { + return nlink == 0 ? + isDirectory() ? 2 : 1 + : nlink; + } + + /** + * Get the remote device id. + * + * @return Returns the remote device id. + * @throws UnsupportedOperationException + * if this method is called for a CpioArchiveEntry with a new + * format. 
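+ * (the new formats split the remote device into separate major and
+ * minor ids, see {@link #getRemoteDeviceMaj()} and
+ * {@link #getRemoteDeviceMin()}).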
+ */ + public long getRemoteDevice() { + checkOldFormat(); + return this.rmin; + } + + /** + * Get the remote major device id. + * + * @return Returns the remote major device id. + * @throws UnsupportedOperationException + * if this method is called for a CpioArchiveEntry with an old + * format. + */ + public long getRemoteDeviceMaj() { + checkNewFormat(); + return this.rmaj; + } + + /** + * Get the remote minor device id. + * + * @return Returns the remote minor device id. + * @throws UnsupportedOperationException + * if this method is called for a CpioArchiveEntry with an old + * format. + */ + public long getRemoteDeviceMin() { + checkNewFormat(); + return this.rmin; + } + + /** + * Get the time in seconds. + * + * @return Returns the time. + */ + public long getTime() { + return this.mtime; + } + + @Override + public Date getLastModifiedDate() { + return new Date(1000 * getTime()); + } + + /** + * Get the user id. + * + * @return Returns the user id. + */ + public long getUID() { + return this.uid; + } + + /** + * Check if this entry represents a block device. + * + * @return TRUE if this entry is a block device. + */ + public boolean isBlockDevice() { + return CpioUtil.fileType(mode) == C_ISBLK; + } + + /** + * Check if this entry represents a character device. + * + * @return TRUE if this entry is a character device. + */ + public boolean isCharacterDevice() { + return CpioUtil.fileType(mode) == C_ISCHR; + } + + /** + * Check if this entry represents a directory. + * + * @return TRUE if this entry is a directory. + */ + @Override + public boolean isDirectory() { + return CpioUtil.fileType(mode) == C_ISDIR; + } + + /** + * Check if this entry represents a network device. + * + * @return TRUE if this entry is a network device. + */ + public boolean isNetwork() { + return CpioUtil.fileType(mode) == C_ISNWK; + } + + /** + * Check if this entry represents a pipe. + * + * @return TRUE if this entry is a pipe. + */ + public boolean isPipe() { + return CpioUtil.fileType(mode) == C_ISFIFO; + } + + /** + * Check if this entry represents a regular file. + * + * @return TRUE if this entry is a regular file. + */ + public boolean isRegularFile() { + return CpioUtil.fileType(mode) == C_ISREG; + } + + /** + * Check if this entry represents a socket. + * + * @return TRUE if this entry is a socket. + */ + public boolean isSocket() { + return CpioUtil.fileType(mode) == C_ISSOCK; + } + + /** + * Check if this entry represents a symbolic link. + * + * @return TRUE if this entry is a symbolic link. + */ + public boolean isSymbolicLink() { + return CpioUtil.fileType(mode) == C_ISLNK; + } + + /** + * Set the checksum. The checksum is calculated by adding all bytes of a + * file to transfer (crc += buf[pos] & 0xFF). + * + * @param chksum + * The checksum to set. + */ + public void setChksum(final long chksum) { + checkNewFormat(); + this.chksum = chksum & 0xFFFFFFFFL; + } + + /** + * Set the device id. + * + * @param device + * The device id to set. + * @throws UnsupportedOperationException + * if this method is called for a CpioArchiveEntry with a new + * format. + */ + public void setDevice(final long device) { + checkOldFormat(); + this.min = device; + } + + /** + * Set major device id. + * + * @param maj + * The major device id to set. + */ + public void setDeviceMaj(final long maj) { + checkNewFormat(); + this.maj = maj; + } + + /** + * Set the minor device id + * + * @param min + * The minor device id to set. 
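+ * @throws UnsupportedOperationException
+ * if this method is called for a CpioArchiveEntry with an old
+ * format.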
+ */ + public void setDeviceMin(final long min) { + checkNewFormat(); + this.min = min; + } + + /** + * Set the filesize. + * + * @param size + * The filesize to set. + */ + public void setSize(final long size) { + if (size < 0 || size > 0xFFFFFFFFL) { + throw new IllegalArgumentException("invalid entry size <" + size + + ">"); + } + this.filesize = size; + } + + /** + * Set the group id. + * + * @param gid + * The group id to set. + */ + public void setGID(final long gid) { + this.gid = gid; + } + + /** + * Set the inode. + * + * @param inode + * The inode to set. + */ + public void setInode(final long inode) { + this.inode = inode; + } + + /** + * Set the mode of this entry (e.g. directory, regular file). + * + * @param mode + * The mode to set. + */ + public void setMode(final long mode) { + final long maskedMode = mode & S_IFMT; + switch ((int) maskedMode) { + case C_ISDIR: + case C_ISLNK: + case C_ISREG: + case C_ISFIFO: + case C_ISCHR: + case C_ISBLK: + case C_ISSOCK: + case C_ISNWK: + break; + default: + throw new IllegalArgumentException( + "Unknown mode. " + + "Full: " + Long.toHexString(mode) + + " Masked: " + Long.toHexString(maskedMode)); + } + + this.mode = mode; + } + + /** + * Set the name. + * + * @param name + * The name to set. + */ + public void setName(final String name) { + this.name = name; + } + + /** + * Set the number of links. + * + * @param nlink + * The number of links to set. + */ + public void setNumberOfLinks(final long nlink) { + this.nlink = nlink; + } + + /** + * Set the remote device id. + * + * @param device + * The remote device id to set. + * @throws UnsupportedOperationException + * if this method is called for a CpioArchiveEntry with a new + * format. + */ + public void setRemoteDevice(final long device) { + checkOldFormat(); + this.rmin = device; + } + + /** + * Set the remote major device id. + * + * @param rmaj + * The remote major device id to set. + * @throws UnsupportedOperationException + * if this method is called for a CpioArchiveEntry with an old + * format. + */ + public void setRemoteDeviceMaj(final long rmaj) { + checkNewFormat(); + this.rmaj = rmaj; + } + + /** + * Set the remote minor device id. + * + * @param rmin + * The remote minor device id to set. + * @throws UnsupportedOperationException + * if this method is called for a CpioArchiveEntry with an old + * format. + */ + public void setRemoteDeviceMin(final long rmin) { + checkNewFormat(); + this.rmin = rmin; + } + + /** + * Set the time in seconds. + * + * @param time + * The time to set. + */ + public void setTime(final long time) { + this.mtime = time; + } + + /** + * Set the user id. + * + * @param uid + * The user id to set. + */ + public void setUID(final long uid) { + this.uid = uid; + } + + /* (non-Javadoc) + * @see java.lang.Object#hashCode() + */ + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + (name == null ? 
0 : name.hashCode()); + return result; + } + + /* (non-Javadoc) + * @see java.lang.Object#equals(java.lang.Object) + */ + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + final CpioArchiveEntry other = (CpioArchiveEntry) obj; + if (name == null) { + return other.name == null; + } else { + return name.equals(other.name); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java new file mode 100644 index 000000000..4cedc272d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java @@ -0,0 +1,582 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.cpio; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; +import org.apache.commons.compress.utils.ArchiveUtils; +import org.apache.commons.compress.utils.CharsetNames; +import org.apache.commons.compress.utils.IOUtils; + +/** + * CpioArchiveInputStream is a stream for reading cpio streams. All formats of + * cpio are supported (old ascii, old binary, new portable format and the new + * portable format with crc). + * + * <p> + * The stream can be read by extracting a cpio entry (containing all + * informations about a entry) and afterwards reading from the stream the file + * specified by the entry. + * </p> + * <pre> + * CpioArchiveInputStream cpioIn = new CpioArchiveInputStream( + * Files.newInputStream(Paths.get("test.cpio"))); + * CpioArchiveEntry cpioEntry; + * + * while ((cpioEntry = cpioIn.getNextEntry()) != null) { + * System.out.println(cpioEntry.getName()); + * int tmp; + * StringBuilder buf = new StringBuilder(); + * while ((tmp = cpIn.read()) != -1) { + * buf.append((char) tmp); + * } + * System.out.println(buf.toString()); + * } + * cpioIn.close(); + * </pre> + * <p> + * Note: This implementation should be compatible to cpio 2.5 + * + * <p>This class uses mutable fields and is not considered to be threadsafe. 
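+ *
+ * <p>A variant of the example above that drains each entry with a byte
+ * buffer instead of single-byte reads; the file name is illustrative and
+ * the handling of the entry data is left as a comment:</p>
+ * <pre>
+ * try (CpioArchiveInputStream cpioIn = new CpioArchiveInputStream(
+ *         Files.newInputStream(Paths.get("test.cpio")))) {
+ *     CpioArchiveEntry cpioEntry;
+ *     byte[] buf = new byte[4096];
+ *     while ((cpioEntry = cpioIn.getNextCPIOEntry()) != null) {
+ *         int n;
+ *         while ((n = cpioIn.read(buf, 0, buf.length)) != -1) {
+ *             // consume n bytes of cpioEntry's data here
+ *         }
+ *     }
+ * }
+ * </pre>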
+ * + * <p>Based on code from the jRPM project (jrpm.sourceforge.net) + */ + +public class CpioArchiveInputStream extends ArchiveInputStream implements + CpioConstants { + + private boolean closed = false; + + private CpioArchiveEntry entry; + + private long entryBytesRead = 0; + + private boolean entryEOF = false; + + private final byte tmpbuf[] = new byte[4096]; + + private long crc = 0; + + private final InputStream in; + + // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) + private final byte[] twoBytesBuf = new byte[2]; + private final byte[] fourBytesBuf = new byte[4]; + private final byte[] sixBytesBuf = new byte[6]; + + private final int blockSize; + + /** + * The encoding to use for filenames and labels. + */ + private final ZipEncoding zipEncoding; + + // the provided encoding (for unit tests) + final String encoding; + + /** + * Construct the cpio input stream with a blocksize of {@link + * CpioConstants#BLOCK_SIZE BLOCK_SIZE} and expecting ASCII file + * names. + * + * @param in + * The cpio stream + */ + public CpioArchiveInputStream(final InputStream in) { + this(in, BLOCK_SIZE, CharsetNames.US_ASCII); + } + + /** + * Construct the cpio input stream with a blocksize of {@link + * CpioConstants#BLOCK_SIZE BLOCK_SIZE}. + * + * @param in + * The cpio stream + * @param encoding + * The encoding of file names to expect - use null for + * the platform's default. + * @since 1.6 + */ + public CpioArchiveInputStream(final InputStream in, final String encoding) { + this(in, BLOCK_SIZE, encoding); + } + + /** + * Construct the cpio input stream with a blocksize of {@link + * CpioConstants#BLOCK_SIZE BLOCK_SIZE} expecting ASCII file + * names. + * + * @param in + * The cpio stream + * @param blockSize + * The block size of the archive. + * @since 1.5 + */ + public CpioArchiveInputStream(final InputStream in, final int blockSize) { + this(in, blockSize, CharsetNames.US_ASCII); + } + + /** + * Construct the cpio input stream with a blocksize of {@link CpioConstants#BLOCK_SIZE BLOCK_SIZE}. + * + * @param in + * The cpio stream + * @param blockSize + * The block size of the archive. + * @param encoding + * The encoding of file names to expect - use null for + * the platform's default. + * @since 1.6 + */ + public CpioArchiveInputStream(final InputStream in, final int blockSize, final String encoding) { + this.in = in; + this.blockSize = blockSize; + this.encoding = encoding; + this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); + } + + /** + * Returns 0 after EOF has reached for the current entry data, otherwise + * always return 1. + * <p> + * Programs should not count on this method to return the actual number of + * bytes that could be read without blocking. + * + * @return 1 before EOF and 0 after EOF has reached for current entry. + * @throws IOException + * if an I/O error has occurred or if a CPIO file error has + * occurred + */ + @Override + public int available() throws IOException { + ensureOpen(); + if (this.entryEOF) { + return 0; + } + return 1; + } + + /** + * Closes the CPIO input stream. + * + * @throws IOException + * if an I/O error has occurred + */ + @Override + public void close() throws IOException { + if (!this.closed) { + in.close(); + this.closed = true; + } + } + + /** + * Closes the current CPIO entry and positions the stream for reading the + * next entry. 
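+ * This is done by skipping whatever part of the current entry's data
+ * has not been read yet.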
+ * + * @throws IOException + * if an I/O error has occurred or if a CPIO file error has + * occurred + */ + private void closeEntry() throws IOException { + // the skip implementation of this class will not skip more + // than Integer.MAX_VALUE bytes + while (skip((long) Integer.MAX_VALUE) == Integer.MAX_VALUE) { // NOPMD + // do nothing + } + } + + /** + * Check to make sure that this stream has not been closed + * + * @throws IOException + * if the stream is already closed + */ + private void ensureOpen() throws IOException { + if (this.closed) { + throw new IOException("Stream closed"); + } + } + + /** + * Reads the next CPIO file entry and positions stream at the beginning of + * the entry data. + * + * @return the CpioArchiveEntry just read + * @throws IOException + * if an I/O error has occurred or if a CPIO file error has + * occurred + */ + public CpioArchiveEntry getNextCPIOEntry() throws IOException { + ensureOpen(); + if (this.entry != null) { + closeEntry(); + } + readFully(twoBytesBuf, 0, twoBytesBuf.length); + if (CpioUtil.byteArray2long(twoBytesBuf, false) == MAGIC_OLD_BINARY) { + this.entry = readOldBinaryEntry(false); + } else if (CpioUtil.byteArray2long(twoBytesBuf, true) + == MAGIC_OLD_BINARY) { + this.entry = readOldBinaryEntry(true); + } else { + System.arraycopy(twoBytesBuf, 0, sixBytesBuf, 0, + twoBytesBuf.length); + readFully(sixBytesBuf, twoBytesBuf.length, + fourBytesBuf.length); + final String magicString = ArchiveUtils.toAsciiString(sixBytesBuf); + switch (magicString) { + case MAGIC_NEW: + this.entry = readNewEntry(false); + break; + case MAGIC_NEW_CRC: + this.entry = readNewEntry(true); + break; + case MAGIC_OLD_ASCII: + this.entry = readOldAsciiEntry(); + break; + default: + throw new IOException("Unknown magic [" + magicString + "]. Occured at byte: " + getBytesRead()); + } + } + + this.entryBytesRead = 0; + this.entryEOF = false; + this.crc = 0; + + if (this.entry.getName().equals(CPIO_TRAILER)) { + this.entryEOF = true; + skipRemainderOfLastBlock(); + return null; + } + return this.entry; + } + + private void skip(final int bytes) throws IOException{ + // bytes cannot be more than 3 bytes + if (bytes > 0) { + readFully(fourBytesBuf, 0, bytes); + } + } + + /** + * Reads from the current CPIO entry into an array of bytes. Blocks until + * some input is available. + * + * @param b + * the buffer into which the data is read + * @param off + * the start offset of the data + * @param len + * the maximum number of bytes read + * @return the actual number of bytes read, or -1 if the end of the entry is + * reached + * @throws IOException + * if an I/O error has occurred or if a CPIO file error has + * occurred + */ + @Override + public int read(final byte[] b, final int off, final int len) + throws IOException { + ensureOpen(); + if (off < 0 || len < 0 || off > b.length - len) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return 0; + } + + if (this.entry == null || this.entryEOF) { + return -1; + } + if (this.entryBytesRead == this.entry.getSize()) { + skip(entry.getDataPadCount()); + this.entryEOF = true; + if (this.entry.getFormat() == FORMAT_NEW_CRC + && this.crc != this.entry.getChksum()) { + throw new IOException("CRC Error. 
Occured at byte: " + + getBytesRead()); + } + return -1; // EOF for this entry + } + final int tmplength = (int) Math.min(len, this.entry.getSize() + - this.entryBytesRead); + if (tmplength < 0) { + return -1; + } + + final int tmpread = readFully(b, off, tmplength); + if (this.entry.getFormat() == FORMAT_NEW_CRC) { + for (int pos = 0; pos < tmpread; pos++) { + this.crc += b[pos] & 0xFF; + this.crc &= 0xFFFFFFFFL; + } + } + if (tmpread > 0) { + this.entryBytesRead += tmpread; + } + + return tmpread; + } + + private final int readFully(final byte[] b, final int off, final int len) + throws IOException { + final int count = IOUtils.readFully(in, b, off, len); + count(count); + if (count < len) { + throw new EOFException(); + } + return count; + } + + private long readBinaryLong(final int length, final boolean swapHalfWord) + throws IOException { + final byte tmp[] = new byte[length]; + readFully(tmp, 0, tmp.length); + return CpioUtil.byteArray2long(tmp, swapHalfWord); + } + + private long readAsciiLong(final int length, final int radix) + throws IOException { + final byte tmpBuffer[] = new byte[length]; + readFully(tmpBuffer, 0, tmpBuffer.length); + return Long.parseLong(ArchiveUtils.toAsciiString(tmpBuffer), radix); + } + + private CpioArchiveEntry readNewEntry(final boolean hasCrc) + throws IOException { + CpioArchiveEntry ret; + if (hasCrc) { + ret = new CpioArchiveEntry(FORMAT_NEW_CRC); + } else { + ret = new CpioArchiveEntry(FORMAT_NEW); + } + + ret.setInode(readAsciiLong(8, 16)); + final long mode = readAsciiLong(8, 16); + if (CpioUtil.fileType(mode) != 0){ // mode is initialised to 0 + ret.setMode(mode); + } + ret.setUID(readAsciiLong(8, 16)); + ret.setGID(readAsciiLong(8, 16)); + ret.setNumberOfLinks(readAsciiLong(8, 16)); + ret.setTime(readAsciiLong(8, 16)); + ret.setSize(readAsciiLong(8, 16)); + ret.setDeviceMaj(readAsciiLong(8, 16)); + ret.setDeviceMin(readAsciiLong(8, 16)); + ret.setRemoteDeviceMaj(readAsciiLong(8, 16)); + ret.setRemoteDeviceMin(readAsciiLong(8, 16)); + final long namesize = readAsciiLong(8, 16); + ret.setChksum(readAsciiLong(8, 16)); + final String name = readCString((int) namesize); + ret.setName(name); + if (CpioUtil.fileType(mode) == 0 && !name.equals(CPIO_TRAILER)){ + throw new IOException("Mode 0 only allowed in the trailer. Found entry name: " + + ArchiveUtils.sanitize(name) + + " Occured at byte: " + getBytesRead()); + } + skip(ret.getHeaderPadCount(namesize - 1)); + + return ret; + } + + private CpioArchiveEntry readOldAsciiEntry() throws IOException { + final CpioArchiveEntry ret = new CpioArchiveEntry(FORMAT_OLD_ASCII); + + ret.setDevice(readAsciiLong(6, 8)); + ret.setInode(readAsciiLong(6, 8)); + final long mode = readAsciiLong(6, 8); + if (CpioUtil.fileType(mode) != 0) { + ret.setMode(mode); + } + ret.setUID(readAsciiLong(6, 8)); + ret.setGID(readAsciiLong(6, 8)); + ret.setNumberOfLinks(readAsciiLong(6, 8)); + ret.setRemoteDevice(readAsciiLong(6, 8)); + ret.setTime(readAsciiLong(11, 8)); + final long namesize = readAsciiLong(6, 8); + ret.setSize(readAsciiLong(11, 8)); + final String name = readCString((int) namesize); + ret.setName(name); + if (CpioUtil.fileType(mode) == 0 && !name.equals(CPIO_TRAILER)){ + throw new IOException("Mode 0 only allowed in the trailer. 
Found entry: " + + ArchiveUtils.sanitize(name) + + " Occured at byte: " + getBytesRead()); + } + + return ret; + } + + private CpioArchiveEntry readOldBinaryEntry(final boolean swapHalfWord) + throws IOException { + final CpioArchiveEntry ret = new CpioArchiveEntry(FORMAT_OLD_BINARY); + + ret.setDevice(readBinaryLong(2, swapHalfWord)); + ret.setInode(readBinaryLong(2, swapHalfWord)); + final long mode = readBinaryLong(2, swapHalfWord); + if (CpioUtil.fileType(mode) != 0){ + ret.setMode(mode); + } + ret.setUID(readBinaryLong(2, swapHalfWord)); + ret.setGID(readBinaryLong(2, swapHalfWord)); + ret.setNumberOfLinks(readBinaryLong(2, swapHalfWord)); + ret.setRemoteDevice(readBinaryLong(2, swapHalfWord)); + ret.setTime(readBinaryLong(4, swapHalfWord)); + final long namesize = readBinaryLong(2, swapHalfWord); + ret.setSize(readBinaryLong(4, swapHalfWord)); + final String name = readCString((int) namesize); + ret.setName(name); + if (CpioUtil.fileType(mode) == 0 && !name.equals(CPIO_TRAILER)){ + throw new IOException("Mode 0 only allowed in the trailer. Found entry: " + + ArchiveUtils.sanitize(name) + + "Occured at byte: " + getBytesRead()); + } + skip(ret.getHeaderPadCount(namesize - 1)); + + return ret; + } + + private String readCString(final int length) throws IOException { + // don't include trailing NUL in file name to decode + final byte tmpBuffer[] = new byte[length - 1]; + readFully(tmpBuffer, 0, tmpBuffer.length); + this.in.read(); + return zipEncoding.decode(tmpBuffer); + } + + /** + * Skips specified number of bytes in the current CPIO entry. + * + * @param n + * the number of bytes to skip + * @return the actual number of bytes skipped + * @throws IOException + * if an I/O error has occurred + * @throws IllegalArgumentException + * if n < 0 + */ + @Override + public long skip(final long n) throws IOException { + if (n < 0) { + throw new IllegalArgumentException("negative skip length"); + } + ensureOpen(); + final int max = (int) Math.min(n, Integer.MAX_VALUE); + int total = 0; + + while (total < max) { + int len = max - total; + if (len > this.tmpbuf.length) { + len = this.tmpbuf.length; + } + len = read(this.tmpbuf, 0, len); + if (len == -1) { + this.entryEOF = true; + break; + } + total += len; + } + return total; + } + + @Override + public ArchiveEntry getNextEntry() throws IOException { + return getNextCPIOEntry(); + } + + /** + * Skips the padding zeros written after the TRAILER!!! entry. + */ + private void skipRemainderOfLastBlock() throws IOException { + final long readFromLastBlock = getBytesRead() % blockSize; + long remainingBytes = readFromLastBlock == 0 ? 
0 + : blockSize - readFromLastBlock; + while (remainingBytes > 0) { + final long skipped = skip(blockSize - readFromLastBlock); + if (skipped <= 0) { + break; + } + remainingBytes -= skipped; + } + } + + /** + * Checks if the signature matches one of the following magic values: + * + * Strings: + * + * "070701" - MAGIC_NEW + * "070702" - MAGIC_NEW_CRC + * "070707" - MAGIC_OLD_ASCII + * + * Octal Binary value: + * + * 070707 - MAGIC_OLD_BINARY (held as a short) = 0x71C7 or 0xC771 + * @param signature data to match + * @param length length of data + * @return whether the buffer seems to contain CPIO data + */ + public static boolean matches(final byte[] signature, final int length) { + if (length < 6) { + return false; + } + + // Check binary values + if (signature[0] == 0x71 && (signature[1] & 0xFF) == 0xc7) { + return true; + } + if (signature[1] == 0x71 && (signature[0] & 0xFF) == 0xc7) { + return true; + } + + // Check Ascii (String) values + // 3037 3037 30nn + if (signature[0] != 0x30) { + return false; + } + if (signature[1] != 0x37) { + return false; + } + if (signature[2] != 0x30) { + return false; + } + if (signature[3] != 0x37) { + return false; + } + if (signature[4] != 0x30) { + return false; + } + // Check last byte + if (signature[5] == 0x31) { + return true; + } + if (signature[5] == 0x32) { + return true; + } + if (signature[5] == 0x37) { + return true; + } + + return false; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveOutputStream.java new file mode 100644 index 000000000..f317653ad --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveOutputStream.java @@ -0,0 +1,580 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.cpio; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.HashMap; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; +import org.apache.commons.compress.utils.ArchiveUtils; +import org.apache.commons.compress.utils.CharsetNames; + +/** + * CpioArchiveOutputStream is a stream for writing CPIO streams. All formats of + * CPIO are supported (old ASCII, old binary, new portable format and the new + * portable format with CRC). + * + * <p>An entry can be written by creating an instance of CpioArchiveEntry and fill + * it with the necessary values and put it into the CPIO stream. 
Afterwards + * write the contents of the file into the CPIO stream. Either close the stream + * by calling finish() or put a next entry into the cpio stream.</p> + * + * <pre> + * CpioArchiveOutputStream out = new CpioArchiveOutputStream( + * new FileOutputStream(new File("test.cpio"))); + * CpioArchiveEntry entry = new CpioArchiveEntry(); + * entry.setName("testfile"); + * String contents = "12345"; + * entry.setFileSize(contents.length()); + * entry.setMode(CpioConstants.C_ISREG); // regular file + * ... set other attributes, e.g. time, number of links + * out.putArchiveEntry(entry); + * out.write(testContents.getBytes()); + * out.close(); + * </pre> + * + * <p>Note: This implementation should be compatible to cpio 2.5</p> + * + * <p>This class uses mutable fields and is not considered threadsafe.</p> + * + * <p>based on code from the jRPM project (jrpm.sourceforge.net)</p> + */ +public class CpioArchiveOutputStream extends ArchiveOutputStream implements + CpioConstants { + + private CpioArchiveEntry entry; + + private boolean closed = false; + + /** indicates if this archive is finished */ + private boolean finished; + + /** + * See {@link CpioArchiveEntry#setFormat(short)} for possible values. + */ + private final short entryFormat; + + private final HashMap<String, CpioArchiveEntry> names = + new HashMap<>(); + + private long crc = 0; + + private long written; + + private final OutputStream out; + + private final int blockSize; + + private long nextArtificalDeviceAndInode = 1; + + /** + * The encoding to use for filenames and labels. + */ + private final ZipEncoding zipEncoding; + + // the provided encoding (for unit tests) + final String encoding; + + /** + * Construct the cpio output stream with a specified format, a + * blocksize of {@link CpioConstants#BLOCK_SIZE BLOCK_SIZE} and + * using ASCII as the file name encoding. + * + * @param out + * The cpio stream + * @param format + * The format of the stream + */ + public CpioArchiveOutputStream(final OutputStream out, final short format) { + this(out, format, BLOCK_SIZE, CharsetNames.US_ASCII); + } + + /** + * Construct the cpio output stream with a specified format using + * ASCII as the file name encoding. + * + * @param out + * The cpio stream + * @param format + * The format of the stream + * @param blockSize + * The block size of the archive. + * + * @since 1.1 + */ + public CpioArchiveOutputStream(final OutputStream out, final short format, + final int blockSize) { + this(out, format, blockSize, CharsetNames.US_ASCII); + } + + /** + * Construct the cpio output stream with a specified format using + * ASCII as the file name encoding. + * + * @param out + * The cpio stream + * @param format + * The format of the stream + * @param blockSize + * The block size of the archive. + * @param encoding + * The encoding of file names to write - use null for + * the platform's default. + * + * @since 1.6 + */ + public CpioArchiveOutputStream(final OutputStream out, final short format, + final int blockSize, final String encoding) { + this.out = out; + switch (format) { + case FORMAT_NEW: + case FORMAT_NEW_CRC: + case FORMAT_OLD_ASCII: + case FORMAT_OLD_BINARY: + break; + default: + throw new IllegalArgumentException("Unknown format: "+format); + + } + this.entryFormat = format; + this.blockSize = blockSize; + this.encoding = encoding; + this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); + } + + /** + * Construct the cpio output stream. 
The format for this CPIO stream is the + * "new" format using ASCII encoding for file names + * + * @param out + * The cpio stream + */ + public CpioArchiveOutputStream(final OutputStream out) { + this(out, FORMAT_NEW); + } + + /** + * Construct the cpio output stream. The format for this CPIO stream is the + * "new" format. + * + * @param out + * The cpio stream + * @param encoding + * The encoding of file names to write - use null for + * the platform's default. + * @since 1.6 + */ + public CpioArchiveOutputStream(final OutputStream out, final String encoding) { + this(out, FORMAT_NEW, BLOCK_SIZE, encoding); + } + + /** + * Check to make sure that this stream has not been closed + * + * @throws IOException + * if the stream is already closed + */ + private void ensureOpen() throws IOException { + if (this.closed) { + throw new IOException("Stream closed"); + } + } + + /** + * Begins writing a new CPIO file entry and positions the stream to the + * start of the entry data. Closes the current entry if still active. The + * current time will be used if the entry has no set modification time and + * the default header format will be used if no other format is specified in + * the entry. + * + * @param entry + * the CPIO cpioEntry to be written + * @throws IOException + * if an I/O error has occurred or if a CPIO file error has + * occurred + * @throws ClassCastException if entry is not an instance of CpioArchiveEntry + */ + @Override + public void putArchiveEntry(final ArchiveEntry entry) throws IOException { + if(finished) { + throw new IOException("Stream has already been finished"); + } + + final CpioArchiveEntry e = (CpioArchiveEntry) entry; + ensureOpen(); + if (this.entry != null) { + closeArchiveEntry(); // close previous entry + } + if (e.getTime() == -1) { + e.setTime(System.currentTimeMillis() / 1000); + } + + final short format = e.getFormat(); + if (format != this.entryFormat){ + throw new IOException("Header format: "+format+" does not match existing format: "+this.entryFormat); + } + + if (this.names.put(e.getName(), e) != null) { + throw new IOException("duplicate entry: " + e.getName()); + } + + writeHeader(e); + this.entry = e; + this.written = 0; + } + + private void writeHeader(final CpioArchiveEntry e) throws IOException { + switch (e.getFormat()) { + case FORMAT_NEW: + out.write(ArchiveUtils.toAsciiBytes(MAGIC_NEW)); + count(6); + writeNewEntry(e); + break; + case FORMAT_NEW_CRC: + out.write(ArchiveUtils.toAsciiBytes(MAGIC_NEW_CRC)); + count(6); + writeNewEntry(e); + break; + case FORMAT_OLD_ASCII: + out.write(ArchiveUtils.toAsciiBytes(MAGIC_OLD_ASCII)); + count(6); + writeOldAsciiEntry(e); + break; + case FORMAT_OLD_BINARY: + final boolean swapHalfWord = true; + writeBinaryLong(MAGIC_OLD_BINARY, 2, swapHalfWord); + writeOldBinaryEntry(e, swapHalfWord); + break; + default: + throw new IOException("unknown format " + e.getFormat()); + } + } + + private void writeNewEntry(final CpioArchiveEntry entry) throws IOException { + long inode = entry.getInode(); + long devMin = entry.getDeviceMin(); + if (CPIO_TRAILER.equals(entry.getName())) { + inode = devMin = 0; + } else { + if (inode == 0 && devMin == 0) { + inode = nextArtificalDeviceAndInode & 0xFFFFFFFF; + devMin = (nextArtificalDeviceAndInode++ >> 32) & 0xFFFFFFFF; + } else { + nextArtificalDeviceAndInode = + Math.max(nextArtificalDeviceAndInode, + inode + 0x100000000L * devMin) + 1; + } + } + + writeAsciiLong(inode, 8, 16); + writeAsciiLong(entry.getMode(), 8, 16); + writeAsciiLong(entry.getUID(), 8, 16); + 
writeAsciiLong(entry.getGID(), 8, 16); + writeAsciiLong(entry.getNumberOfLinks(), 8, 16); + writeAsciiLong(entry.getTime(), 8, 16); + writeAsciiLong(entry.getSize(), 8, 16); + writeAsciiLong(entry.getDeviceMaj(), 8, 16); + writeAsciiLong(devMin, 8, 16); + writeAsciiLong(entry.getRemoteDeviceMaj(), 8, 16); + writeAsciiLong(entry.getRemoteDeviceMin(), 8, 16); + byte[] name = encode(entry.getName()); + writeAsciiLong(name.length + 1L, 8, 16); + writeAsciiLong(entry.getChksum(), 8, 16); + writeCString(name); + pad(entry.getHeaderPadCount(name.length)); + } + + private void writeOldAsciiEntry(final CpioArchiveEntry entry) + throws IOException { + long inode = entry.getInode(); + long device = entry.getDevice(); + if (CPIO_TRAILER.equals(entry.getName())) { + inode = device = 0; + } else { + if (inode == 0 && device == 0) { + inode = nextArtificalDeviceAndInode & 0777777; + device = (nextArtificalDeviceAndInode++ >> 18) & 0777777; + } else { + nextArtificalDeviceAndInode = + Math.max(nextArtificalDeviceAndInode, + inode + 01000000 * device) + 1; + } + } + + writeAsciiLong(device, 6, 8); + writeAsciiLong(inode, 6, 8); + writeAsciiLong(entry.getMode(), 6, 8); + writeAsciiLong(entry.getUID(), 6, 8); + writeAsciiLong(entry.getGID(), 6, 8); + writeAsciiLong(entry.getNumberOfLinks(), 6, 8); + writeAsciiLong(entry.getRemoteDevice(), 6, 8); + writeAsciiLong(entry.getTime(), 11, 8); + byte[] name = encode(entry.getName()); + writeAsciiLong(name.length + 1L, 6, 8); + writeAsciiLong(entry.getSize(), 11, 8); + writeCString(name); + } + + private void writeOldBinaryEntry(final CpioArchiveEntry entry, + final boolean swapHalfWord) throws IOException { + long inode = entry.getInode(); + long device = entry.getDevice(); + if (CPIO_TRAILER.equals(entry.getName())) { + inode = device = 0; + } else { + if (inode == 0 && device == 0) { + inode = nextArtificalDeviceAndInode & 0xFFFF; + device = (nextArtificalDeviceAndInode++ >> 16) & 0xFFFF; + } else { + nextArtificalDeviceAndInode = + Math.max(nextArtificalDeviceAndInode, + inode + 0x10000 * device) + 1; + } + } + + writeBinaryLong(device, 2, swapHalfWord); + writeBinaryLong(inode, 2, swapHalfWord); + writeBinaryLong(entry.getMode(), 2, swapHalfWord); + writeBinaryLong(entry.getUID(), 2, swapHalfWord); + writeBinaryLong(entry.getGID(), 2, swapHalfWord); + writeBinaryLong(entry.getNumberOfLinks(), 2, swapHalfWord); + writeBinaryLong(entry.getRemoteDevice(), 2, swapHalfWord); + writeBinaryLong(entry.getTime(), 4, swapHalfWord); + byte[] name = encode(entry.getName()); + writeBinaryLong(name.length + 1L, 2, swapHalfWord); + writeBinaryLong(entry.getSize(), 4, swapHalfWord); + writeCString(name); + pad(entry.getHeaderPadCount(name.length)); + } + + /*(non-Javadoc) + * + * @see + * org.apache.commons.compress.archivers.ArchiveOutputStream#closeArchiveEntry + * () + */ + @Override + public void closeArchiveEntry() throws IOException { + if(finished) { + throw new IOException("Stream has already been finished"); + } + + ensureOpen(); + + if (entry == null) { + throw new IOException("Trying to close non-existent entry"); + } + + if (this.entry.getSize() != this.written) { + throw new IOException("invalid entry size (expected " + + this.entry.getSize() + " but got " + this.written + + " bytes)"); + } + pad(this.entry.getDataPadCount()); + if (this.entry.getFormat() == FORMAT_NEW_CRC + && this.crc != this.entry.getChksum()) { + throw new IOException("CRC Error"); + } + this.entry = null; + this.crc = 0; + this.written = 0; + } + + /** + * Writes an array of bytes to the 
current CPIO entry data. This method will + * block until all the bytes are written. + * + * @param b + * the data to be written + * @param off + * the start offset in the data + * @param len + * the number of bytes that are written + * @throws IOException + * if an I/O error has occurred or if a CPIO file error has + * occurred + */ + @Override + public void write(final byte[] b, final int off, final int len) + throws IOException { + ensureOpen(); + if (off < 0 || len < 0 || off > b.length - len) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return; + } + + if (this.entry == null) { + throw new IOException("no current CPIO entry"); + } + if (this.written + len > this.entry.getSize()) { + throw new IOException("attempt to write past end of STORED entry"); + } + out.write(b, off, len); + this.written += len; + if (this.entry.getFormat() == FORMAT_NEW_CRC) { + for (int pos = 0; pos < len; pos++) { + this.crc += b[pos] & 0xFF; + this.crc &= 0xFFFFFFFFL; + } + } + count(len); + } + + /** + * Finishes writing the contents of the CPIO output stream without closing + * the underlying stream. Use this method when applying multiple filters in + * succession to the same output stream. + * + * @throws IOException + * if an I/O exception has occurred or if a CPIO file error has + * occurred + */ + @Override + public void finish() throws IOException { + ensureOpen(); + if (finished) { + throw new IOException("This archive has already been finished"); + } + + if (this.entry != null) { + throw new IOException("This archive contains unclosed entries."); + } + this.entry = new CpioArchiveEntry(this.entryFormat); + this.entry.setName(CPIO_TRAILER); + this.entry.setNumberOfLinks(1); + writeHeader(this.entry); + closeArchiveEntry(); + + final int lengthOfLastBlock = (int) (getBytesWritten() % blockSize); + if (lengthOfLastBlock != 0) { + pad(blockSize - lengthOfLastBlock); + } + + finished = true; + } + + /** + * Closes the CPIO output stream as well as the stream being filtered. + * + * @throws IOException + * if an I/O error has occurred or if a CPIO file error has + * occurred + */ + @Override + public void close() throws IOException { + try { + if (!finished) { + finish(); + } + } finally { + if (!this.closed) { + out.close(); + this.closed = true; + } + } + } + + private void pad(final int count) throws IOException{ + if (count > 0){ + final byte buff[] = new byte[count]; + out.write(buff); + count(count); + } + } + + private void writeBinaryLong(final long number, final int length, + final boolean swapHalfWord) throws IOException { + final byte tmp[] = CpioUtil.long2byteArray(number, length, swapHalfWord); + out.write(tmp); + count(tmp.length); + } + + private void writeAsciiLong(final long number, final int length, + final int radix) throws IOException { + final StringBuilder tmp = new StringBuilder(); + String tmpStr; + if (radix == 16) { + tmp.append(Long.toHexString(number)); + } else if (radix == 8) { + tmp.append(Long.toOctalString(number)); + } else { + tmp.append(Long.toString(number)); + } + + if (tmp.length() <= length) { + final int insertLength = length - tmp.length(); + for (int pos = 0; pos < insertLength; pos++) { + tmp.insert(0, "0"); + } + tmpStr = tmp.toString(); + } else { + tmpStr = tmp.substring(tmp.length() - length); + } + final byte[] b = ArchiveUtils.toAsciiBytes(tmpStr); + out.write(b); + count(b.length); + } + + /** + * Encodes the given string using the configured encoding. 
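+ * The returned array does not include a trailing NUL byte; the NUL is
+ * appended separately by writeCString(byte[]) when the name is written.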
+ * + * @param str the String to write + * @throws IOException if the string couldn't be written + * @return result of encoding the string + */ + private byte[] encode(final String str) throws IOException { + final ByteBuffer buf = zipEncoding.encode(str); + final int len = buf.limit() - buf.position(); + return Arrays.copyOfRange(buf.array(), buf.arrayOffset(), buf.arrayOffset() + len); + } + + /** + * Writes an encoded string to the stream followed by \0 + * @param str the String to write + * @throws IOException if the string couldn't be written + */ + private void writeCString(byte[] str) throws IOException { + out.write(str); + out.write('\0'); + count(str.length + 1); + } + + /** + * Creates a new ArchiveEntry. The entryName must be an ASCII encoded string. + * + * @see org.apache.commons.compress.archivers.ArchiveOutputStream#createArchiveEntry(java.io.File, java.lang.String) + */ + @Override + public ArchiveEntry createArchiveEntry(final File inputFile, final String entryName) + throws IOException { + if(finished) { + throw new IOException("Stream has already been finished"); + } + return new CpioArchiveEntry(inputFile, entryName); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioConstants.java b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioConstants.java new file mode 100644 index 000000000..efba28251 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioConstants.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.cpio; + +/** + * All constants needed by CPIO. + * + * based on code from the jRPM project (jrpm.sourceforge.net) + * + */ +public interface CpioConstants { + /** magic number of a cpio entry in the new format */ + String MAGIC_NEW = "070701"; + + /** magic number of a cpio entry in the new format with crc */ + String MAGIC_NEW_CRC = "070702"; + + /** magic number of a cpio entry in the old ascii format */ + String MAGIC_OLD_ASCII = "070707"; + + /** magic number of a cpio entry in the old binary format */ + int MAGIC_OLD_BINARY = 070707; + + // These FORMAT_ constants are internal to the code + + /** write/read a CpioArchiveEntry in the new format */ + short FORMAT_NEW = 1; + + /** write/read a CpioArchiveEntry in the new format with crc */ + short FORMAT_NEW_CRC = 2; + + /** write/read a CpioArchiveEntry in the old ascii format */ + short FORMAT_OLD_ASCII = 4; + + /** write/read a CpioArchiveEntry in the old binary format */ + short FORMAT_OLD_BINARY = 8; + + /** Mask for both new formats */ + short FORMAT_NEW_MASK = 3; + + /** Mask for both old formats */ + short FORMAT_OLD_MASK = 12; + + /* + * Constants for the MODE bits + */ + + /** Mask for all file type bits. 
*/ + int S_IFMT = 0170000; + + // http://www.opengroup.org/onlinepubs/9699919799/basedefs/cpio.h.html + // has a list of the C_xxx constatnts + + /** Defines a socket */ + int C_ISSOCK = 0140000; + + /** Defines a symbolic link */ + int C_ISLNK = 0120000; + + /** HP/UX network special (C_ISCTG) */ + int C_ISNWK = 0110000; + + /** Defines a regular file */ + int C_ISREG = 0100000; + + /** Defines a block device */ + int C_ISBLK = 0060000; + + /** Defines a directory */ + int C_ISDIR = 0040000; + + /** Defines a character device */ + int C_ISCHR = 0020000; + + /** Defines a pipe */ + int C_ISFIFO = 0010000; + + + /** Set user ID */ + int C_ISUID = 0004000; + + /** Set group ID */ + int C_ISGID = 0002000; + + /** On directories, restricted deletion flag. */ + int C_ISVTX = 0001000; + + + /** Permits the owner of a file to read the file */ + int C_IRUSR = 0000400; + + /** Permits the owner of a file to write to the file */ + int C_IWUSR = 0000200; + + /** Permits the owner of a file to execute the file or to search the directory */ + int C_IXUSR = 0000100; + + + /** Permits a file's group to read the file */ + int C_IRGRP = 0000040; + + /** Permits a file's group to write to the file */ + int C_IWGRP = 0000020; + + /** Permits a file's group to execute the file or to search the directory */ + int C_IXGRP = 0000010; + + + /** Permits others to read the file */ + int C_IROTH = 0000004; + + /** Permits others to write to the file */ + int C_IWOTH = 0000002; + + /** Permits others to execute the file or to search the directory */ + int C_IXOTH = 0000001; + + /** The special trailer marker */ + String CPIO_TRAILER = "TRAILER!!!"; + + /** + * The default block size. + * + * @since 1.1 + */ + int BLOCK_SIZE = 512; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioUtil.java b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioUtil.java new file mode 100644 index 000000000..f53ea4424 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioUtil.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.cpio; + +/** + * Package private utility class for Cpio + * + * @Immutable + */ +class CpioUtil { + + /** + * Extracts the file type bits from a mode. + */ + static long fileType(final long mode) { + return mode & CpioConstants.S_IFMT; + } + + /** + * Converts a byte array to a long. Halfwords can be swapped by setting + * swapHalfWord=true. 
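+ * For example, given the two input bytes 0x71 0xC7, the result is
+ * 0x71C7 (29127) when swapHalfWord is true and 0xC771 (51057) when it
+ * is false.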
+ * + * @param number + * An array of bytes containing a number + * @param swapHalfWord + * Swap halfwords ([0][1][2][3]->[1][0][3][2]) + * @return The long value + * @throws UnsupportedOperationException if number length is not a multiple of 2 + */ + static long byteArray2long(final byte[] number, final boolean swapHalfWord) { + if (number.length % 2 != 0) { + throw new UnsupportedOperationException(); + } + + long ret = 0; + int pos = 0; + final byte tmp_number[] = new byte[number.length]; + System.arraycopy(number, 0, tmp_number, 0, number.length); + + if (!swapHalfWord) { + byte tmp = 0; + for (pos = 0; pos < tmp_number.length; pos++) { + tmp = tmp_number[pos]; + tmp_number[pos++] = tmp_number[pos]; + tmp_number[pos] = tmp; + } + } + + ret = tmp_number[0] & 0xFF; + for (pos = 1; pos < tmp_number.length; pos++) { + ret <<= 8; + ret |= tmp_number[pos] & 0xFF; + } + return ret; + } + + /** + * Converts a long number to a byte array + * Halfwords can be swapped by setting swapHalfWord=true. + * + * @param number + * the input long number to be converted + * + * @param length + * The length of the returned array + * @param swapHalfWord + * Swap halfwords ([0][1][2][3]->[1][0][3][2]) + * @return The long value + * @throws UnsupportedOperationException if the length is not a positive multiple of two + */ + static byte[] long2byteArray(final long number, final int length, + final boolean swapHalfWord) { + final byte[] ret = new byte[length]; + int pos = 0; + long tmp_number = 0; + + if (length % 2 != 0 || length < 2) { + throw new UnsupportedOperationException(); + } + + tmp_number = number; + for (pos = length - 1; pos >= 0; pos--) { + ret[pos] = (byte) (tmp_number & 0xFF); + tmp_number >>= 8; + } + + if (!swapHalfWord) { + byte tmp = 0; + for (pos = 0; pos < length; pos++) { + tmp = ret[pos]; + ret[pos++] = ret[pos]; + ret[pos] = tmp; + } + } + + return ret; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/package.html b/src/main/java/org/apache/commons/compress/archivers/cpio/package.html new file mode 100644 index 000000000..985828725 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/package.html @@ -0,0 +1,24 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream classes for reading and writing archives using + the CPIO format.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/Dirent.java b/src/main/java/org/apache/commons/compress/archivers/dump/Dirent.java new file mode 100644 index 000000000..b5af964a3 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/Dirent.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + +/** + * Directory entry. + */ +class Dirent { + private final int ino; + private final int parentIno; + private final int type; + private final String name; + + /** + * Constructor + * + * @param ino + * @param parentIno + * @param type + * @param name + */ + Dirent(final int ino, final int parentIno, final int type, final String name) { + this.ino = ino; + this.parentIno = parentIno; + this.type = type; + this.name = name; + } + + /** + * Get ino. + * @return the i-node + */ + int getIno() { + return ino; + } + + /** + * Get ino of parent directory. + * @return the parent i-node + */ + int getParentIno() { + return parentIno; + } + + /** + * Get entry type. + * @return the entry type + */ + int getType() { + return type; + } + + /** + * Get name of directory entry. + * + * <p>This method returns the raw name as it is stored inside of the archive.</p> + * + * @return the directory name + */ + String getName() { + return name; + } + + /** + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return String.format("[%d]: %s", ino, name); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveConstants.java b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveConstants.java new file mode 100644 index 000000000..2430f4aa2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveConstants.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + +/** + * Various constants associated with dump archives. 
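+ *
+ * <p>A minimal sketch of mapping a raw code from a tape segment header
+ * back to its enum constant; find() returns null for codes it does not
+ * know:</p>
+ * <pre>
+ * DumpArchiveConstants.SEGMENT_TYPE type =
+ *     DumpArchiveConstants.SEGMENT_TYPE.find(5); // SEGMENT_TYPE.END
+ * </pre>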
+ */ +public final class DumpArchiveConstants { + public static final int TP_SIZE = 1024; + public static final int NTREC = 10; + public static final int HIGH_DENSITY_NTREC = 32; + public static final int OFS_MAGIC = 60011; + public static final int NFS_MAGIC = 60012; + public static final int FS_UFS2_MAGIC = 0x19540119; + public static final int CHECKSUM = 84446; + public static final int LBLSIZE = 16; + public static final int NAMELEN = 64; + + /* do not instantiate */ + private DumpArchiveConstants() { + } + + /** + * The type of tape segment. + */ + public enum SEGMENT_TYPE { + TAPE(1), + INODE(2), + BITS(3), + ADDR(4), + END(5), + CLRI(6); + + int code; + + SEGMENT_TYPE(final int code) { + this.code = code; + } + + public static SEGMENT_TYPE find(final int code) { + for (final SEGMENT_TYPE t : values()) { + if (t.code == code) { + return t; + } + } + + return null; + } + } + + /** + * The type of compression. + */ + public enum COMPRESSION_TYPE { + ZLIB(0), + BZLIB(1), + LZO(2); + + int code; + + COMPRESSION_TYPE(final int code) { + this.code = code; + } + + public static COMPRESSION_TYPE find(final int code) { + for (final COMPRESSION_TYPE t : values()) { + if (t.code == code) { + return t; + } + } + + return null; + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveEntry.java new file mode 100644 index 000000000..e284505f2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveEntry.java @@ -0,0 +1,845 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + +import java.util.Collections; +import java.util.Date; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Set; +import org.apache.commons.compress.archivers.ArchiveEntry; + +/** + * This class represents an entry in a Dump archive. It consists + * of the entry's header, the entry's File and any extended attributes. + * <p> + * DumpEntries that are created from the header bytes read from + * an archive are instantiated with the DumpArchiveEntry( byte[] ) + * constructor. These entries will be used when extracting from + * or listing the contents of an archive. These entries have their + * header filled in using the header bytes. They also set the File + * to null, since they reference an archive entry not a file. + * <p> + * DumpEntries can also be constructed from nothing but a name. + * This allows the programmer to construct the entry by hand, for + * instance when only an InputStream is available for writing to + * the archive, and the header information is constructed from + * other information. 
In this case the header fields are set to + * defaults and the File is set to null. + * + * <p> + * The C structure for a Dump Entry's header is: + * <pre> + * #define TP_BSIZE 1024 // size of each file block + * #define NTREC 10 // number of blocks to write at once + * #define HIGHDENSITYTREC 32 // number of blocks to write on high-density tapes + * #define TP_NINDIR (TP_BSIZE/2) // number if indirect inodes in record + * #define TP_NINOS (TP_NINDIR / sizeof (int32_t)) + * #define LBLSIZE 16 + * #define NAMELEN 64 + * + * #define OFS_MAGIC (int)60011 // old format magic value + * #define NFS_MAGIC (int)60012 // new format magic value + * #define FS_UFS2_MAGIC (int)0x19540119 + * #define CHECKSUM (int)84446 // constant used in checksum algorithm + * + * struct s_spcl { + * int32_t c_type; // record type (see below) + * int32_t <b>c_date</b>; // date of this dump + * int32_t <b>c_ddate</b>; // date of previous dump + * int32_t c_volume; // dump volume number + * u_int32_t c_tapea; // logical block of this record + * dump_ino_t c_ino; // number of inode + * int32_t <b>c_magic</b>; // magic number (see above) + * int32_t c_checksum; // record checksum + * #ifdef __linux__ + * struct new_bsd_inode c_dinode; + * #else + * #ifdef sunos + * struct new_bsd_inode c_dinode; + * #else + * struct dinode c_dinode; // ownership and mode of inode + * #endif + * #endif + * int32_t c_count; // number of valid c_addr entries + * union u_data c_data; // see above + * char <b>c_label[LBLSIZE]</b>; // dump label + * int32_t <b>c_level</b>; // level of this dump + * char <b>c_filesys[NAMELEN]</b>; // name of dumpped file system + * char <b>c_dev[NAMELEN]</b>; // name of dumpped device + * char <b>c_host[NAMELEN]</b>; // name of dumpped host + * int32_t c_flags; // additional information (see below) + * int32_t c_firstrec; // first record on volume + * int32_t c_ntrec; // blocksize on volume + * int32_t c_extattributes; // additional inode info (see below) + * int32_t c_spare[30]; // reserved for future uses + * } s_spcl; + * + * // + * // flag values + * // + * #define DR_NEWHEADER 0x0001 // new format tape header + * #define DR_NEWINODEFMT 0x0002 // new format inodes on tape + * #define DR_COMPRESSED 0x0080 // dump tape is compressed + * #define DR_METAONLY 0x0100 // only the metadata of the inode has been dumped + * #define DR_INODEINFO 0x0002 // [SIC] TS_END header contains c_inos information + * #define DR_EXTATTRIBUTES 0x8000 + * + * // + * // extattributes inode info + * // + * #define EXT_REGULAR 0 + * #define EXT_MACOSFNDRINFO 1 + * #define EXT_MACOSRESFORK 2 + * #define EXT_XATTR 3 + * + * // used for EA on tape + * #define EXT2_GOOD_OLD_INODE_SIZE 128 + * #define EXT2_XATTR_MAGIC 0xEA020000 // block EA + * #define EXT2_XATTR_MAGIC2 0xEA020001 // in inode EA + * </pre> + * <p> + * The fields in <b>bold</b> are the same for all blocks. (This permitted + * multiple dumps to be written to a single tape.) + * </p> + * + * <p> + * The C structure for the inode (file) information is: + * <pre> + * struct bsdtimeval { // **** alpha-*-linux is deviant + * __u32 tv_sec; + * __u32 tv_usec; + * }; + * + * #define NDADDR 12 + * #define NIADDR 3 + * + * // + * // This is the new (4.4) BSD inode structure + * // copied from the FreeBSD 2.0 <ufs/ufs/dinode.h> include file + * // + * struct new_bsd_inode { + * __u16 di_mode; // file type, standard Unix permissions + * __s16 di_nlink; // number of hard links to file. 
+ * union { + * __u16 oldids[2]; + * __u32 inumber; + * } di_u; + * u_quad_t di_size; // file size + * struct bsdtimeval di_atime; // time file was last accessed + * struct bsdtimeval di_mtime; // time file was last modified + * struct bsdtimeval di_ctime; // time file was created + * __u32 di_db[NDADDR]; + * __u32 di_ib[NIADDR]; + * __u32 di_flags; // + * __s32 di_blocks; // number of disk blocks + * __s32 di_gen; // generation number + * __u32 di_uid; // user id (see /etc/passwd) + * __u32 di_gid; // group id (see /etc/group) + * __s32 di_spare[2]; // unused + * }; + * </pre> + * <p> + * It is important to note that the header DOES NOT have the name of the + * file. It can't since hard links mean that you may have multiple filenames + * for a single physical file. You must read the contents of the directory + * entries to learn the mapping(s) from filename to inode. + * </p> + * + * <p> + * The C structure that indicates if a specific block is a real block + * that contains data or is a sparse block that is not persisted to the + * disk is:</p> + * <pre> + * #define TP_BSIZE 1024 + * #define TP_NINDIR (TP_BSIZE/2) + * + * union u_data { + * char s_addrs[TP_NINDIR]; // 1 => data; 0 => hole in inode + * int32_t s_inos[TP_NINOS]; // table of first inode on each volume + * } u_data; + * </pre> + * + * @NotThreadSafe + */ +public class DumpArchiveEntry implements ArchiveEntry { + private String name; + private TYPE type = TYPE.UNKNOWN; + private int mode; + private Set<PERMISSION> permissions = Collections.emptySet(); + private long size; + private long atime; + private long mtime; + private int uid; + private int gid; + + /** + * Currently unused + */ + private final DumpArchiveSummary summary = null; + + // this information is available from standard index. + private final TapeSegmentHeader header = new TapeSegmentHeader(); + private String simpleName; + private String originalName; + + // this information is available from QFA index + private int volume; + private long offset; + private int ino; + private int nlink; + private long ctime; + private int generation; + private boolean isDeleted; + + /** + * Default constructor. + */ + public DumpArchiveEntry() { + } + + /** + * Constructor taking only filename. + * @param name pathname + * @param simpleName actual filename. + */ + public DumpArchiveEntry(final String name, final String simpleName) { + setName(name); + this.simpleName = simpleName; + } + + /** + * Constructor taking name, inode and type. + * + * @param name the name + * @param simpleName the simple name + * @param ino the ino + * @param type the type + */ + protected DumpArchiveEntry(final String name, final String simpleName, final int ino, + final TYPE type) { + setType(type); + setName(name); + this.simpleName = simpleName; + this.ino = ino; + this.offset = 0; + } + + /** + * Returns the path of the entry. + * @return the path of the entry. + */ + public String getSimpleName() { + return simpleName; + } + + /** + * Sets the path of the entry. + * @param simpleName the simple name + */ + protected void setSimpleName(final String simpleName) { + this.simpleName = simpleName; + } + + /** + * Returns the ino of the entry. + * @return the ino + */ + public int getIno() { + return header.getIno(); + } + + /** + * Return the number of hard links to the entry. + * @return the number of hard links + */ + public int getNlink() { + return nlink; + } + + /** + * Set the number of hard links. 
+ * @param nlink the number of hard links + */ + public void setNlink(final int nlink) { + this.nlink = nlink; + } + + /** + * Get file creation time. + * @return the creation time + */ + public Date getCreationTime() { + return new Date(ctime); + } + + /** + * Set the file creation time. + * @param ctime the creation time + */ + public void setCreationTime(final Date ctime) { + this.ctime = ctime.getTime(); + } + + /** + * Return the generation of the file. + * @return the generation + */ + public int getGeneration() { + return generation; + } + + /** + * Set the generation of the file. + * @param generation the generation + */ + public void setGeneration(final int generation) { + this.generation = generation; + } + + /** + * Has this file been deleted? (On valid on incremental dumps.) + * @return whether the file has been deleted + */ + public boolean isDeleted() { + return isDeleted; + } + + /** + * Set whether this file has been deleted. + * @param isDeleted whether the file has been deleted + */ + public void setDeleted(final boolean isDeleted) { + this.isDeleted = isDeleted; + } + + /** + * Return the offset within the archive + * @return the offset + */ + public long getOffset() { + return offset; + } + + /** + * Set the offset within the archive. + * @param offset the offset + */ + public void setOffset(final long offset) { + this.offset = offset; + } + + /** + * Return the tape volume where this file is located. + * @return the volume + */ + public int getVolume() { + return volume; + } + + /** + * Set the tape volume. + * @param volume the volume + */ + public void setVolume(final int volume) { + this.volume = volume; + } + + /** + * Return the type of the tape segment header. + * @return the segment header + */ + public DumpArchiveConstants.SEGMENT_TYPE getHeaderType() { + return header.getType(); + } + + /** + * Return the number of records in this segment. + * @return the number of records + */ + public int getHeaderCount() { + return header.getCount(); + } + + /** + * Return the number of sparse records in this segment. + * @return the number of sparse records + */ + public int getHeaderHoles() { + return header.getHoles(); + } + + /** + * Is this a sparse record? + * @param idx index of the record to check + * @return whether this is a sparse record + */ + public boolean isSparseRecord(final int idx) { + return (header.getCdata(idx) & 0x01) == 0; + } + + @Override + public int hashCode() { + return ino; + } + + @Override + public boolean equals(final Object o) { + if (o == this) { + return true; + } else if (o == null || !o.getClass().equals(getClass())) { + return false; + } + + final DumpArchiveEntry rhs = (DumpArchiveEntry) o; + + if (rhs.header == null) { + return false; + } + + if (ino != rhs.ino) { + return false; + } + + // summary is always null right now, but this may change some day + if ((summary == null && rhs.summary != null) // NOSONAR + || (summary != null && !summary.equals(rhs.summary))) { // NOSONAR + return false; + } + + return true; + } + + @Override + public String toString() { + return getName(); + } + + /** + * Populate the dump archive entry and tape segment header with + * the contents of the buffer. 
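+ * The buffer is expected to hold a single complete tape record of
+ * TP_SIZE (1024) bytes, as produced by TapeInputStream.readRecord().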
+ * + * @param buffer buffer to read content from + */ + static DumpArchiveEntry parse(final byte[] buffer) { + final DumpArchiveEntry entry = new DumpArchiveEntry(); + final TapeSegmentHeader header = entry.header; + + header.type = DumpArchiveConstants.SEGMENT_TYPE.find(DumpArchiveUtil.convert32( + buffer, 0)); + + //header.dumpDate = new Date(1000L * DumpArchiveUtil.convert32(buffer, 4)); + //header.previousDumpDate = new Date(1000L * DumpArchiveUtil.convert32( + // buffer, 8)); + header.volume = DumpArchiveUtil.convert32(buffer, 12); + //header.tapea = DumpArchiveUtil.convert32(buffer, 16); + entry.ino = header.ino = DumpArchiveUtil.convert32(buffer, 20); + + //header.magic = DumpArchiveUtil.convert32(buffer, 24); + //header.checksum = DumpArchiveUtil.convert32(buffer, 28); + final int m = DumpArchiveUtil.convert16(buffer, 32); + + // determine the type of the file. + entry.setType(TYPE.find((m >> 12) & 0x0F)); + + // determine the standard permissions + entry.setMode(m); + + entry.nlink = DumpArchiveUtil.convert16(buffer, 34); + // inumber, oldids? + entry.setSize(DumpArchiveUtil.convert64(buffer, 40)); + + long t = (1000L * DumpArchiveUtil.convert32(buffer, 48)) + + (DumpArchiveUtil.convert32(buffer, 52) / 1000); + entry.setAccessTime(new Date(t)); + t = (1000L * DumpArchiveUtil.convert32(buffer, 56)) + + (DumpArchiveUtil.convert32(buffer, 60) / 1000); + entry.setLastModifiedDate(new Date(t)); + t = (1000L * DumpArchiveUtil.convert32(buffer, 64)) + + (DumpArchiveUtil.convert32(buffer, 68) / 1000); + entry.ctime = t; + + // db: 72-119 - direct blocks + // id: 120-131 - indirect blocks + //entry.flags = DumpArchiveUtil.convert32(buffer, 132); + //entry.blocks = DumpArchiveUtil.convert32(buffer, 136); + entry.generation = DumpArchiveUtil.convert32(buffer, 140); + entry.setUserId(DumpArchiveUtil.convert32(buffer, 144)); + entry.setGroupId(DumpArchiveUtil.convert32(buffer, 148)); + // two 32-bit spare values. + header.count = DumpArchiveUtil.convert32(buffer, 160); + + header.holes = 0; + + for (int i = 0; (i < 512) && (i < header.count); i++) { + if (buffer[164 + i] == 0) { + header.holes++; + } + } + + System.arraycopy(buffer, 164, header.cdata, 0, 512); + + entry.volume = header.getVolume(); + + //entry.isSummaryOnly = false; + return entry; + } + + /** + * Update entry with information from next tape segment header. + */ + void update(final byte[] buffer) { + header.volume = DumpArchiveUtil.convert32(buffer, 16); + header.count = DumpArchiveUtil.convert32(buffer, 160); + + header.holes = 0; + + for (int i = 0; (i < 512) && (i < header.count); i++) { + if (buffer[164 + i] == 0) { + header.holes++; + } + } + + System.arraycopy(buffer, 164, header.cdata, 0, 512); + } + + /** + * Archive entry as stored on tape. There is one TSH for (at most) + * every 512k in the file. + */ + static class TapeSegmentHeader { + private DumpArchiveConstants.SEGMENT_TYPE type; + private int volume; + private int ino; + private int count; + private int holes; + private final byte[] cdata = new byte[512]; // map of any 'holes' + + public DumpArchiveConstants.SEGMENT_TYPE getType() { + return type; + } + + public int getVolume() { + return volume; + } + + public int getIno() { + return ino; + } + + void setIno(final int ino) { + this.ino = ino; + } + + public int getCount() { + return count; + } + + public int getHoles() { + return holes; + } + + public int getCdata(final int idx) { + return cdata[idx]; + } + } + + /** + * Returns the name of the entry. 
+ * + * <p>This method returns the raw name as it is stored inside of the archive.</p> + * + * @return the name of the entry. + */ + @Override + public String getName() { + return name; + } + + /** + * Returns the unmodified name of the entry. + * @return the name of the entry. + */ + String getOriginalName() { + return originalName; + } + + /** + * Sets the name of the entry. + * @param name the name + */ + public final void setName(String name) { + this.originalName = name; + if (name != null) { + if (isDirectory() && !name.endsWith("/")) { + name += "/"; + } + if (name.startsWith("./")) { + name = name.substring(2); + } + } + this.name = name; + } + + /** + * The last modified date. + * @return the last modified date + */ + @Override + public Date getLastModifiedDate() { + return new Date(mtime); + } + + /** + * Is this a directory? + * @return whether this is a directory + */ + @Override + public boolean isDirectory() { + return type == TYPE.DIRECTORY; + } + + /** + * Is this a regular file? + * @return whether this is a regular file + */ + public boolean isFile() { + return type == TYPE.FILE; + } + + /** + * Is this a network device? + * @return whether this is a socket + */ + public boolean isSocket() { + return type == TYPE.SOCKET; + } + + /** + * Is this a character device? + * @return whether this is a character device + */ + public boolean isChrDev() { + return type == TYPE.CHRDEV; + } + + /** + * Is this a block device? + * @return whether this is a block device + */ + public boolean isBlkDev() { + return type == TYPE.BLKDEV; + } + + /** + * Is this a fifo/pipe? + * @return whether this is a fifo + */ + public boolean isFifo() { + return type == TYPE.FIFO; + } + + /** + * Get the type of the entry. + * @return the type + */ + public TYPE getType() { + return type; + } + + /** + * Set the type of the entry. + * @param type the type + */ + public void setType(final TYPE type) { + this.type = type; + } + + /** + * Return the access permissions on the entry. + * @return the access permissions + */ + public int getMode() { + return mode; + } + + /** + * Set the access permissions on the entry. + * @param mode the access permissions + */ + public void setMode(final int mode) { + this.mode = mode & 07777; + this.permissions = PERMISSION.find(mode); + } + + /** + * Returns the permissions on the entry. + * @return the permissions + */ + public Set<PERMISSION> getPermissions() { + return permissions; + } + + /** + * Returns the size of the entry. + * @return the size + */ + @Override + public long getSize() { + return isDirectory() ? SIZE_UNKNOWN : size; + } + + /** + * Returns the size of the entry as read from the archive. + */ + long getEntrySize() { + return size; + } + + /** + * Set the size of the entry. + * @param size the size + */ + public void setSize(final long size) { + this.size = size; + } + + /** + * Set the time the file was last modified. + * @param mtime the last modified time + */ + public void setLastModifiedDate(final Date mtime) { + this.mtime = mtime.getTime(); + } + + /** + * Returns the time the file was last accessed. + * @return the access time + */ + public Date getAccessTime() { + return new Date(atime); + } + + /** + * Set the time the file was last accessed. + * @param atime the access time + */ + public void setAccessTime(final Date atime) { + this.atime = atime.getTime(); + } + + /** + * Return the user id. + * @return the user id + */ + public int getUserId() { + return uid; + } + + /** + * Set the user id. 
+ * @param uid the user id + */ + public void setUserId(final int uid) { + this.uid = uid; + } + + /** + * Return the group id + * @return the group id + */ + public int getGroupId() { + return gid; + } + + /** + * Set the group id. + * @param gid the group id + */ + public void setGroupId(final int gid) { + this.gid = gid; + } + + public enum TYPE { + WHITEOUT(14), + SOCKET(12), + LINK(10), + FILE(8), + BLKDEV(6), + DIRECTORY(4), + CHRDEV(2), + FIFO(1), + UNKNOWN(15); + + private int code; + + TYPE(final int code) { + this.code = code; + } + + public static TYPE find(final int code) { + TYPE type = UNKNOWN; + + for (final TYPE t : TYPE.values()) { + if (code == t.code) { + type = t; + } + } + + return type; + } + } + + public enum PERMISSION { + SETUID(04000), + SETGUI(02000), + STICKY(01000), + USER_READ(00400), + USER_WRITE(00200), + USER_EXEC(00100), + GROUP_READ(00040), + GROUP_WRITE(00020), + GROUP_EXEC(00010), + WORLD_READ(00004), + WORLD_WRITE(00002), + WORLD_EXEC(00001); + + private int code; + + PERMISSION(final int code) { + this.code = code; + } + + public static Set<PERMISSION> find(final int code) { + final Set<PERMISSION> set = new HashSet<>(); + + for (final PERMISSION p : PERMISSION.values()) { + if ((code & p.code) == p.code) { + set.add(p); + } + } + + if (set.isEmpty()) { + return Collections.emptySet(); + } + + return EnumSet.copyOf(set); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveException.java b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveException.java new file mode 100644 index 000000000..635b1d9fb --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveException.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers.dump; + +import java.io.IOException; + + +/** + * Dump Archive Exception + */ +public class DumpArchiveException extends IOException { + private static final long serialVersionUID = 1L; + + public DumpArchiveException() { + } + + public DumpArchiveException(final String msg) { + super(msg); + } + + public DumpArchiveException(final Throwable cause) { + initCause(cause); + } + + public DumpArchiveException(final String msg, final Throwable cause) { + super(msg); + initCause(cause); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java new file mode 100644 index 000000000..ed4f02fa0 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java @@ -0,0 +1,561 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.Stack; + +/** + * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream. + * Methods are provided to position at each successive entry in + * the archive, and the read each entry as a normal input stream + * using read(). + * + * There doesn't seem to exist a hint on the encoding of string values + * in any piece documentation. Given the main purpose of dump/restore + * is backing up a system it seems very likely the format uses the + * current default encoding of the system. + * + * @NotThreadSafe + */ +public class DumpArchiveInputStream extends ArchiveInputStream { + private DumpArchiveSummary summary; + private DumpArchiveEntry active; + private boolean isClosed; + private boolean hasHitEOF; + private long entrySize; + private long entryOffset; + private int readIdx; + private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE]; + private byte[] blockBuffer; + private int recordOffset; + private long filepos; + protected TapeInputStream raw; + + // map of ino -> dirent entry. We can use this to reconstruct full paths. + private final Map<Integer, Dirent> names = new HashMap<>(); + + // map of ino -> (directory) entry when we're missing one or more elements in the path. 
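+ // Entries are parked here until the missing directory names show up in a
+ // later directory segment; readDirectoryEntry() then builds their full
+ // paths and moves them to the queue below.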
+ private final Map<Integer, DumpArchiveEntry> pending = new HashMap<>(); + + // queue of (directory) entries where we now have the full path. + private Queue<DumpArchiveEntry> queue; + + /** + * The encoding to use for filenames and labels. + */ + private final ZipEncoding zipEncoding; + + // the provided encoding (for unit tests) + final String encoding; + + /** + * Constructor using the platform's default encoding for file + * names. + * + * @param is stream to read from + * @throws ArchiveException on error + */ + public DumpArchiveInputStream(final InputStream is) throws ArchiveException { + this(is, null); + } + + /** + * Constructor. + * + * @param is stream to read from + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @since 1.6 + * @throws ArchiveException on error + */ + public DumpArchiveInputStream(final InputStream is, final String encoding) + throws ArchiveException { + this.raw = new TapeInputStream(is); + this.hasHitEOF = false; + this.encoding = encoding; + this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); + + try { + // read header, verify it's a dump archive. + final byte[] headerBytes = raw.readRecord(); + + if (!DumpArchiveUtil.verify(headerBytes)) { + throw new UnrecognizedFormatException(); + } + + // get summary information + summary = new DumpArchiveSummary(headerBytes, this.zipEncoding); + + // reset buffer with actual block size. + raw.resetBlockSize(summary.getNTRec(), summary.isCompressed()); + + // allocate our read buffer. + blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE]; + + // skip past CLRI and BITS segments since we don't handle them yet. + readCLRI(); + readBITS(); + } catch (final IOException ex) { + throw new ArchiveException(ex.getMessage(), ex); + } + + // put in a dummy record for the root node. + final Dirent root = new Dirent(2, 2, 4, "."); + names.put(2, root); + + // use priority based on queue to ensure parent directories are + // released first. + queue = new PriorityQueue<>(10, + new Comparator<DumpArchiveEntry>() { + @Override + public int compare(final DumpArchiveEntry p, final DumpArchiveEntry q) { + if (p.getOriginalName() == null || q.getOriginalName() == null) { + return Integer.MAX_VALUE; + } + + return p.getOriginalName().compareTo(q.getOriginalName()); + } + }); + } + + @Deprecated + @Override + public int getCount() { + return (int) getBytesRead(); + } + + @Override + public long getBytesRead() { + return raw.getBytesRead(); + } + + /** + * Return the archive summary information. + * @return the summary + */ + public DumpArchiveSummary getSummary() { + return summary; + } + + /** + * Read CLRI (deleted inode) segment. + */ + private void readCLRI() throws IOException { + final byte[] buffer = raw.readRecord(); + + if (!DumpArchiveUtil.verify(buffer)) { + throw new InvalidFormatException(); + } + + active = DumpArchiveEntry.parse(buffer); + + if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) { + throw new InvalidFormatException(); + } + + // we don't do anything with this yet. + if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) + == -1) { + throw new EOFException(); + } + readIdx = active.getHeaderCount(); + } + + /** + * Read BITS segment. 
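+ * The bitmap data is skipped rather than parsed; it is not currently
+ * exposed to callers.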
+ */ + private void readBITS() throws IOException { + final byte[] buffer = raw.readRecord(); + + if (!DumpArchiveUtil.verify(buffer)) { + throw new InvalidFormatException(); + } + + active = DumpArchiveEntry.parse(buffer); + + if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) { + throw new InvalidFormatException(); + } + + // we don't do anything with this yet. + if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) + == -1) { + throw new EOFException(); + } + readIdx = active.getHeaderCount(); + } + + /** + * Read the next entry. + * @return the next entry + * @throws IOException on error + */ + public DumpArchiveEntry getNextDumpEntry() throws IOException { + return getNextEntry(); + } + + @Override + public DumpArchiveEntry getNextEntry() throws IOException { + DumpArchiveEntry entry = null; + String path = null; + + // is there anything in the queue? + if (!queue.isEmpty()) { + return queue.remove(); + } + + while (entry == null) { + if (hasHitEOF) { + return null; + } + + // skip any remaining records in this segment for prior file. + // we might still have holes... easiest to do it + // block by block. We may want to revisit this if + // the unnecessary decompression time adds up. + while (readIdx < active.getHeaderCount()) { + if (!active.isSparseRecord(readIdx++) + && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) { + throw new EOFException(); + } + } + + readIdx = 0; + filepos = raw.getBytesRead(); + + byte[] headerBytes = raw.readRecord(); + + if (!DumpArchiveUtil.verify(headerBytes)) { + throw new InvalidFormatException(); + } + + active = DumpArchiveEntry.parse(headerBytes); + + // skip any remaining segments for prior file. + while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) { + if (raw.skip((long) DumpArchiveConstants.TP_SIZE + * (active.getHeaderCount() + - active.getHeaderHoles())) == -1) { + throw new EOFException(); + } + + filepos = raw.getBytesRead(); + headerBytes = raw.readRecord(); + + if (!DumpArchiveUtil.verify(headerBytes)) { + throw new InvalidFormatException(); + } + + active = DumpArchiveEntry.parse(headerBytes); + } + + // check if this is an end-of-volume marker. + if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) { + hasHitEOF = true; + + return null; + } + + entry = active; + + if (entry.isDirectory()) { + readDirectoryEntry(active); + + // now we create an empty InputStream. + entryOffset = 0; + entrySize = 0; + readIdx = active.getHeaderCount(); + } else { + entryOffset = 0; + entrySize = active.getEntrySize(); + readIdx = 0; + } + + recordOffset = readBuf.length; + + path = getPath(entry); + + if (path == null) { + entry = null; + } + } + + entry.setName(path); + entry.setSimpleName(names.get(entry.getIno()).getName()); + entry.setOffset(filepos); + + return entry; + } + + /** + * Read directory entry. + */ + private void readDirectoryEntry(DumpArchiveEntry entry) + throws IOException { + long size = entry.getEntrySize(); + boolean first = true; + + while (first || + DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) { + // read the header that we just peeked at. 
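+ // (On the first pass the caller has already consumed the header
+ // record; on later iterations the record was only peeked via
+ // raw.peek() below and still has to be read.)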
+ if (!first) { + raw.readRecord(); + } + + if (!names.containsKey(entry.getIno()) && + DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) { + pending.put(entry.getIno(), entry); + } + + final int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount(); + + if (blockBuffer.length < datalen) { + blockBuffer = new byte[datalen]; + } + + if (raw.read(blockBuffer, 0, datalen) != datalen) { + throw new EOFException(); + } + + int reclen = 0; + + for (int i = 0; i < datalen - 8 && i < size - 8; + i += reclen) { + final int ino = DumpArchiveUtil.convert32(blockBuffer, i); + reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4); + + final byte type = blockBuffer[i + 6]; + + final String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]); + + if (".".equals(name) || "..".equals(name)) { + // do nothing... + continue; + } + + final Dirent d = new Dirent(ino, entry.getIno(), type, name); + + /* + if ((type == 4) && names.containsKey(ino)) { + System.out.println("we already have ino: " + + names.get(ino)); + } + */ + + names.put(ino, d); + + // check whether this allows us to fill anything in the pending list. + for (final Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) { + final String path = getPath(e.getValue()); + + if (path != null) { + e.getValue().setName(path); + e.getValue() + .setSimpleName(names.get(e.getKey()).getName()); + queue.add(e.getValue()); + } + } + + // remove anything that we found. (We can't do it earlier + // because of concurrent modification exceptions.) + for (final DumpArchiveEntry e : queue) { + pending.remove(e.getIno()); + } + } + + final byte[] peekBytes = raw.peek(); + + if (!DumpArchiveUtil.verify(peekBytes)) { + throw new InvalidFormatException(); + } + + entry = DumpArchiveEntry.parse(peekBytes); + first = false; + size -= DumpArchiveConstants.TP_SIZE; + } + } + + /** + * Get full path for specified archive entry, or null if there's a gap. + * + * @param entry + * @return full path for specified archive entry, or null if there's a gap. + */ + private String getPath(final DumpArchiveEntry entry) { + // build the stack of elements. It's possible that we're + // still missing an intermediate value and if so we + final Stack<String> elements = new Stack<>(); + Dirent dirent = null; + + for (int i = entry.getIno();; i = dirent.getParentIno()) { + if (!names.containsKey(i)) { + elements.clear(); + break; + } + + dirent = names.get(i); + elements.push(dirent.getName()); + + if (dirent.getIno() == dirent.getParentIno()) { + break; + } + } + + // if an element is missing defer the work and read next entry. + if (elements.isEmpty()) { + pending.put(entry.getIno(), entry); + + return null; + } + + // generate full path from stack of elements. + final StringBuilder sb = new StringBuilder(elements.pop()); + + while (!elements.isEmpty()) { + sb.append('/'); + sb.append(elements.pop()); + } + + return sb.toString(); + } + + /** + * Reads bytes from the current dump archive entry. + * + * This method is aware of the boundaries of the current + * entry in the archive and will deal with them as if they + * were this stream's start and EOF. + * + * @param buf The buffer into which to place bytes read. + * @param off The offset at which to place bytes read. + * @param len The number of bytes to read. + * @return The number of bytes read, or -1 at EOF. 
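+ *
+ * <p>Editor's sketch (not part of the original patch): a typical read
+ * loop that pairs {@code getNextDumpEntry()} with this method. The
+ * file name {@code "backup.dump"} and the buffer size are illustrative
+ * assumptions; imports and exception handling are omitted.</p>
+ *
+ * <pre>{@code
+ * try (DumpArchiveInputStream in = new DumpArchiveInputStream(
+ *         new FileInputStream("backup.dump"))) {
+ *     DumpArchiveEntry entry;
+ *     final byte[] buffer = new byte[1024];
+ *     while ((entry = in.getNextDumpEntry()) != null) {
+ *         int n;
+ *         while ((n = in.read(buffer, 0, buffer.length)) != -1) {
+ *             // process n bytes of entry data here
+ *         }
+ *     }
+ * }
+ * }</pre>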
+ * @throws IOException on error + */ + @Override + public int read(final byte[] buf, int off, int len) throws IOException { + int totalRead = 0; + + if (hasHitEOF || isClosed || entryOffset >= entrySize) { + return -1; + } + + if (active == null) { + throw new IllegalStateException("No current dump entry"); + } + + if (len + entryOffset > entrySize) { + len = (int) (entrySize - entryOffset); + } + + while (len > 0) { + final int sz = len > readBuf.length - recordOffset + ? readBuf.length - recordOffset : len; + + // copy any data we have + if (recordOffset + sz <= readBuf.length) { + System.arraycopy(readBuf, recordOffset, buf, off, sz); + totalRead += sz; + recordOffset += sz; + len -= sz; + off += sz; + } + + // load next block if necessary. + if (len > 0) { + if (readIdx >= 512) { + final byte[] headerBytes = raw.readRecord(); + + if (!DumpArchiveUtil.verify(headerBytes)) { + throw new InvalidFormatException(); + } + + active = DumpArchiveEntry.parse(headerBytes); + readIdx = 0; + } + + if (!active.isSparseRecord(readIdx++)) { + final int r = raw.read(readBuf, 0, readBuf.length); + if (r != readBuf.length) { + throw new EOFException(); + } + } else { + Arrays.fill(readBuf, (byte) 0); + } + + recordOffset = 0; + } + } + + entryOffset += totalRead; + + return totalRead; + } + + /** + * Closes the stream for this entry. + */ + @Override + public void close() throws IOException { + if (!isClosed) { + isClosed = true; + raw.close(); + } + } + + /** + * Look at the first few bytes of the file to decide if it's a dump + * archive. With 32 bytes we can look at the magic value, with a full + * 1k we can verify the checksum. + * @param buffer data to match + * @param length length of data + * @return whether the buffer seems to contain dump data + */ + public static boolean matches(final byte[] buffer, final int length) { + // do we have enough of the header? + if (length < 32) { + return false; + } + + // this is the best test + if (length >= DumpArchiveConstants.TP_SIZE) { + return DumpArchiveUtil.verify(buffer); + } + + // this will work in a pinch. + return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer, + 24); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java new file mode 100644 index 000000000..f17e70f13 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers.dump; + +import java.io.IOException; +import java.util.Date; + +import org.apache.commons.compress.archivers.zip.ZipEncoding; + +/** + * This class represents identifying information about a Dump archive volume. + * It consists the archive's dump date, label, hostname, device name and possibly + * last mount point plus the volume's volume id andfirst record number. + * + * For the corresponding C structure see the header of {@link DumpArchiveEntry}. + */ +public class DumpArchiveSummary { + private long dumpDate; + private long previousDumpDate; + private int volume; + private String label; + private int level; + private String filesys; + private String devname; + private String hostname; + private int flags; + private int firstrec; + private int ntrec; + + DumpArchiveSummary(final byte[] buffer, final ZipEncoding encoding) throws IOException { + dumpDate = 1000L * DumpArchiveUtil.convert32(buffer, 4); + previousDumpDate = 1000L * DumpArchiveUtil.convert32(buffer, 8); + volume = DumpArchiveUtil.convert32(buffer, 12); + label = DumpArchiveUtil.decode(encoding, buffer, 676, DumpArchiveConstants.LBLSIZE).trim(); + level = DumpArchiveUtil.convert32(buffer, 692); + filesys = DumpArchiveUtil.decode(encoding, buffer, 696, DumpArchiveConstants.NAMELEN).trim(); + devname = DumpArchiveUtil.decode(encoding, buffer, 760, DumpArchiveConstants.NAMELEN).trim(); + hostname = DumpArchiveUtil.decode(encoding, buffer, 824, DumpArchiveConstants.NAMELEN).trim(); + flags = DumpArchiveUtil.convert32(buffer, 888); + firstrec = DumpArchiveUtil.convert32(buffer, 892); + ntrec = DumpArchiveUtil.convert32(buffer, 896); + + //extAttributes = DumpArchiveUtil.convert32(buffer, 900); + } + + /** + * Get the date of this dump. + * @return the date of this dump. + */ + public Date getDumpDate() { + return new Date(dumpDate); + } + + /** + * Set dump date. + * @param dumpDate the dump date + */ + public void setDumpDate(final Date dumpDate) { + this.dumpDate = dumpDate.getTime(); + } + + /** + * Get the date of the previous dump at this level higher. + * @return dumpdate may be null + */ + public Date getPreviousDumpDate() { + return new Date(previousDumpDate); + } + + /** + * Set previous dump date. + * @param previousDumpDate the previous dump dat + */ + public void setPreviousDumpDate(final Date previousDumpDate) { + this.previousDumpDate = previousDumpDate.getTime(); + } + + /** + * Get volume (tape) number. + * @return volume (tape) number. + */ + public int getVolume() { + return volume; + } + + /** + * Set volume (tape) number. + * @param volume the volume number + */ + public void setVolume(final int volume) { + this.volume = volume; + } + + /** + * Get the level of this dump. This is a number between 0 and 9, inclusive, + * and a level 0 dump is a complete dump of the partition. For any other dump + * 'n' this dump contains all files that have changed since the last dump + * at this level or lower. This is used to support different levels of + * incremental backups. + * @return dump level + */ + public int getLevel() { + return level; + } + + /** + * Set level. + * @param level the level + */ + public void setLevel(final int level) { + this.level = level; + } + + /** + * Get dump label. This may be autogenerated or it may be specified + * bu the user. + * @return dump label + */ + public String getLabel() { + return label; + } + + /** + * Set dump label. 
+ * @param label the label + */ + public void setLabel(final String label) { + this.label = label; + } + + /** + * Get the last mountpoint, e.g., /home. + * @return last mountpoint + */ + public String getFilesystem() { + return filesys; + } + + /** + * Set the last mountpoint. + * @param filesystem the last mountpoint + */ + public void setFilesystem(final String filesystem) { + this.filesys = filesystem; + } + + /** + * Get the device name, e.g., /dev/sda3 or /dev/mapper/vg0-home. + * @return device name + */ + public String getDevname() { + return devname; + } + + /** + * Set the device name. + * @param devname the device name + */ + public void setDevname(final String devname) { + this.devname = devname; + } + + /** + * Get the hostname of the system where the dump was performed. + * @return hostname the host name + */ + public String getHostname() { + return hostname; + } + + /** + * Set the hostname. + * @param hostname the host name + */ + public void setHostname(final String hostname) { + this.hostname = hostname; + } + + /** + * Get the miscellaneous flags. See below. + * @return flags + */ + public int getFlags() { + return flags; + } + + /** + * Set the miscellaneous flags. + * @param flags flags + */ + public void setFlags(final int flags) { + this.flags = flags; + } + + /** + * Get the inode of the first record on this volume. + * @return inode of the first record on this volume. + */ + public int getFirstRecord() { + return firstrec; + } + + /** + * Set the inode of the first record. + * @param firstrec the first record + */ + public void setFirstRecord(final int firstrec) { + this.firstrec = firstrec; + } + + /** + * Get the number of records per tape block. This is typically + * between 10 and 32. + * @return the number of records per tape block + */ + public int getNTRec() { + return ntrec; + } + + /** + * Set the number of records per tape block. + * @param ntrec the number of records per tape block + */ + public void setNTRec(final int ntrec) { + this.ntrec = ntrec; + } + + /** + * Is this the new header format? (We do not currently support the + * old format.) + * + * @return true if using new header format + */ + public boolean isNewHeader() { + return (flags & 0x0001) == 0x0001; + } + + /** + * Is this the new inode format? (We do not currently support the + * old format.) + * @return true if using new inode format + */ + public boolean isNewInode() { + return (flags & 0x0002) == 0x0002; + } + + /** + * Is this volume compressed? N.B., individual blocks may or may not be compressed. + * The first block is never compressed. + * @return true if volume is compressed + */ + public boolean isCompressed() { + return (flags & 0x0080) == 0x0080; + } + + /** + * Does this volume only contain metadata? + * @return true if volume only contains meta-data + */ + public boolean isMetaDataOnly() { + return (flags & 0x0100) == 0x0100; + } + + /** + * Does this volume cotain extended attributes. + * @return true if volume cotains extended attributes. 
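+ * <p>Editor's note (not part of the original patch): like the other
+ * boolean accessors above, this simply tests one bit of the flags word
+ * returned by {@code getFlags()}.</p>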
+ */ + public boolean isExtendedAttributes() { + return (flags & 0x8000) == 0x8000; + } + + @Override + public int hashCode() { + int hash = 17; + + if (label != null) { + hash = label.hashCode(); + } + + hash += 31 * dumpDate; + + if (hostname != null) { + hash = (31 * hostname.hashCode()) + 17; + } + + if (devname != null) { + hash = (31 * devname.hashCode()) + 17; + } + + return hash; + } + + @Override + public boolean equals(final Object o) { + if (this == o) { + return true; + } + + if (o == null || !o.getClass().equals(getClass())) { + return false; + } + + final DumpArchiveSummary rhs = (DumpArchiveSummary) o; + + if (dumpDate != rhs.dumpDate) { + return false; + } + + if ((getHostname() == null) || + !getHostname().equals(rhs.getHostname())) { + return false; + } + + if ((getDevname() == null) || !getDevname().equals(rhs.getDevname())) { + return false; + } + + return true; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java new file mode 100644 index 000000000..20e1eb3f2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + +import java.io.IOException; +import java.util.Arrays; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.utils.ByteUtils; + +/** + * Various utilities for dump archives. + */ +class DumpArchiveUtil { + /** + * Private constructor to prevent instantiation. + */ + private DumpArchiveUtil() { + } + + /** + * Calculate checksum for buffer. + * + * @param buffer buffer containing tape segment header + * @returns checksum + */ + public static int calculateChecksum(final byte[] buffer) { + int calc = 0; + + for (int i = 0; i < 256; i++) { + calc += DumpArchiveUtil.convert32(buffer, 4 * i); + } + + return DumpArchiveConstants.CHECKSUM - + (calc - DumpArchiveUtil.convert32(buffer, 28)); + } + + /** + * Verify that the buffer contains a tape segment header. + * + * @param buffer + */ + public static final boolean verify(final byte[] buffer) { + // verify magic. for now only accept NFS_MAGIC. + final int magic = convert32(buffer, 24); + + if (magic != DumpArchiveConstants.NFS_MAGIC) { + return false; + } + + //verify checksum... + final int checksum = convert32(buffer, 28); + + return checksum == calculateChecksum(buffer); + } + + /** + * Get the ino associated with this buffer. + * + * @param buffer + */ + public static final int getIno(final byte[] buffer) { + return convert32(buffer, 20); + } + + /** + * Read 8-byte integer from buffer. 
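+ * The value is read in little-endian byte order, matching the other
+ * convert methods in this class.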
+ * + * @param buffer + * @param offset + * @return the 8-byte entry as a long + */ + public static final long convert64(final byte[] buffer, final int offset) { + return ByteUtils.fromLittleEndian(buffer, offset, 8); + } + + /** + * Read 4-byte integer from buffer. + * + * @param buffer + * @param offset + * @return the 4-byte entry as an int + */ + public static final int convert32(final byte[] buffer, final int offset) { + return (int) ByteUtils.fromLittleEndian(buffer, offset, 4); + } + + /** + * Read 2-byte integer from buffer. + * + * @param buffer + * @param offset + * @return the 2-byte entry as an int + */ + public static final int convert16(final byte[] buffer, final int offset) { + return (int) ByteUtils.fromLittleEndian(buffer, offset, 2); + } + + /** + * Decodes a byte array to a string. + */ + static String decode(final ZipEncoding encoding, final byte[] b, final int offset, final int len) + throws IOException { + return encoding.decode(Arrays.copyOfRange(b, offset, offset + len)); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/InvalidFormatException.java b/src/main/java/org/apache/commons/compress/archivers/dump/InvalidFormatException.java new file mode 100644 index 000000000..6169dfe90 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/InvalidFormatException.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + + +/** + * Invalid Format Exception. There was an error decoding a + * tape segment header. + */ +public class InvalidFormatException extends DumpArchiveException { + private static final long serialVersionUID = 1L; + protected long offset; + + public InvalidFormatException() { + super("there was an error decoding a tape segment"); + } + + public InvalidFormatException(final long offset) { + super("there was an error decoding a tape segment header at offset " + + offset + "."); + this.offset = offset; + } + + public long getOffset() { + return offset; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/ShortFileException.java b/src/main/java/org/apache/commons/compress/archivers/dump/ShortFileException.java new file mode 100644 index 000000000..e06c97cc0 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/ShortFileException.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + + +/** + * Short File Exception. There was an unexpected EOF when reading + * the input stream. + */ +public class ShortFileException extends DumpArchiveException { + private static final long serialVersionUID = 1L; + + public ShortFileException() { + super("unexpected EOF"); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/TapeInputStream.java b/src/main/java/org/apache/commons/compress/archivers/dump/TapeInputStream.java new file mode 100644 index 000000000..5643decef --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/TapeInputStream.java @@ -0,0 +1,355 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +import java.util.Arrays; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; + +import org.apache.commons.compress.utils.IOUtils; + +/** + * Filter stream that mimics a physical tape drive capable of compressing + * the data stream. + * + * @NotThreadSafe + */ +class TapeInputStream extends FilterInputStream { + private byte[] blockBuffer = new byte[DumpArchiveConstants.TP_SIZE]; + private int currBlkIdx = -1; + private int blockSize = DumpArchiveConstants.TP_SIZE; + private static final int RECORD_SIZE = DumpArchiveConstants.TP_SIZE; + private int readOffset = DumpArchiveConstants.TP_SIZE; + private boolean isCompressed = false; + private long bytesRead = 0; + + /** + * Constructor + */ + public TapeInputStream(final InputStream in) { + super(in); + } + + /** + * Set the DumpArchive Buffer's block size. We need to sync the block size with the + * dump archive's actual block size since compression is handled at the + * block level. + * + * @param recsPerBlock + * records per block + * @param isCompressed + * true if the archive is compressed + * @throws IOException + * more than one block has been read + * @throws IOException + * there was an error reading additional blocks. 
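+ * <p>Editor's note (not part of the original patch): the record that
+ * was already consumed while probing the archive is preserved, and the
+ * remainder of the first block is read at the new size, so subsequent
+ * reads stay block-aligned.</p>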
+ */ + public void resetBlockSize(final int recsPerBlock, final boolean isCompressed) + throws IOException { + this.isCompressed = isCompressed; + + blockSize = RECORD_SIZE * recsPerBlock; + + // save first block in case we need it again + final byte[] oldBuffer = blockBuffer; + + // read rest of new block + blockBuffer = new byte[blockSize]; + System.arraycopy(oldBuffer, 0, blockBuffer, 0, RECORD_SIZE); + readFully(blockBuffer, RECORD_SIZE, blockSize - RECORD_SIZE); + + this.currBlkIdx = 0; + this.readOffset = RECORD_SIZE; + } + + /** + * @see java.io.InputStream#available + */ + @Override + public int available() throws IOException { + if (readOffset < blockSize) { + return blockSize - readOffset; + } + + return in.available(); + } + + /** + * @see java.io.InputStream#read() + */ + @Override + public int read() throws IOException { + throw new IllegalArgumentException( + "all reads must be multiple of record size (" + RECORD_SIZE + + " bytes."); + } + + /** + * {@inheritDoc} + * + * <p>reads the full given length unless EOF is reached.</p> + * + * @param len length to read, must be a multiple of the stream's + * record size + */ + @Override + public int read(final byte[] b, int off, final int len) throws IOException { + if ((len % RECORD_SIZE) != 0) { + throw new IllegalArgumentException( + "all reads must be multiple of record size (" + RECORD_SIZE + + " bytes."); + } + + int bytes = 0; + + while (bytes < len) { + // we need to read from the underlying stream. + // this will reset readOffset value. + // return -1 if there's a problem. + if (readOffset == blockSize) { + try { + readBlock(true); + } catch (ShortFileException sfe) { // NOSONAR + return -1; + } + } + + int n = 0; + + if ((readOffset + (len - bytes)) <= blockSize) { + // we can read entirely from the buffer. + n = len - bytes; + } else { + // copy what we can from the buffer. + n = blockSize - readOffset; + } + + // copy data, increment counters. + System.arraycopy(blockBuffer, readOffset, b, off, n); + readOffset += n; + bytes += n; + off += n; + } + + return bytes; + } + + /** + * Skip bytes. Same as read but without the arraycopy. + * + * <p>skips the full given length unless EOF is reached.</p> + * + * @param len length to read, must be a multiple of the stream's + * record size + */ + @Override + public long skip(final long len) throws IOException { + if ((len % RECORD_SIZE) != 0) { + throw new IllegalArgumentException( + "all reads must be multiple of record size (" + RECORD_SIZE + + " bytes."); + } + + long bytes = 0; + + while (bytes < len) { + // we need to read from the underlying stream. + // this will reset readOffset value. We do not perform + // any decompression if we won't eventually read the data. + // return -1 if there's a problem. + if (readOffset == blockSize) { + try { + readBlock((len - bytes) < blockSize); + } catch (ShortFileException sfe) { // NOSONAR + return -1; + } + } + + long n = 0; + + if ((readOffset + (len - bytes)) <= blockSize) { + // we can read entirely from the buffer. + n = len - bytes; + } else { + // copy what we can from the buffer. + n = (long) blockSize - readOffset; + } + + // do not copy data but still increment counters. + readOffset += n; + bytes += n; + } + + return bytes; + } + + /** + * Close the input stream. + * + * @throws IOException on error + */ + @Override + public void close() throws IOException { + if (in != null && in != System.in) { + in.close(); + } + } + + /** + * Peek at the next record from the input stream and return the data. 
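+ * The record is not consumed: the read position is left untouched, so
+ * a following read or readRecord call returns the same bytes.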
+ * + * @return The record data. + * @throws IOException on error + */ + public byte[] peek() throws IOException { + // we need to read from the underlying stream. This + // isn't a problem since it would be the first step in + // any subsequent read() anyway. + if (readOffset == blockSize) { + try { + readBlock(true); + } catch (ShortFileException sfe) { // NOSONAR + return null; + } + } + + // copy data, increment counters. + final byte[] b = new byte[RECORD_SIZE]; + System.arraycopy(blockBuffer, readOffset, b, 0, b.length); + + return b; + } + + /** + * Read a record from the input stream and return the data. + * + * @return The record data. + * @throws IOException on error + */ + public byte[] readRecord() throws IOException { + final byte[] result = new byte[RECORD_SIZE]; + + // the read implementation will loop internally as long as + // input is available + if (-1 == read(result, 0, result.length)) { + throw new ShortFileException(); + } + + return result; + } + + /** + * Read next block. All decompression is handled here. + * + * @param decompress if false the buffer will not be decompressed. + * This is an optimization for longer seeks. + */ + private void readBlock(final boolean decompress) throws IOException { + if (in == null) { + throw new IOException("input buffer is closed"); + } + + if (!isCompressed || (currBlkIdx == -1)) { + // file is not compressed + readFully(blockBuffer, 0, blockSize); + bytesRead += blockSize; + } else { + readFully(blockBuffer, 0, 4); + bytesRead += 4; + + final int h = DumpArchiveUtil.convert32(blockBuffer, 0); + final boolean compressed = (h & 0x01) == 0x01; + + if (!compressed) { + // file is compressed but this block is not. + readFully(blockBuffer, 0, blockSize); + bytesRead += blockSize; + } else { + // this block is compressed. + final int flags = (h >> 1) & 0x07; + int length = (h >> 4) & 0x0FFFFFFF; + final byte[] compBuffer = new byte[length]; + readFully(compBuffer, 0, length); + bytesRead += length; + + if (!decompress) { + // just in case someone reads the data. + Arrays.fill(blockBuffer, (byte) 0); + } else { + switch (DumpArchiveConstants.COMPRESSION_TYPE.find(flags & + 0x03)) { + case ZLIB: + + final Inflater inflator = new Inflater(); + try { + inflator.setInput(compBuffer, 0, compBuffer.length); + length = inflator.inflate(blockBuffer); + + if (length != blockSize) { + throw new ShortFileException(); + } + } catch (final DataFormatException e) { + throw new DumpArchiveException("bad data", e); + } finally { + inflator.end(); + } + + break; + + case BZLIB: + throw new UnsupportedCompressionAlgorithmException( + "BZLIB2"); + + case LZO: + throw new UnsupportedCompressionAlgorithmException( + "LZO"); + + default: + throw new UnsupportedCompressionAlgorithmException(); + } + } + } + } + + currBlkIdx++; + readOffset = 0; + } + + /** + * Read buffer + */ + private void readFully(final byte[] b, final int off, final int len) + throws IOException { + final int count = IOUtils.readFully(in, b, off, len); + if (count < len) { + throw new ShortFileException(); + } + } + + /** + * Get number of bytes read. 
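+ * @return the number of raw bytes consumed from the underlying stream,
+ *         including the per-block headers of compressed archives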
+ */ + public long getBytesRead() { + return bytesRead; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/UnrecognizedFormatException.java b/src/main/java/org/apache/commons/compress/archivers/dump/UnrecognizedFormatException.java new file mode 100644 index 000000000..333aeacd6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/UnrecognizedFormatException.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + + +/** + * Unrecognized Format Exception. This is either not a recognized dump archive or there's + * a bad tape segment header. + */ +public class UnrecognizedFormatException extends DumpArchiveException { + private static final long serialVersionUID = 1L; + + public UnrecognizedFormatException() { + super("this is not a recognized format."); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/UnsupportedCompressionAlgorithmException.java b/src/main/java/org/apache/commons/compress/archivers/dump/UnsupportedCompressionAlgorithmException.java new file mode 100644 index 000000000..8c6040304 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/UnsupportedCompressionAlgorithmException.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + + +/** + * Unsupported compression algorithm. The dump archive uses an unsupported + * compression algorithm (BZLIB2 or LZO). 
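+ * Only ZLIB compression is currently supported when reading dump
+ * archives.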
+ */ +public class UnsupportedCompressionAlgorithmException + extends DumpArchiveException { + private static final long serialVersionUID = 1L; + + public UnsupportedCompressionAlgorithmException() { + super("this file uses an unsupported compression algorithm."); + } + + public UnsupportedCompressionAlgorithmException(final String alg) { + super("this file uses an unsupported compression algorithm: " + alg + + "."); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/dump/package.html b/src/main/java/org/apache/commons/compress/archivers/dump/package.html new file mode 100644 index 000000000..72f3c68c4 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/dump/package.html @@ -0,0 +1,56 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>This package provides stream classes for reading archives + using the Unix DUMP format. This format is similar to (and + contemporary with) TAR but reads the raw filesystem directly. + This means that writers are filesystem-specific even though the + created archives are filesystem-agnostic. + </p> + + <p>Unlike other formats DUMP offers clean support for sparse files, + extended attributes, and other file metadata. In addition DUMP + supports incremental dump files can capture (most) file deletion. + It also provides a native form of compression and will soon support + native encryption as well. + </p> + + <p>In practice TAR archives are used for both distribution + and backups. DUMP archives are used exclusively for backups. + </p> + + <p>Like any 30+-year-old application there are a number of variants. + For pragmatic reasons we will only support archives with the + 'new' tape header and inode formats. Other restrictions: + + <ul> + <li>We only support ZLIB compression. The format + also permits LZO and BZLIB compression.</li> + <li>Sparse files will have the holes filled.</li> + <li>MacOS finder and resource streams are ignored.</li> + <li>Extended attributes are not currently provided.</li> + <li>SELinux labels are not currently provided.</li> + </ul> + </p> + + <p>As of Apache Commons Compress 1.3 support for the dump format is + read-only.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/archivers/examples/Archiver.java b/src/main/java/org/apache/commons/compress/archivers/examples/Archiver.java new file mode 100644 index 000000000..b34d16bb7 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/examples/Archiver.java @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.examples; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.ArchiveStreamFactory; +import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; +import org.apache.commons.compress.utils.IOUtils; + +/** + * Provides a high level API for creating archives. + * @since 1.17 + */ +public class Archiver { + + private interface ArchiveEntryCreator { + ArchiveEntry create(File f, String entryName) throws IOException; + } + + private interface ArchiveEntryConsumer { + void accept(File source, ArchiveEntry entry) throws IOException; + } + + private interface Finisher { + void finish() throws IOException; + } + + /** + * Creates an archive {@code target} using the format {@code + * format} by recursively including all files and directories in + * {@code directory}. + * + * @param format the archive format. This uses the same format as + * accepted by {@link ArchiveStreamFactory}. + * @param target the file to write the new archive to. + * @param directory the directory that contains the files to archive. + * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be created for other reasons + */ + public void create(String format, File target, File directory) throws IOException, ArchiveException { + if (prefersSeekableByteChannel(format)) { + try (SeekableByteChannel c = FileChannel.open(target.toPath(), StandardOpenOption.WRITE, + StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) { + create(format, c, directory); + } + return; + } + try (OutputStream o = Files.newOutputStream(target.toPath())) { + create(format, o, directory); + } + } + + /** + * Creates an archive {@code target} using the format {@code + * format} by recursively including all files and directories in + * {@code directory}. + * + * @param format the archive format. This uses the same format as + * accepted by {@link ArchiveStreamFactory}. + * @param target the stream to write the new archive to. + * @param directory the directory that contains the files to archive. 
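+ *
+ * <p>Editor's sketch (not part of the original patch): writing a tar
+ * archive of a source tree to a stream. The paths are illustrative
+ * assumptions and imports are omitted.</p>
+ *
+ * <pre>{@code
+ * try (OutputStream out = Files.newOutputStream(Paths.get("out.tar"))) {
+ *     new Archiver().create("tar", out, new File("src"));
+ * }
+ * }</pre>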
+ * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be created for other reasons + */ + public void create(String format, OutputStream target, File directory) throws IOException, ArchiveException { + create(new ArchiveStreamFactory().createArchiveOutputStream(format, target), directory); + } + + /** + * Creates an archive {@code target} using the format {@code + * format} by recursively including all files and directories in + * {@code directory}. + * + * @param format the archive format. This uses the same format as + * accepted by {@link ArchiveStreamFactory}. + * @param target the channel to write the new archive to. + * @param directory the directory that contains the files to archive. + * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be created for other reasons + */ + public void create(String format, SeekableByteChannel target, File directory) + throws IOException, ArchiveException { + if (!prefersSeekableByteChannel(format)) { + create(format, Channels.newOutputStream(target), directory); + } else if (ArchiveStreamFactory.ZIP.equalsIgnoreCase(format)) { + create(new ZipArchiveOutputStream(target), directory); + } else if (ArchiveStreamFactory.SEVEN_Z.equalsIgnoreCase(format)) { + create(new SevenZOutputFile(target), directory); + } else { + // never reached as prefersSeekableByteChannel only returns true for ZIP and 7z + throw new ArchiveException("don't know how to handle format " + format); + } + } + + /** + * Creates an archive {@code target} by recursively including all + * files and directories in {@code directory}. + * + * @param target the stream to write the new archive to. + * @param directory the directory that contains the files to archive. + * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be created for other reasons + */ + public void create(final ArchiveOutputStream target, File directory) + throws IOException, ArchiveException { + create(directory, new ArchiveEntryCreator() { + public ArchiveEntry create(File f, String entryName) throws IOException { + return target.createArchiveEntry(f, entryName); + } + }, new ArchiveEntryConsumer() { + public void accept(File source, ArchiveEntry e) throws IOException { + target.putArchiveEntry(e); + if (!e.isDirectory()) { + try (InputStream in = new BufferedInputStream(Files.newInputStream(source.toPath()))) { + IOUtils.copy(in, target); + } + } + target.closeArchiveEntry(); + } + }, new Finisher() { + public void finish() throws IOException { + target.finish(); + } + }); + } + + /** + * Creates an archive {@code target} by recursively including all + * files and directories in {@code directory}. + * + * @param target the file to write the new archive to. + * @param directory the directory that contains the files to archive. 
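+ * <p>Editor's note (not part of the original patch): {@code
+ * SevenZOutputFile} is not an {@code ArchiveOutputStream}, which is why
+ * 7z output gets this dedicated overload.</p>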
+ * @throws IOException if an I/O error occurs + */ + public void create(final SevenZOutputFile target, File directory) throws IOException { + create(directory, new ArchiveEntryCreator() { + public ArchiveEntry create(File f, String entryName) throws IOException { + return target.createArchiveEntry(f, entryName); + } + }, new ArchiveEntryConsumer() { + public void accept(File source, ArchiveEntry e) throws IOException { + target.putArchiveEntry(e); + if (!e.isDirectory()) { + final byte[] buffer = new byte[8024]; + int n = 0; + long count = 0; + try (InputStream in = new BufferedInputStream(Files.newInputStream(source.toPath()))) { + while (-1 != (n = in.read(buffer))) { + target.write(buffer, 0, n); + count += n; + } + } + } + target.closeArchiveEntry(); + } + }, new Finisher() { + public void finish() throws IOException { + target.finish(); + } + }); + } + + private boolean prefersSeekableByteChannel(String format) { + return ArchiveStreamFactory.ZIP.equalsIgnoreCase(format) || ArchiveStreamFactory.SEVEN_Z.equalsIgnoreCase(format); + } + + private void create(File directory, ArchiveEntryCreator creator, ArchiveEntryConsumer consumer, + Finisher finisher) throws IOException { + create("", directory, creator, consumer); + finisher.finish(); + } + + private void create(String prefix, File directory, ArchiveEntryCreator creator, ArchiveEntryConsumer consumer) + throws IOException { + File[] children = directory.listFiles(); + if (children == null) { + return; + } + for (File f : children) { + String entryName = prefix + f.getName() + (f.isDirectory() ? "/" : ""); + consumer.accept(f, creator.create(f, entryName)); + if (f.isDirectory()) { + create(entryName, f, creator, consumer); + } + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/examples/Expander.java b/src/main/java/org/apache/commons/compress/archivers/examples/Expander.java new file mode 100644 index 000000000..492252755 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/examples/Expander.java @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers.examples; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.Enumeration; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.ArchiveStreamFactory; +import org.apache.commons.compress.archivers.sevenz.SevenZFile; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.compress.utils.IOUtils; + +/** + * Provides a high level API for expanding archives. + * @since 1.17 + */ +public class Expander { + + private interface ArchiveEntrySupplier { + ArchiveEntry getNextReadableEntry() throws IOException; + } + + private interface EntryWriter { + void writeEntryDataTo(ArchiveEntry entry, OutputStream out) throws IOException; + } + + /** + * Expands {@code archive} into {@code targetDirectory}. + * + * <p>Tries to auto-detect the archive's format.</p> + * + * @param archive the file to expand + * @param targetDirectory the directory to write to + * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be read for other reasons + */ + public void expand(File archive, File targetDirectory) throws IOException, ArchiveException { + String format = null; + try (InputStream i = new BufferedInputStream(Files.newInputStream(archive.toPath()))) { + format = new ArchiveStreamFactory().detect(i); + } + expand(format, archive, targetDirectory); + } + + /** + * Expands {@code archive} into {@code targetDirectory}. + * + * @param archive the file to expand + * @param targetDirectory the directory to write to + * @param format the archive format. This uses the same format as + * accepted by {@link ArchiveStreamFactory}. + * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be read for other reasons + */ + public void expand(String format, File archive, File targetDirectory) throws IOException, ArchiveException { + if (prefersSeekableByteChannel(format)) { + try (SeekableByteChannel c = FileChannel.open(archive.toPath(), StandardOpenOption.READ)) { + expand(format, c, targetDirectory); + } + return; + } + try (InputStream i = new BufferedInputStream(Files.newInputStream(archive.toPath()))) { + expand(format, i, targetDirectory); + } + } + + /** + * Expands {@code archive} into {@code targetDirectory}. + * + * <p>Tries to auto-detect the archive's format.</p> + * + * @param archive the file to expand + * @param targetDirectory the directory to write to + * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be read for other reasons + */ + public void expand(InputStream archive, File targetDirectory) throws IOException, ArchiveException { + expand(new ArchiveStreamFactory().createArchiveInputStream(archive), targetDirectory); + } + + /** + * Expands {@code archive} into {@code targetDirectory}. + * + * @param archive the file to expand + * @param targetDirectory the directory to write to + * @param format the archive format. 
This uses the same format as + * accepted by {@link ArchiveStreamFactory}. + * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be read for other reasons + */ + public void expand(String format, InputStream archive, File targetDirectory) + throws IOException, ArchiveException { + expand(new ArchiveStreamFactory().createArchiveInputStream(format, archive), targetDirectory); + } + + /** + * Expands {@code archive} into {@code targetDirectory}. + * + * @param archive the file to expand + * @param targetDirectory the directory to write to + * @param format the archive format. This uses the same format as + * accepted by {@link ArchiveStreamFactory}. + * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be read for other reasons + */ + public void expand(String format, SeekableByteChannel archive, File targetDirectory) + throws IOException, ArchiveException { + if (!prefersSeekableByteChannel(format)) { + expand(format, Channels.newInputStream(archive), targetDirectory); + } else if (ArchiveStreamFactory.ZIP.equalsIgnoreCase(format)) { + expand(new ZipFile(archive), targetDirectory); + } else if (ArchiveStreamFactory.SEVEN_Z.equalsIgnoreCase(format)) { + expand(new SevenZFile(archive), targetDirectory); + } else { + // never reached as prefersSeekableByteChannel only returns true for ZIP and 7z + throw new ArchiveException("don't know how to handle format " + format); + } + } + + /** + * Expands {@code archive} into {@code targetDirectory}. + * + * @param archive the file to expand + * @param targetDirectory the directory to write to + * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be read for other reasons + */ + public void expand(final ArchiveInputStream archive, File targetDirectory) + throws IOException, ArchiveException { + expand(new ArchiveEntrySupplier() { + @Override + public ArchiveEntry getNextReadableEntry() throws IOException { + ArchiveEntry next = archive.getNextEntry(); + while (next != null && !archive.canReadEntryData(next)) { + next = archive.getNextEntry(); + } + return next; + } + }, new EntryWriter() { + @Override + public void writeEntryDataTo(ArchiveEntry entry, OutputStream out) throws IOException { + IOUtils.copy(archive, out); + } + }, targetDirectory); + } + + /** + * Expands {@code archive} into {@code targetDirectory}. + * + * @param archive the file to expand + * @param targetDirectory the directory to write to + * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be read for other reasons + */ + public void expand(final ZipFile archive, File targetDirectory) + throws IOException, ArchiveException { + final Enumeration<ZipArchiveEntry> entries = archive.getEntries(); + expand(new ArchiveEntrySupplier() { + @Override + public ArchiveEntry getNextReadableEntry() throws IOException { + ZipArchiveEntry next = entries.hasMoreElements() ? entries.nextElement() : null; + while (next != null && !archive.canReadEntryData(next)) { + next = entries.hasMoreElements() ? entries.nextElement() : null; + } + return next; + } + }, new EntryWriter() { + @Override + public void writeEntryDataTo(ArchiveEntry entry, OutputStream out) throws IOException { + try (InputStream in = archive.getInputStream((ZipArchiveEntry) entry)) { + IOUtils.copy(in, out); + } + } + }, targetDirectory); + } + + /** + * Expands {@code archive} into {@code targetDirectory}. 
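+ *
+ * <p>Editor's sketch (not part of the original patch): most callers can
+ * use the File-based entry point, which auto-detects the format and
+ * dispatches to an overload like this one. The paths are illustrative
+ * assumptions.</p>
+ *
+ * <pre>{@code
+ * new Expander().expand(new File("archive.zip"), new File("out"));
+ * }</pre>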
+ * + * @param archive the file to expand + * @param targetDirectory the directory to write to + * @throws IOException if an I/O error occurs + * @throws ArchiveException if the archive cannot be read for other reasons + */ + public void expand(final SevenZFile archive, File targetDirectory) + throws IOException, ArchiveException { + expand(new ArchiveEntrySupplier() { + @Override + public ArchiveEntry getNextReadableEntry() throws IOException { + return archive.getNextEntry(); + } + }, new EntryWriter() { + @Override + public void writeEntryDataTo(ArchiveEntry entry, OutputStream out) throws IOException { + final byte[] buffer = new byte[8024]; + int n; + while (-1 != (n = archive.read(buffer))) { + out.write(buffer, 0, n); + } + } + }, targetDirectory); + } + + private boolean prefersSeekableByteChannel(String format) { + return ArchiveStreamFactory.ZIP.equalsIgnoreCase(format) || ArchiveStreamFactory.SEVEN_Z.equalsIgnoreCase(format); + } + + private void expand(ArchiveEntrySupplier supplier, EntryWriter writer, File targetDirectory) + throws IOException { + String targetDirPath = targetDirectory.getCanonicalPath(); + if (!targetDirPath.endsWith(File.separator)) { + targetDirPath += File.separator; + } + ArchiveEntry nextEntry = supplier.getNextReadableEntry(); + while (nextEntry != null) { + File f = new File(targetDirectory, nextEntry.getName()); + if (!f.getCanonicalPath().startsWith(targetDirPath)) { + throw new IOException("expanding " + nextEntry.getName() + + " would create file outside of " + targetDirectory); + } + if (nextEntry.isDirectory()) { + if (!f.isDirectory() && !f.mkdirs()) { + throw new IOException("failed to create directory " + f); + } + } else { + File parent = f.getParentFile(); + if (!parent.isDirectory() && !parent.mkdirs()) { + throw new IOException("failed to create directory " + parent); + } + try (OutputStream o = Files.newOutputStream(f.toPath())) { + writer.writeEntryDataTo(nextEntry, o); + } + } + nextEntry = supplier.getNextReadableEntry(); + } + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/examples/package.html b/src/main/java/org/apache/commons/compress/archivers/examples/package.html new file mode 100644 index 000000000..443d5fc8f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/examples/package.html @@ -0,0 +1,25 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +--> + <body> + <p>Contains example code that is not guaranteed to provide a + stable API across releases of Commons Compress.</p> + + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/archivers/jar/JarArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/jar/JarArchiveEntry.java new file mode 100644 index 000000000..f0c05f046 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/jar/JarArchiveEntry.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.jar; + +import java.security.cert.Certificate; +import java.util.jar.Attributes; +import java.util.jar.JarEntry; +import java.util.zip.ZipEntry; +import java.util.zip.ZipException; + +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; + +/** + * + * @NotThreadSafe (parent is not thread-safe) + */ +public class JarArchiveEntry extends ZipArchiveEntry { + + // These are always null - see https://issues.apache.org/jira/browse/COMPRESS-18 for discussion + private final Attributes manifestAttributes = null; + private final Certificate[] certificates = null; + + public JarArchiveEntry(final ZipEntry entry) throws ZipException { + super(entry); + } + + public JarArchiveEntry(final String name) { + super(name); + } + + public JarArchiveEntry(final ZipArchiveEntry entry) throws ZipException { + super(entry); + } + + public JarArchiveEntry(final JarEntry entry) throws ZipException { + super(entry); + + } + + /** + * This method is not implemented and won't ever be. + * The JVM equivalent has a different name {@link java.util.jar.JarEntry#getAttributes()} + * + * @deprecated since 1.5, do not use; always returns null + * @return Always returns null. + */ + @Deprecated + public Attributes getManifestAttributes() { + return manifestAttributes; + } + + /** + * Return a copy of the list of certificates or null if there are none. 
+ * + * @return Always returns null in the current implementation + * + * @deprecated since 1.5, not currently implemented + */ + @Deprecated + public Certificate[] getCertificates() { + if (certificates != null) { // never true currently // NOSONAR + final Certificate[] certs = new Certificate[certificates.length]; + System.arraycopy(certificates, 0, certs, 0, certs.length); + return certs; + } + /* + * Note, the method + * Certificate[] java.util.jar.JarEntry.getCertificates() + * also returns null or the list of certificates (but not copied) + */ + return null; + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/jar/JarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/jar/JarArchiveInputStream.java new file mode 100644 index 000000000..47b1583ca --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/jar/JarArchiveInputStream.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.jar; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; + +/** + * Implements an input stream that can read entries from jar files. + * + * @NotThreadSafe + */ +public class JarArchiveInputStream extends ZipArchiveInputStream { + + /** + * Creates an instance from the input stream using the default encoding. + * + * @param inputStream the input stream to wrap + */ + public JarArchiveInputStream( final InputStream inputStream ) { + super(inputStream); + } + + /** + * Creates an instance from the input stream using the specified encoding. + * + * @param inputStream the input stream to wrap + * @param encoding the encoding to use + * @since 1.10 + */ + public JarArchiveInputStream( final InputStream inputStream, final String encoding ) { + super(inputStream, encoding); + } + + public JarArchiveEntry getNextJarEntry() throws IOException { + final ZipArchiveEntry entry = getNextZipEntry(); + return entry == null ? null : new JarArchiveEntry(entry); + } + + @Override + public ArchiveEntry getNextEntry() throws IOException { + return getNextJarEntry(); + } + + /** + * Checks if the signature matches what is expected for a jar file + * (in this case it is the same as for a zip file). 
+ * + * @param signature + * the bytes to check + * @param length + * the number of bytes to check + * @return true, if this stream is a jar archive stream, false otherwise + */ + public static boolean matches(final byte[] signature, final int length ) { + return ZipArchiveInputStream.matches(signature, length); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/jar/JarArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/jar/JarArchiveOutputStream.java new file mode 100644 index 000000000..5e2c7a8e5 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/jar/JarArchiveOutputStream.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.jar; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.zip.JarMarker; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; + +/** + * Subclass that adds a special extra field to the very first entry + * which allows the created archive to be used as an executable jar on + * Solaris. + * + * @NotThreadSafe + */ +public class JarArchiveOutputStream extends ZipArchiveOutputStream { + + private boolean jarMarkerAdded = false; + + public JarArchiveOutputStream(final OutputStream out) { + super(out); + } + + /** + * Creates an instance that wraps the output stream using the provided encoding. + * + * @param out the output stream to wrap + * @param encoding the encoding to use. Use null for the platform default. + * @since 1.10 + */ + public JarArchiveOutputStream(final OutputStream out, final String encoding) { + super(out); + setEncoding(encoding); + } + + // @throws ClassCastException if entry is not an instance of ZipArchiveEntry + @Override + public void putArchiveEntry(final ArchiveEntry ze) throws IOException { + if (!jarMarkerAdded) { + ((ZipArchiveEntry)ze).addAsFirstExtraField(JarMarker.getInstance()); + jarMarkerAdded = true; + } + super.putArchiveEntry(ze); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/jar/package.html b/src/main/java/org/apache/commons/compress/archivers/jar/package.html new file mode 100644 index 000000000..09829ae6a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/jar/package.html @@ -0,0 +1,25 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
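A rough sketch of how the jar stream classes above fit together (the jar file name is a placeholder, not taken from this change; the constructors and getNextJarEntry() are the ones shown above):

    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.commons.compress.archivers.jar.JarArchiveEntry;
    import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;

    public class ListJarEntries {
        public static void main(String[] args) throws Exception {
            try (InputStream in = Files.newInputStream(Paths.get("sample.jar"));
                 JarArchiveInputStream jar = new JarArchiveInputStream(in)) {
                JarArchiveEntry entry;
                // getNextJarEntry() wraps each ZipArchiveEntry as a JarArchiveEntry
                while ((entry = jar.getNextJarEntry()) != null) {
                    System.out.println(entry.getName() + " (" + entry.getSize() + " bytes)");
                }
            }
        }
    }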
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream classes for reading and writing archives using + the ZIP format with some extensions for the special case of JAR + archives.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/archivers/package.html b/src/main/java/org/apache/commons/compress/archivers/package.html new file mode 100644 index 000000000..df1922b4a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/package.html @@ -0,0 +1,24 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides a unified API and factories for dealing with archives + in different formats.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/AES256SHA256Decoder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/AES256SHA256Decoder.java new file mode 100644 index 000000000..aca9777fd --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/AES256SHA256Decoder.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
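A sketch of the unified API described in the archivers package summary above, assuming the single-argument createArchiveInputStream overload that auto-detects the format from the stream signature (it requires a mark-supporting stream; the file name is illustrative):

    import java.io.BufferedInputStream;
    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.commons.compress.archivers.ArchiveEntry;
    import org.apache.commons.compress.archivers.ArchiveInputStream;
    import org.apache.commons.compress.archivers.ArchiveStreamFactory;

    public class UnifiedApiExample {
        public static void main(String[] args) throws Exception {
            // BufferedInputStream supplies the mark/reset support needed
            // for format auto-detection.
            try (InputStream in = new BufferedInputStream(
                     Files.newInputStream(Paths.get("archive.zip")));
                 ArchiveInputStream ais =
                     new ArchiveStreamFactory().createArchiveInputStream(in)) {
                ArchiveEntry entry;
                while ((entry = ais.getNextEntry()) != null) {
                    System.out.println(entry.getName());
                }
            }
        }
    }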
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.security.GeneralSecurityException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import javax.crypto.Cipher; +import javax.crypto.CipherInputStream; +import javax.crypto.SecretKey; +import javax.crypto.spec.IvParameterSpec; +import javax.crypto.spec.SecretKeySpec; +import org.apache.commons.compress.PasswordRequiredException; + +class AES256SHA256Decoder extends CoderBase { + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] passwordBytes) throws IOException { + return new InputStream() { + private boolean isInitialized = false; + private CipherInputStream cipherInputStream = null; + + private CipherInputStream init() throws IOException { + if (isInitialized) { + return cipherInputStream; + } + final int byte0 = 0xff & coder.properties[0]; + final int numCyclesPower = byte0 & 0x3f; + final int byte1 = 0xff & coder.properties[1]; + final int ivSize = ((byte0 >> 6) & 1) + (byte1 & 0x0f); + final int saltSize = ((byte0 >> 7) & 1) + (byte1 >> 4); + if (2 + saltSize + ivSize > coder.properties.length) { + throw new IOException("Salt size + IV size too long in " + archiveName); + } + final byte[] salt = new byte[saltSize]; + System.arraycopy(coder.properties, 2, salt, 0, saltSize); + final byte[] iv = new byte[16]; + System.arraycopy(coder.properties, 2 + saltSize, iv, 0, ivSize); + + if (passwordBytes == null) { + throw new PasswordRequiredException(archiveName); + } + final byte[] aesKeyBytes; + if (numCyclesPower == 0x3f) { + aesKeyBytes = new byte[32]; + System.arraycopy(salt, 0, aesKeyBytes, 0, saltSize); + System.arraycopy(passwordBytes, 0, aesKeyBytes, saltSize, + Math.min(passwordBytes.length, aesKeyBytes.length - saltSize)); + } else { + final MessageDigest digest; + try { + digest = MessageDigest.getInstance("SHA-256"); + } catch (final NoSuchAlgorithmException noSuchAlgorithmException) { + throw new IOException("SHA-256 is unsupported by your Java implementation", + noSuchAlgorithmException); + } + final byte[] extra = new byte[8]; + for (long j = 0; j < (1L << numCyclesPower); j++) { + digest.update(salt); + digest.update(passwordBytes); + digest.update(extra); + for (int k = 0; k < extra.length; k++) { + ++extra[k]; + if (extra[k] != 0) { + break; + } + } + } + aesKeyBytes = digest.digest(); + } + + final SecretKey aesKey = new SecretKeySpec(aesKeyBytes, "AES"); + try { + final Cipher cipher = Cipher.getInstance("AES/CBC/NoPadding"); + cipher.init(Cipher.DECRYPT_MODE, aesKey, new IvParameterSpec(iv)); + cipherInputStream = new CipherInputStream(in, cipher); + isInitialized = true; + return cipherInputStream; + } catch (final GeneralSecurityException generalSecurityException) { + throw new IOException("Decryption error " + + "(do you have the JCE Unlimited Strength Jurisdiction Policy Files installed?)", + generalSecurityException); + } + } + + @Override + public int read() throws IOException { + return init().read(); + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + return init().read(b, off, len); + } + + @Override + public void close() { + } + }; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java new file mode 100644 index 000000000..dd1c75aa2 --- 
/dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.util.BitSet; + +class Archive { + /// Offset from beginning of file + SIGNATURE_HEADER_SIZE to packed streams. + long packPos; + /// Size of each packed stream. + long[] packSizes; + /// Whether each particular packed stream has a CRC. + BitSet packCrcsDefined; + /// CRCs for each packed stream, valid only if that packed stream has one. + long[] packCrcs; + /// Properties of solid compression blocks. + Folder[] folders; + /// Temporary properties for non-empty files (subsumed into the files array later). + SubStreamsInfo subStreamsInfo; + /// The files and directories in the archive. + SevenZArchiveEntry[] files; + /// Mapping between folders, files and streams. + StreamMap streamMap; + + @Override + public String toString() { + return "Archive with packed streams starting at offset " + packPos + + ", " + lengthOf(packSizes) + " pack sizes, " + lengthOf(packCrcs) + + " CRCs, " + lengthOf(folders) + " folders, " + lengthOf(files) + + " files and " + streamMap; + } + + private static String lengthOf(final long[] a) { + return a == null ? "(null)" : String.valueOf(a.length); + } + + private static String lengthOf(final Object[] a) { + return a == null ? "(null)" : String.valueOf(a.length); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java new file mode 100644 index 000000000..2710b72ff --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +class BindPair { + long inIndex; + long outIndex; + + @Override + public String toString() { + return "BindPair binding input " + inIndex + " to output " + outIndex; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/BoundedSeekableByteChannelInputStream.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/BoundedSeekableByteChannelInputStream.java new file mode 100644 index 000000000..32b3bda7b --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/BoundedSeekableByteChannelInputStream.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; + +class BoundedSeekableByteChannelInputStream extends InputStream { + private static final int MAX_BUF_LEN = 8192; + private final ByteBuffer buffer; + private final SeekableByteChannel channel; + private long bytesRemaining; + + public BoundedSeekableByteChannelInputStream(final SeekableByteChannel channel, + final long size) { + this.channel = channel; + this.bytesRemaining = size; + if (size < MAX_BUF_LEN && size > 0) { + buffer = ByteBuffer.allocate((int) size); + } else { + buffer = ByteBuffer.allocate(MAX_BUF_LEN); + } + } + + @Override + public int read() throws IOException { + if (bytesRemaining > 0) { + --bytesRemaining; + int read = read(1); + if (read < 0) { + return read; + } + return buffer.get() & 0xff; + } + return -1; + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (bytesRemaining == 0) { + return -1; + } + int bytesToRead = len; + if (bytesToRead > bytesRemaining) { + bytesToRead = (int) bytesRemaining; + } + int bytesRead; + ByteBuffer buf; + if (bytesToRead <= buffer.capacity()) { + buf = buffer; + bytesRead = read(bytesToRead); + } else { + buf = ByteBuffer.allocate(bytesToRead); + bytesRead = channel.read(buf); + buf.flip(); + } + if (bytesRead >= 0) { + buf.get(b, off, bytesRead); + bytesRemaining -= bytesRead; + } + return bytesRead; + } + + private int read(int len) throws IOException { + buffer.rewind().limit(len); + int read = channel.read(buffer); + buffer.flip(); + return read; + } + + @Override + public void close() { + // the nested channel is controlled externally + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/CLI.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/CLI.java new file mode 100644 index 000000000..a40f2b318 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/CLI.java @@ -0,0 +1,146 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; + +public class CLI { + + + private enum Mode { + LIST("Analysing") { + @Override + public void takeAction(final SevenZFile archive, final SevenZArchiveEntry entry) { + System.out.print(entry.getName()); + if (entry.isDirectory()) { + System.out.print(" dir"); + } else { + System.out.print(" " + entry.getCompressedSize() + + "/" + entry.getSize()); + } + if (entry.getHasLastModifiedDate()) { + System.out.print(" " + entry.getLastModifiedDate()); + } else { + System.out.print(" no last modified date"); + } + if (!entry.isDirectory()) { + System.out.println(" " + getContentMethods(entry)); + } else { + System.out.println(""); + } + } + + private String getContentMethods(final SevenZArchiveEntry entry) { + final StringBuilder sb = new StringBuilder(); + boolean first = true; + for (final SevenZMethodConfiguration m : entry.getContentMethods()) { + if (!first) { + sb.append(", "); + } + first = false; + sb.append(m.getMethod()); + if (m.getOptions() != null) { + sb.append("(").append(m.getOptions()).append(")"); + } + } + return sb.toString(); + } + }, + EXTRACT("Extracting") { + private final byte[] buf = new byte[8192]; + @Override + public void takeAction(final SevenZFile archive, final SevenZArchiveEntry entry) + throws IOException { + final File outFile = new File(entry.getName()); + if (entry.isDirectory()) { + if (!outFile.isDirectory() && !outFile.mkdirs()) { + throw new IOException("Cannot create directory " + outFile); + } + System.out.println("created directory " + outFile); + return; + } + + System.out.println("extracting to " + outFile); + final File parent = outFile.getParentFile(); + if (parent != null && !parent.exists() && !parent.mkdirs()) { + throw new IOException("Cannot create " + parent); + } + try (final FileOutputStream fos = new FileOutputStream(outFile)) { + final long total = entry.getSize(); + long off = 0; + while (off < total) { + final int toRead = (int) Math.min(total - off, buf.length); + final int bytesRead = archive.read(buf, 0, toRead); + if (bytesRead < 1) { + throw new IOException("reached end of entry " + + entry.getName() + + " after " + off + + " bytes, expected " + + total); + } + off += bytesRead; + fos.write(buf, 0, bytesRead); + } + } + } + }; + + private final String message; + Mode(final String message) { + this.message = message; + } + public String getMessage() { + return message; + } + public abstract void takeAction(SevenZFile archive, SevenZArchiveEntry entry) + throws IOException; + } + + public static void main(final String[] args) throws Exception { + if (args.length == 0) { + usage(); + return; + } + final 
Mode mode = grabMode(args); + System.out.println(mode.getMessage() + " " + args[0]); + final File f = new File(args[0]); + if (!f.isFile()) { + System.err.println(f + " doesn't exist or is a directory"); + } + try (final SevenZFile archive = new SevenZFile(f)) { + SevenZArchiveEntry ae; + while((ae=archive.getNextEntry()) != null) { + mode.takeAction(archive, ae); + } + } + } + + private static void usage() { + System.out.println("Parameters: archive-name [list|extract]"); + } + + private static Mode grabMode(final String[] args) { + if (args.length < 2) { + return Mode.LIST; + } + return Enum.valueOf(Mode.class, args[1].toUpperCase()); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java new file mode 100644 index 000000000..cbd271d1d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +class Coder { + byte[] decompressionMethodId; + long numInStreams; + long numOutStreams; + byte[] properties = null; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/CoderBase.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/CoderBase.java new file mode 100644 index 000000000..384251395 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/CoderBase.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * Base Codec class. + */ +abstract class CoderBase { + private final Class<?>[] acceptableOptions; + private static final byte[] NONE = new byte[0]; + + /** + * @param acceptableOptions types that can be used as options for this codec. + */ + protected CoderBase(final Class<?>... 
acceptableOptions) { + this.acceptableOptions = acceptableOptions; + } + + /** + * @return whether this method can extract options from the given object. + */ + boolean canAcceptOptions(final Object opts) { + for (final Class<?> c : acceptableOptions) { + if (c.isInstance(opts)) { + return true; + } + } + return false; + } + + /** + * @return property-bytes to write in a Folder block + */ + byte[] getOptionsAsProperties(final Object options) throws IOException { + return NONE; + } + + /** + * @return configuration options that have been used to create the given InputStream from the given Coder + */ + Object getOptionsFromCoder(final Coder coder, final InputStream in) throws IOException { + return null; + } + + /** + * @return a stream that reads from in using the configured coder and password. + */ + abstract InputStream decode(final String archiveName, + final InputStream in, long uncomressedLength, + final Coder coder, byte[] password) throws IOException; + + /** + * @return a stream that writes to out using the given configuration. + */ + OutputStream encode(final OutputStream out, final Object options) throws IOException { + throw new UnsupportedOperationException("method doesn't support writing"); + } + + /** + * If the option represents a number, return its integer + * value, otherwise return the given default value. + */ + protected static int numberOptionOrDefault(final Object options, final int defaultValue) { + return options instanceof Number ? ((Number) options).intValue() : defaultValue; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Coders.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coders.java new file mode 100644 index 000000000..729a1a3d8 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coders.java @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
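As a rough sketch of how the CoderBase plumbing above and the decoders that follow are consumed through the public SevenZFile API (the archive name and buffer size are illustrative; getNextEntry() and read() are used the same way by the CLI and Expander code earlier in this change):

    import java.io.File;

    import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
    import org.apache.commons.compress.archivers.sevenz.SevenZFile;

    public class Read7z {
        public static void main(String[] args) throws Exception {
            try (SevenZFile sevenZ = new SevenZFile(new File("archive.7z"))) {
                SevenZArchiveEntry entry;
                byte[] buffer = new byte[8192];
                while ((entry = sevenZ.getNextEntry()) != null) {
                    if (entry.isDirectory()) {
                        continue;
                    }
                    long total = 0;
                    int n;
                    // read() yields the entry's uncompressed data, decoded by the
                    // coder chain configured for the entry's folder.
                    while ((n = sevenZ.read(buffer)) > 0) {
                        total += n;
                    }
                    System.out.println(entry.getName() + ": " + total + " bytes");
                }
            }
        }
    }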
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.SequenceInputStream; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; +import java.util.zip.Inflater; +import java.util.zip.InflaterInputStream; + +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; +import org.apache.commons.compress.utils.FlushShieldFilterOutputStream; +import org.tukaani.xz.ARMOptions; +import org.tukaani.xz.ARMThumbOptions; +import org.tukaani.xz.FilterOptions; +import org.tukaani.xz.FinishableWrapperOutputStream; +import org.tukaani.xz.IA64Options; +import org.tukaani.xz.PowerPCOptions; +import org.tukaani.xz.SPARCOptions; +import org.tukaani.xz.X86Options; + +class Coders { + private static final Map<SevenZMethod, CoderBase> CODER_MAP = new HashMap<SevenZMethod, CoderBase>() { + + private static final long serialVersionUID = 1664829131806520867L; + { + put(SevenZMethod.COPY, new CopyDecoder()); + put(SevenZMethod.LZMA, new LZMADecoder()); + put(SevenZMethod.LZMA2, new LZMA2Decoder()); + put(SevenZMethod.DEFLATE, new DeflateDecoder()); + put(SevenZMethod.DEFLATE64, new Deflate64Decoder()); + put(SevenZMethod.BZIP2, new BZIP2Decoder()); + put(SevenZMethod.AES256SHA256, new AES256SHA256Decoder()); + put(SevenZMethod.BCJ_X86_FILTER, new BCJDecoder(new X86Options())); + put(SevenZMethod.BCJ_PPC_FILTER, new BCJDecoder(new PowerPCOptions())); + put(SevenZMethod.BCJ_IA64_FILTER, new BCJDecoder(new IA64Options())); + put(SevenZMethod.BCJ_ARM_FILTER, new BCJDecoder(new ARMOptions())); + put(SevenZMethod.BCJ_ARM_THUMB_FILTER, new BCJDecoder(new ARMThumbOptions())); + put(SevenZMethod.BCJ_SPARC_FILTER, new BCJDecoder(new SPARCOptions())); + put(SevenZMethod.DELTA_FILTER, new DeltaDecoder()); + }}; + + static CoderBase findByMethod(final SevenZMethod method) { + return CODER_MAP.get(method); + } + + static InputStream addDecoder(final String archiveName, final InputStream is, final long uncompressedLength, + final Coder coder, final byte[] password) throws IOException { + final CoderBase cb = findByMethod(SevenZMethod.byId(coder.decompressionMethodId)); + if (cb == null) { + throw new IOException("Unsupported compression method " + + Arrays.toString(coder.decompressionMethodId) + + " used in " + archiveName); + } + return cb.decode(archiveName, is, uncompressedLength, coder, password); + } + + static OutputStream addEncoder(final OutputStream out, final SevenZMethod method, + final Object options) throws IOException { + final CoderBase cb = findByMethod(method); + if (cb == null) { + throw new IOException("Unsupported compression method " + method); + } + return cb.encode(out, options); + } + + static class CopyDecoder extends CoderBase { + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password) throws IOException { + return in; + } + @Override + OutputStream encode(final OutputStream out, final Object options) { + return out; + } + } + + static class BCJDecoder extends CoderBase { + private final FilterOptions opts; + BCJDecoder(final FilterOptions opts) { + this.opts = opts; + 
} + + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password) throws IOException { + try { + return opts.getInputStream(in); + } catch (final AssertionError e) { + throw new IOException("BCJ filter used in " + archiveName + + " needs XZ for Java > 1.4 - see " + + "https://commons.apache.org/proper/commons-compress/limitations.html#7Z", + e); + } + } + + @SuppressWarnings("resource") + @Override + OutputStream encode(final OutputStream out, final Object options) { + return new FlushShieldFilterOutputStream(opts.getOutputStream(new FinishableWrapperOutputStream(out))); + } + } + + static class DeflateDecoder extends CoderBase { + private static final byte[] ONE_ZERO_BYTE = new byte[1]; + DeflateDecoder() { + super(Number.class); + } + + @SuppressWarnings("resource") // caller must close the InputStream + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password) + throws IOException { + final Inflater inflater = new Inflater(true); + // Inflater with nowrap=true has this odd contract for a zero padding + // byte following the data stream; this used to be zlib's requirement + // and has been fixed a long time ago, but the contract persists so + // we comply. + // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) + final InflaterInputStream inflaterInputStream = new InflaterInputStream(new SequenceInputStream(in, + new ByteArrayInputStream(ONE_ZERO_BYTE)), inflater); + return new DeflateDecoderInputStream(inflaterInputStream, inflater); + } + @Override + OutputStream encode(final OutputStream out, final Object options) { + final int level = numberOptionOrDefault(options, 9); + final Deflater deflater = new Deflater(level, true); + final DeflaterOutputStream deflaterOutputStream = new DeflaterOutputStream(out, deflater); + return new DeflateDecoderOutputStream(deflaterOutputStream, deflater); + } + + static class DeflateDecoderInputStream extends InputStream { + + InflaterInputStream inflaterInputStream; + Inflater inflater; + + public DeflateDecoderInputStream(InflaterInputStream inflaterInputStream, + Inflater inflater) { + this.inflaterInputStream = inflaterInputStream; + this.inflater = inflater; + } + + @Override + public int read() throws IOException { + return inflaterInputStream.read(); + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + return inflaterInputStream.read(b, off, len); + } + + @Override + public int read(final byte[] b) throws IOException { + return inflaterInputStream.read(b); + } + + @Override + public void close() throws IOException { + try { + inflaterInputStream.close(); + } finally { + inflater.end(); + } + } + } + + static class DeflateDecoderOutputStream extends OutputStream { + + DeflaterOutputStream deflaterOutputStream; + Deflater deflater; + + public DeflateDecoderOutputStream(DeflaterOutputStream deflaterOutputStream, + Deflater deflater) { + this.deflaterOutputStream = deflaterOutputStream; + this.deflater = deflater; + } + + @Override + public void write(final int b) throws IOException { + deflaterOutputStream.write(b); + } + + @Override + public void write(final byte[] b) throws IOException { + deflaterOutputStream.write(b); + } + + @Override + public void write(final byte[] b, final int off, final int len) throws IOException { + deflaterOutputStream.write(b, off, len); + } + + 
@Override + public void close() throws IOException { + try { + deflaterOutputStream.close(); + } finally { + deflater.end(); + } + } + } + } + + static class Deflate64Decoder extends CoderBase { + Deflate64Decoder() { + super(Number.class); + } + + @SuppressWarnings("resource") // caller must close the InputStream + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password) + throws IOException { + return new Deflate64CompressorInputStream(in); + } + } + + static class BZIP2Decoder extends CoderBase { + BZIP2Decoder() { + super(Number.class); + } + + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password) + throws IOException { + return new BZip2CompressorInputStream(in); + } + @Override + OutputStream encode(final OutputStream out, final Object options) + throws IOException { + final int blockSize = numberOptionOrDefault(options, BZip2CompressorOutputStream.MAX_BLOCKSIZE); + return new BZip2CompressorOutputStream(out, blockSize); + } + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/DeltaDecoder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/DeltaDecoder.java new file mode 100644 index 000000000..bc58c636d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/DeltaDecoder.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import org.tukaani.xz.DeltaOptions; +import org.tukaani.xz.FinishableWrapperOutputStream; +import org.tukaani.xz.UnsupportedOptionsException; + +class DeltaDecoder extends CoderBase { + DeltaDecoder() { + super(Number.class); + } + + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password) throws IOException { + return new DeltaOptions(getOptionsFromCoder(coder)).getInputStream(in); + } + + @SuppressWarnings("resource") + @Override + OutputStream encode(final OutputStream out, final Object options) throws IOException { + final int distance = numberOptionOrDefault(options, 1); + try { + return new DeltaOptions(distance).getOutputStream(new FinishableWrapperOutputStream(out)); + } catch (final UnsupportedOptionsException ex) { + throw new IOException(ex.getMessage()); + } + } + + @Override + byte[] getOptionsAsProperties(final Object options) { + return new byte[] { + (byte) (numberOptionOrDefault(options, 1) - 1) + }; + } + + @Override + Object getOptionsFromCoder(final Coder coder, final InputStream in) { + return getOptionsFromCoder(coder); + } + + private int getOptionsFromCoder(final Coder coder) { + if (coder.properties == null || coder.properties.length == 0) { + return 1; + } + return (0xff & coder.properties[0]) + 1; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java new file mode 100644 index 000000000..128cba928 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.util.LinkedList; + +/** + * The unit of solid compression. + */ +class Folder { + /// List of coders used in this folder, e.g. one for compression, one for encryption. + Coder[] coders; + /// Total number of input streams across all coders. + /// this field is currently unused but technically part of the 7z API + long totalInputStreams; + /// Total number of output streams across all coders. + long totalOutputStreams; + /// Mapping between input and output streams. + BindPair[] bindPairs; + /// Indices of input streams, one per input stream not listed in bindPairs. + long[] packedStreams; + /// Unpack sizes, per each output stream. + long[] unpackSizes; + /// Whether the folder has a CRC. + boolean hasCrc; + /// The CRC, if present. 
+ long crc; + /// The number of unpack substreams, product of the number of + /// output streams and the number of non-empty files in this + /// folder. + int numUnpackSubStreams; + + /** + * Sorts Coders using bind pairs. + * <p>The first coder reads from the packed stream (we currently + * only support single input stream decoders), the second reads + * from the output of the first and so on.</p> + */ + Iterable<Coder> getOrderedCoders() { + final LinkedList<Coder> l = new LinkedList<>(); + int current = (int) packedStreams[0]; // more than 2^31 coders? + while (current != -1) { + l.addLast(coders[current]); + final int pair = findBindPairForOutStream(current); + current = pair != -1 ? (int) bindPairs[pair].inIndex : -1; + } + return l; + } + + int findBindPairForInStream(final int index) { + for (int i = 0; i < bindPairs.length; i++) { + if (bindPairs[i].inIndex == index) { + return i; + } + } + return -1; + } + + int findBindPairForOutStream(final int index) { + for (int i = 0; i < bindPairs.length; i++) { + if (bindPairs[i].outIndex == index) { + return i; + } + } + return -1; + } + + long getUnpackSize() { + if (totalOutputStreams == 0) { + return 0; + } + for (int i = ((int)totalOutputStreams) - 1; i >= 0; i--) { + if (findBindPairForOutStream(i) < 0) { + return unpackSizes[i]; + } + } + return 0; + } + + long getUnpackSizeForCoder(final Coder coder) { + if (coders != null) { + for (int i = 0; i < coders.length; i++) { + if (coders[i] == coder) { + return unpackSizes[i]; + } + } + } + return 0; + } + + @Override + public String toString() { + return "Folder with " + coders.length + " coders, " + totalInputStreams + + " input streams, " + totalOutputStreams + " output streams, " + + bindPairs.length + " bind pairs, " + packedStreams.length + + " packed streams, " + unpackSizes.length + " unpack sizes, " + + (hasCrc ? "with CRC " + crc : "without CRC") + + " and " + numUnpackSubStreams + " unpack streams"; + } +} + diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMA2Decoder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMA2Decoder.java new file mode 100644 index 000000000..0f13ca95c --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMA2Decoder.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.tukaani.xz.FinishableWrapperOutputStream; +import org.tukaani.xz.FinishableOutputStream; +import org.tukaani.xz.LZMA2InputStream; +import org.tukaani.xz.LZMA2Options; + +class LZMA2Decoder extends CoderBase { + LZMA2Decoder() { + super(LZMA2Options.class, Number.class); + } + + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password) throws IOException { + try { + final int dictionarySize = getDictionarySize(coder); + return new LZMA2InputStream(in, dictionarySize); + } catch (final IllegalArgumentException ex) { + throw new IOException(ex.getMessage()); + } + } + + @Override + OutputStream encode(final OutputStream out, final Object opts) + throws IOException { + final LZMA2Options options = getOptions(opts); + final FinishableOutputStream wrapped = new FinishableWrapperOutputStream(out); + return options.getOutputStream(wrapped); + } + + @Override + byte[] getOptionsAsProperties(final Object opts) { + final int dictSize = getDictSize(opts); + final int lead = Integer.numberOfLeadingZeros(dictSize); + final int secondBit = (dictSize >>> (30 - lead)) - 2; + return new byte[] { + (byte) ((19 - lead) * 2 + secondBit) + }; + } + + @Override + Object getOptionsFromCoder(final Coder coder, final InputStream in) { + return getDictionarySize(coder); + } + + private int getDictSize(final Object opts) { + if (opts instanceof LZMA2Options) { + return ((LZMA2Options) opts).getDictSize(); + } + return numberOptionOrDefault(opts); + } + + private int getDictionarySize(final Coder coder) throws IllegalArgumentException { + final int dictionarySizeBits = 0xff & coder.properties[0]; + if ((dictionarySizeBits & (~0x3f)) != 0) { + throw new IllegalArgumentException("Unsupported LZMA2 property bits"); + } + if (dictionarySizeBits > 40) { + throw new IllegalArgumentException("Dictionary larger than 4GiB maximum size"); + } + if (dictionarySizeBits == 40) { + return 0xFFFFffff; + } + return (2 | (dictionarySizeBits & 0x1)) << (dictionarySizeBits / 2 + 11); + } + + private LZMA2Options getOptions(final Object opts) throws IOException { + if (opts instanceof LZMA2Options) { + return (LZMA2Options) opts; + } + final LZMA2Options options = new LZMA2Options(); + options.setDictSize(numberOptionOrDefault(opts)); + return options; + } + + private int numberOptionOrDefault(final Object opts) { + return numberOptionOrDefault(opts, LZMA2Options.DICT_SIZE_DEFAULT); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMADecoder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMADecoder.java new file mode 100644 index 000000000..6e3d46ccc --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMADecoder.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
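A worked example of the LZMA2 property byte handled by getDictionarySize() above (the value 19 is chosen purely for illustration): a property byte of 19 yields a dictionary of (2 | (19 & 1)) << (19 / 2 + 11) = 3 << 20 bytes = 3 MiB, while the special value 40 maps to 0xFFFFFFFF, the 4 GiB - 1 ceiling; getOptionsAsProperties() performs the inverse mapping when writing, so a 3 MiB dictionary encodes back to 19.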
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.utils.ByteUtils; +import org.apache.commons.compress.utils.FlushShieldFilterOutputStream; +import org.tukaani.xz.LZMA2Options; +import org.tukaani.xz.LZMAInputStream; +import org.tukaani.xz.LZMAOutputStream; + +class LZMADecoder extends CoderBase { + LZMADecoder() { + super(LZMA2Options.class, Number.class); + } + + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password) throws IOException { + final byte propsByte = coder.properties[0]; + final int dictSize = getDictionarySize(coder); + if (dictSize > LZMAInputStream.DICT_SIZE_MAX) { + throw new IOException("Dictionary larger than 4GiB maximum size used in " + archiveName); + } + return new LZMAInputStream(in, uncompressedLength, propsByte, dictSize); + } + + @SuppressWarnings("resource") + @Override + OutputStream encode(final OutputStream out, final Object opts) + throws IOException { + // NOOP as LZMAOutputStream throws an exception in flush + return new FlushShieldFilterOutputStream(new LZMAOutputStream(out, getOptions(opts), false)); + } + + @Override + byte[] getOptionsAsProperties(final Object opts) throws IOException { + final LZMA2Options options = getOptions(opts); + final byte props = (byte) ((options.getPb() * 5 + options.getLp()) * 9 + options.getLc()); + int dictSize = options.getDictSize(); + byte[] o = new byte[5]; + o[0] = props; + ByteUtils.toLittleEndian(o, dictSize, 1, 4); + return o; + } + + @Override + Object getOptionsFromCoder(final Coder coder, final InputStream in) throws IOException { + final byte propsByte = coder.properties[0]; + int props = propsByte & 0xFF; + int pb = props / (9 * 5); + props -= pb * 9 * 5; + int lp = props / 9; + int lc = props - lp * 9; + LZMA2Options opts = new LZMA2Options(); + opts.setPb(pb); + opts.setLcLp(lc, lp); + opts.setDictSize(getDictionarySize(coder)); + return opts; + } + + private int getDictionarySize(final Coder coder) throws IllegalArgumentException { + return (int) ByteUtils.fromLittleEndian(coder.properties, 1, 4); + } + + private LZMA2Options getOptions(final Object opts) throws IOException { + if (opts instanceof LZMA2Options) { + return (LZMA2Options) opts; + } + final LZMA2Options options = new LZMA2Options(); + options.setDictSize(numberOptionOrDefault(opts)); + return options; + } + + private int numberOptionOrDefault(final Object opts) { + return numberOptionOrDefault(opts, LZMA2Options.DICT_SIZE_DEFAULT); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java new file mode 100644 index 000000000..89a813a2a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
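A worked example of the LZMA properties byte packed and unpacked above (the parameter values are cited for illustration only): the common defaults lc = 3, lp = 0, pb = 2 pack to (2 * 5 + 0) * 9 + 3 = 93 = 0x5D, the byte most standalone .lzma streams carry, and getOptionsFromCoder() recovers the three values again by successive division by 45 and 9.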
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +final class NID { + public static final int kEnd = 0x00; + public static final int kHeader = 0x01; + public static final int kArchiveProperties = 0x02; + public static final int kAdditionalStreamsInfo = 0x03; + public static final int kMainStreamsInfo = 0x04; + public static final int kFilesInfo = 0x05; + public static final int kPackInfo = 0x06; + public static final int kUnpackInfo = 0x07; + public static final int kSubStreamsInfo = 0x08; + public static final int kSize = 0x09; + public static final int kCRC = 0x0A; + public static final int kFolder = 0x0B; + public static final int kCodersUnpackSize = 0x0C; + public static final int kNumUnpackStream = 0x0D; + public static final int kEmptyStream = 0x0E; + public static final int kEmptyFile = 0x0F; + public static final int kAnti = 0x10; + public static final int kName = 0x11; + public static final int kCTime = 0x12; + public static final int kATime = 0x13; + public static final int kMTime = 0x14; + public static final int kWinAttributes = 0x15; + public static final int kComment = 0x16; + public static final int kEncodedHeader = 0x17; + public static final int kStartPos = 0x18; + public static final int kDummy = 0x19; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java new file mode 100644 index 000000000..f95426b65 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java @@ -0,0 +1,515 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.util.Calendar; +import java.util.Collections; +import java.util.Date; +import java.util.LinkedList; +import java.util.TimeZone; + +import org.apache.commons.compress.archivers.ArchiveEntry; + +/** + * An entry in a 7z archive. 
+ * + * @NotThreadSafe + * @since 1.6 + */ +public class SevenZArchiveEntry implements ArchiveEntry { + private String name; + private boolean hasStream; + private boolean isDirectory; + private boolean isAntiItem; + private boolean hasCreationDate; + private boolean hasLastModifiedDate; + private boolean hasAccessDate; + private long creationDate; + private long lastModifiedDate; + private long accessDate; + private boolean hasWindowsAttributes; + private int windowsAttributes; + private boolean hasCrc; + private long crc, compressedCrc; + private long size, compressedSize; + private Iterable<? extends SevenZMethodConfiguration> contentMethods; + + public SevenZArchiveEntry() { + } + + /** + * Get this entry's name. + * + * <p>This method returns the raw name as it is stored inside of the archive.</p> + * + * @return This entry's name. + */ + @Override + public String getName() { + return name; + } + + /** + * Set this entry's name. + * + * @param name This entry's new name. + */ + public void setName(final String name) { + this.name = name; + } + + /** + * Whether there is any content associated with this entry. + * @return whether there is any content associated with this entry. + */ + public boolean hasStream() { + return hasStream; + } + + /** + * Sets whether there is any content associated with this entry. + * @param hasStream whether there is any content associated with this entry. + */ + public void setHasStream(final boolean hasStream) { + this.hasStream = hasStream; + } + + /** + * Return whether or not this entry represents a directory. + * + * @return True if this entry is a directory. + */ + @Override + public boolean isDirectory() { + return isDirectory; + } + + /** + * Sets whether or not this entry represents a directory. + * + * @param isDirectory True if this entry is a directory. + */ + public void setDirectory(final boolean isDirectory) { + this.isDirectory = isDirectory; + } + + /** + * Indicates whether this is an "anti-item" used in differential backups, + * meaning it should delete the same file from a previous backup. + * @return true if it is an anti-item, false otherwise + */ + public boolean isAntiItem() { + return isAntiItem; + } + + /** + * Sets whether this is an "anti-item" used in differential backups, + * meaning it should delete the same file from a previous backup. + * @param isAntiItem true if it is an anti-item, false otherwise + */ + public void setAntiItem(final boolean isAntiItem) { + this.isAntiItem = isAntiItem; + } + + /** + * Returns whether this entry has got a creation date at all. + * @return whether the entry has got a creation date + */ + public boolean getHasCreationDate() { + return hasCreationDate; + } + + /** + * Sets whether this entry has got a creation date at all. + * @param hasCreationDate whether the entry has got a creation date + */ + public void setHasCreationDate(final boolean hasCreationDate) { + this.hasCreationDate = hasCreationDate; + } + + /** + * Gets the creation date. + * @throws UnsupportedOperationException if the entry hasn't got a + * creation date. 
+ * @return the creation date + */ + public Date getCreationDate() { + if (hasCreationDate) { + return ntfsTimeToJavaTime(creationDate); + } + throw new UnsupportedOperationException( + "The entry doesn't have this timestamp"); + } + + /** + * Sets the creation date using NTFS time (100 nanosecond units + * since 1 January 1601) + * @param ntfsCreationDate the creation date + */ + public void setCreationDate(final long ntfsCreationDate) { + this.creationDate = ntfsCreationDate; + } + + /** + * Sets the creation date, + * @param creationDate the creation date + */ + public void setCreationDate(final Date creationDate) { + hasCreationDate = creationDate != null; + if (hasCreationDate) { + this.creationDate = javaTimeToNtfsTime(creationDate); + } + } + + /** + * Returns whether this entry has got a last modified date at all. + * @return whether this entry has got a last modified date at all + */ + public boolean getHasLastModifiedDate() { + return hasLastModifiedDate; + } + + /** + * Sets whether this entry has got a last modified date at all. + * @param hasLastModifiedDate whether this entry has got a last + * modified date at all + */ + public void setHasLastModifiedDate(final boolean hasLastModifiedDate) { + this.hasLastModifiedDate = hasLastModifiedDate; + } + + /** + * Gets the last modified date. + * @throws UnsupportedOperationException if the entry hasn't got a + * last modified date. + * @return the last modified date + */ + @Override + public Date getLastModifiedDate() { + if (hasLastModifiedDate) { + return ntfsTimeToJavaTime(lastModifiedDate); + } + throw new UnsupportedOperationException( + "The entry doesn't have this timestamp"); + } + + /** + * Sets the last modified date using NTFS time (100 nanosecond + * units since 1 January 1601) + * @param ntfsLastModifiedDate the last modified date + */ + public void setLastModifiedDate(final long ntfsLastModifiedDate) { + this.lastModifiedDate = ntfsLastModifiedDate; + } + + /** + * Sets the last modified date, + * @param lastModifiedDate the last modified date + */ + public void setLastModifiedDate(final Date lastModifiedDate) { + hasLastModifiedDate = lastModifiedDate != null; + if (hasLastModifiedDate) { + this.lastModifiedDate = javaTimeToNtfsTime(lastModifiedDate); + } + } + + /** + * Returns whether this entry has got an access date at all. + * @return whether this entry has got an access date at all. + */ + public boolean getHasAccessDate() { + return hasAccessDate; + } + + /** + * Sets whether this entry has got an access date at all. + * @param hasAcessDate whether this entry has got an access date at all. + */ + public void setHasAccessDate(final boolean hasAcessDate) { + this.hasAccessDate = hasAcessDate; + } + + /** + * Gets the access date. + * @throws UnsupportedOperationException if the entry hasn't got a + * access date. 
+ * @return the access date + */ + public Date getAccessDate() { + if (hasAccessDate) { + return ntfsTimeToJavaTime(accessDate); + } + throw new UnsupportedOperationException( + "The entry doesn't have this timestamp"); + } + + /** + * Sets the access date using NTFS time (100 nanosecond units + * since 1 January 1601) + * @param ntfsAccessDate the access date + */ + public void setAccessDate(final long ntfsAccessDate) { + this.accessDate = ntfsAccessDate; + } + + /** + * Sets the access date, + * @param accessDate the access date + */ + public void setAccessDate(final Date accessDate) { + hasAccessDate = accessDate != null; + if (hasAccessDate) { + this.accessDate = javaTimeToNtfsTime(accessDate); + } + } + + /** + * Returns whether this entry has windows attributes. + * @return whether this entry has windows attributes. + */ + public boolean getHasWindowsAttributes() { + return hasWindowsAttributes; + } + + /** + * Sets whether this entry has windows attributes. + * @param hasWindowsAttributes whether this entry has windows attributes. + */ + public void setHasWindowsAttributes(final boolean hasWindowsAttributes) { + this.hasWindowsAttributes = hasWindowsAttributes; + } + + /** + * Gets the windows attributes. + * @return the windows attributes + */ + public int getWindowsAttributes() { + return windowsAttributes; + } + + /** + * Sets the windows attributes. + * @param windowsAttributes the windows attributes + */ + public void setWindowsAttributes(final int windowsAttributes) { + this.windowsAttributes = windowsAttributes; + } + + /** + * Returns whether this entry has got a crc. + * + * <p>In general entries without streams don't have a CRC either.</p> + * @return whether this entry has got a crc. + */ + public boolean getHasCrc() { + return hasCrc; + } + + /** + * Sets whether this entry has got a crc. + * @param hasCrc whether this entry has got a crc. + */ + public void setHasCrc(final boolean hasCrc) { + this.hasCrc = hasCrc; + } + + /** + * Gets the CRC. + * @deprecated use getCrcValue instead. + * @return the CRC + */ + @Deprecated + public int getCrc() { + return (int) crc; + } + + /** + * Sets the CRC. + * @deprecated use setCrcValue instead. + * @param crc the CRC + */ + @Deprecated + public void setCrc(final int crc) { + this.crc = crc; + } + + /** + * Gets the CRC. + * @since Compress 1.7 + * @return the CRC + */ + public long getCrcValue() { + return crc; + } + + /** + * Sets the CRC. + * @since Compress 1.7 + * @param crc the CRC + */ + public void setCrcValue(final long crc) { + this.crc = crc; + } + + /** + * Gets the compressed CRC. + * @deprecated use getCompressedCrcValue instead. + * @return the compressed CRC + */ + @Deprecated + int getCompressedCrc() { + return (int) compressedCrc; + } + + /** + * Sets the compressed CRC. + * @deprecated use setCompressedCrcValue instead. + * @param crc the CRC + */ + @Deprecated + void setCompressedCrc(final int crc) { + this.compressedCrc = crc; + } + + /** + * Gets the compressed CRC. + * @since Compress 1.7 + * @return the CRC + */ + long getCompressedCrcValue() { + return compressedCrc; + } + + /** + * Sets the compressed CRC. + * @since Compress 1.7 + * @param crc the CRC + */ + void setCompressedCrcValue(final long crc) { + this.compressedCrc = crc; + } + + /** + * Get this entry's file size. + * + * @return This entry's file size. + */ + @Override + public long getSize() { + return size; + } + + /** + * Set this entry's file size. + * + * @param size This entry's new file size. 
+ */ + public void setSize(final long size) { + this.size = size; + } + + /** + * Get this entry's compressed file size. + * + * @return This entry's compressed file size. + */ + long getCompressedSize() { + return compressedSize; + } + + /** + * Set this entry's compressed file size. + * + * @param size This entry's new compressed file size. + */ + void setCompressedSize(final long size) { + this.compressedSize = size; + } + + /** + * Sets the (compression) methods to use for entry's content - the + * default is LZMA2. + * + * <p>Currently only {@link SevenZMethod#COPY}, {@link + * SevenZMethod#LZMA2}, {@link SevenZMethod#BZIP2} and {@link + * SevenZMethod#DEFLATE} are supported when writing archives.</p> + * + * <p>The methods will be consulted in iteration order to create + * the final output.</p> + * + * @param methods the methods to use for the content + * @since 1.8 + */ + public void setContentMethods(final Iterable<? extends SevenZMethodConfiguration> methods) { + if (methods != null) { + final LinkedList<SevenZMethodConfiguration> l = new LinkedList<>(); + for (final SevenZMethodConfiguration m : methods) { + l.addLast(m); + } + contentMethods = Collections.unmodifiableList(l); + } else { + contentMethods = null; + } + } + + /** + * Gets the (compression) methods to use for entry's content - the + * default is LZMA2. + * + * <p>Currently only {@link SevenZMethod#COPY}, {@link + * SevenZMethod#LZMA2}, {@link SevenZMethod#BZIP2} and {@link + * SevenZMethod#DEFLATE} are supported when writing archives.</p> + * + * <p>The methods will be consulted in iteration order to create + * the final output.</p> + * + * @since 1.8 + * @return the methods to use for the content + */ + public Iterable<? extends SevenZMethodConfiguration> getContentMethods() { + return contentMethods; + } + + /** + * Converts NTFS time (100 nanosecond units since 1 January 1601) + * to Java time. + * @param ntfsTime the NTFS time in 100 nanosecond units + * @return the Java time + */ + public static Date ntfsTimeToJavaTime(final long ntfsTime) { + final Calendar ntfsEpoch = Calendar.getInstance(); + ntfsEpoch.setTimeZone(TimeZone.getTimeZone("GMT+0")); + ntfsEpoch.set(1601, 0, 1, 0, 0, 0); + ntfsEpoch.set(Calendar.MILLISECOND, 0); + final long realTime = ntfsEpoch.getTimeInMillis() + (ntfsTime / (10*1000)); + return new Date(realTime); + } + + /** + * Converts Java time to NTFS time. + * @param date the Java time + * @return the NTFS time + */ + public static long javaTimeToNtfsTime(final Date date) { + final Calendar ntfsEpoch = Calendar.getInstance(); + ntfsEpoch.setTimeZone(TimeZone.getTimeZone("GMT+0")); + ntfsEpoch.set(1601, 0, 1, 0, 0, 0); + ntfsEpoch.set(Calendar.MILLISECOND, 0); + return ((date.getTime() - ntfsEpoch.getTimeInMillis())* 1000 * 10); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java new file mode 100644 index 000000000..421c34aba --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java @@ -0,0 +1,1211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
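SevenZArchiveEntry pairs each timestamp with a has-flag and exposes the NTFS conversions as public static helpers. A minimal sketch of both, assuming the well-known offset of 11,644,473,600 seconds between the 1601 and 1970 epochs; the class name TimestampHelper is made up for illustration:

    import java.util.Date;
    import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;

    class TimestampHelper {
        // The date getters throw UnsupportedOperationException when the archive
        // did not store that timestamp, so consult the matching "has" flag first.
        static Date creationDateOrNull(SevenZArchiveEntry entry) {
            return entry.getHasCreationDate() ? entry.getCreationDate() : null;
        }

        static void ntfsRoundTrip() {
            // 1970-01-01T00:00:00Z expressed in NTFS ticks (100 ns units since 1601-01-01 UTC).
            long ntfsUnixEpoch = 116_444_736_000_000_000L;
            Date epoch = SevenZArchiveEntry.ntfsTimeToJavaTime(ntfsUnixEpoch); // equals new Date(0)
            long back = SevenZArchiveEntry.javaTimeToNtfsTime(epoch);          // ntfsUnixEpoch again
        }
    }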
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.Closeable; +import java.io.DataInputStream; +import java.io.File; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.CharBuffer; +import java.nio.channels.SeekableByteChannel; +import java.nio.charset.StandardCharsets; +import java.nio.charset.CharsetEncoder; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.EnumSet; +import java.util.LinkedList; +import java.util.zip.CRC32; + +import org.apache.commons.compress.utils.BoundedInputStream; +import org.apache.commons.compress.utils.CRC32VerifyingInputStream; +import org.apache.commons.compress.utils.CharsetNames; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * Reads a 7z file, using SeekableByteChannel under + * the covers. + * <p> + * The 7z file format is a flexible container + * that can contain many compression and + * encryption types, but at the moment only + * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256 + * are supported. + * <p> + * The format is very Windows/Intel specific, + * so it uses little-endian byte order, + * doesn't store user/group or permission bits, + * and represents times using NTFS timestamps + * (100 nanosecond units since 1 January 1601). + * Hence the official tools recommend against + * using it for backup purposes on *nix, and + * recommend .tar.7z or .tar.lzma or .tar.xz + * instead. + * <p> + * Both the header and file contents may be + * compressed and/or encrypted. With both + * encrypted, neither file names nor file + * contents can be read, but the use of + * encryption isn't plausibly deniable. 
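In practice, reading an archive with the class described above boils down to looping over getNextEntry() and pulling bytes with read(). A minimal sketch using only the API shown in this file; the class name and archive path are placeholders:

    import java.io.File;
    import java.io.IOException;
    import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
    import org.apache.commons.compress.archivers.sevenz.SevenZFile;

    class SevenZReadSketch {
        static void readAll(File archive) throws IOException {
            try (SevenZFile sevenZFile = new SevenZFile(archive)) {
                SevenZArchiveEntry entry;
                byte[] buffer = new byte[8192];
                while ((entry = sevenZFile.getNextEntry()) != null) {
                    if (!entry.hasStream()) {
                        continue; // directory or empty entry, nothing to read
                    }
                    int n;
                    while ((n = sevenZFile.read(buffer)) != -1) {
                        // consume n bytes of entry.getName() here
                    }
                }
            }
        }
    }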
+ * + * @NotThreadSafe + * @since 1.6 + */ +public class SevenZFile implements Closeable { + static final int SIGNATURE_HEADER_SIZE = 32; + + private final String fileName; + private SeekableByteChannel channel; + private final Archive archive; + private int currentEntryIndex = -1; + private int currentFolderIndex = -1; + private InputStream currentFolderInputStream = null; + private byte[] password; + + private long compressedBytesReadFromCurrentEntry; + private long uncompressedBytesReadFromCurrentEntry; + + private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>(); + + // shared with SevenZOutputFile and tests, neither mutates it + static final byte[] sevenZSignature = { //NOSONAR + (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C + }; + + /** + * Reads a file as 7z archive + * + * @param filename the file to read + * @param password optional password if the archive is encrypted + * @throws IOException if reading the archive fails + * @since 1.17 + */ + public SevenZFile(final File filename, final char[] password) throws IOException { + this(Files.newByteChannel(filename.toPath(), EnumSet.of(StandardOpenOption.READ)), + filename.getAbsolutePath(), utf16Decode(password), true); + } + + /** + * Reads a file as 7z archive + * + * @param filename the file to read + * @param password optional password if the archive is encrypted - + * the byte array is supposed to be the UTF16-LE encoded + * representation of the password. + * @throws IOException if reading the archive fails + * @deprecated use the char[]-arg version for the password instead + */ + public SevenZFile(final File filename, final byte[] password) throws IOException { + this(Files.newByteChannel(filename.toPath(), EnumSet.of(StandardOpenOption.READ)), + filename.getAbsolutePath(), password, true); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + * <p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.</p> + * + * @param channel the channel to read + * @throws IOException if reading the archive fails + * @since 1.13 + */ + public SevenZFile(final SeekableByteChannel channel) throws IOException { + this(channel, "unknown archive", (char[]) null); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + * <p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.</p> + * + * @param channel the channel to read + * @param password optional password if the archive is encrypted + * @throws IOException if reading the archive fails + * @since 1.17 + */ + public SevenZFile(final SeekableByteChannel channel, + final char[] password) throws IOException { + this(channel, "unknown archive", utf16Decode(password)); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + * <p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.</p> + * + * @param channel the channel to read + * @param filename name of the archive - only used for error reporting + * @param password optional password if the archive is encrypted + * @throws IOException if reading the archive fails + * @since 1.17 + */ + public SevenZFile(final SeekableByteChannel channel, String filename, + final char[] password) throws IOException { + this(channel, filename, utf16Decode(password), false); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + * <p>{@link + * 
org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.</p> + * + * @param channel the channel to read + * @param filename name of the archive - only used for error reporting + * @throws IOException if reading the archive fails + * @since 1.17 + */ + public SevenZFile(final SeekableByteChannel channel, String filename) + throws IOException { + this(channel, filename, null, false); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + * <p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.</p> + * + * @param channel the channel to read + * @param password optional password if the archive is encrypted - + * the byte array is supposed to be the UTF16-LE encoded + * representation of the password. + * @throws IOException if reading the archive fails + * @since 1.13 + * @deprecated use the char[]-arg version for the password instead + */ + public SevenZFile(final SeekableByteChannel channel, + final byte[] password) throws IOException { + this(channel, "unknown archive", password); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + * <p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.</p> + * + * @param channel the channel to read + * @param filename name of the archive - only used for error reporting + * @param password optional password if the archive is encrypted - + * the byte array is supposed to be the UTF16-LE encoded + * representation of the password. + * @throws IOException if reading the archive fails + * @since 1.13 + * @deprecated use the char[]-arg version for the password instead + */ + public SevenZFile(final SeekableByteChannel channel, String filename, + final byte[] password) throws IOException { + this(channel, filename, password, false); + } + + private SevenZFile(final SeekableByteChannel channel, String filename, + final byte[] password, boolean closeOnError) throws IOException { + boolean succeeded = false; + this.channel = channel; + this.fileName = filename; + try { + archive = readHeaders(password); + if (password != null) { + this.password = Arrays.copyOf(password, password.length); + } else { + this.password = null; + } + succeeded = true; + } finally { + if (!succeeded && closeOnError) { + this.channel.close(); + } + } + } + + /** + * Reads a file as unencrypted 7z archive + * + * @param filename the file to read + * @throws IOException if reading the archive fails + */ + public SevenZFile(final File filename) throws IOException { + this(filename, (char[]) null); + } + + /** + * Closes the archive. + * @throws IOException if closing the file fails + */ + @Override + public void close() throws IOException { + if (channel != null) { + try { + channel.close(); + } finally { + channel = null; + if (password != null) { + Arrays.fill(password, (byte) 0); + } + password = null; + } + } + } + + /** + * Returns the next Archive Entry in this archive. 
+ * + * @return the next entry, + * or {@code null} if there are no more entries + * @throws IOException if the next entry could not be read + */ + public SevenZArchiveEntry getNextEntry() throws IOException { + if (currentEntryIndex >= archive.files.length - 1) { + return null; + } + ++currentEntryIndex; + final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; + buildDecodingStream(); + uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; + return entry; + } + + /** + * Returns meta-data of all archive entries. + * + * <p>This method only provides meta-data, the entries can not be + * used to read the contents, you still need to process all + * entries in order using {@link #getNextEntry} for that.</p> + * + * <p>The content methods are only available for entries that have + * already been reached via {@link #getNextEntry}.</p> + * + * @return meta-data of all archive entries. + * @since 1.11 + */ + public Iterable<SevenZArchiveEntry> getEntries() { + return Arrays.asList(archive.files); + } + + private Archive readHeaders(final byte[] password) throws IOException { + ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */) + .order(ByteOrder.LITTLE_ENDIAN); + readFully(buf); + final byte[] signature = new byte[6]; + buf.get(signature); + if (!Arrays.equals(signature, sevenZSignature)) { + throw new IOException("Bad 7z signature"); + } + // 7zFormat.txt has it wrong - it's first major then minor + final byte archiveVersionMajor = buf.get(); + final byte archiveVersionMinor = buf.get(); + if (archiveVersionMajor != 0) { + throw new IOException(String.format("Unsupported 7z version (%d,%d)", + archiveVersionMajor, archiveVersionMinor)); + } + + final long startHeaderCrc = 0xffffFFFFL & buf.getInt(); + final StartHeader startHeader = readStartHeader(startHeaderCrc); + + final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; + if (nextHeaderSizeInt != startHeader.nextHeaderSize) { + throw new IOException("cannot handle nextHeaderSize " + startHeader.nextHeaderSize); + } + channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); + buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); + readFully(buf); + final CRC32 crc = new CRC32(); + crc.update(buf.array()); + if (startHeader.nextHeaderCrc != crc.getValue()) { + throw new IOException("NextHeader CRC mismatch"); + } + + Archive archive = new Archive(); + int nid = getUnsignedByte(buf); + if (nid == NID.kEncodedHeader) { + buf = readEncodedHeader(buf, archive, password); + // Archive gets rebuilt with the new header + archive = new Archive(); + nid = getUnsignedByte(buf); + } + if (nid == NID.kHeader) { + readHeader(buf, archive); + } else { + throw new IOException("Broken or unsupported archive: no Header"); + } + return archive; + } + + private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { + final StartHeader startHeader = new StartHeader(); + // using Stream rather than ByteBuffer for the benefit of the + // built-in CRC check + try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream( + new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) { + startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); + startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); + startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); + return startHeader; + } + } + + private void 
readHeader(final ByteBuffer header, final Archive archive) throws IOException { + int nid = getUnsignedByte(header); + + if (nid == NID.kArchiveProperties) { + readArchiveProperties(header); + nid = getUnsignedByte(header); + } + + if (nid == NID.kAdditionalStreamsInfo) { + throw new IOException("Additional streams unsupported"); + //nid = header.readUnsignedByte(); + } + + if (nid == NID.kMainStreamsInfo) { + readStreamsInfo(header, archive); + nid = getUnsignedByte(header); + } + + if (nid == NID.kFilesInfo) { + readFilesInfo(header, archive); + nid = getUnsignedByte(header); + } + + if (nid != NID.kEnd) { + throw new IOException("Badly terminated header, found " + nid); + } + } + + private void readArchiveProperties(final ByteBuffer input) throws IOException { + // FIXME: the reference implementation just throws them away? + int nid = getUnsignedByte(input); + while (nid != NID.kEnd) { + final long propertySize = readUint64(input); + final byte[] property = new byte[(int)propertySize]; + input.get(property); + nid = getUnsignedByte(input); + } + } + + private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, + final byte[] password) throws IOException { + readStreamsInfo(header, archive); + + // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? + final Folder folder = archive.folders[0]; + final int firstPackStreamIndex = 0; + final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + + 0; + + channel.position(folderOffset); + InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, + archive.packSizes[firstPackStreamIndex]); + for (final Coder coder : folder.getOrderedCoders()) { + if (coder.numInStreams != 1 || coder.numOutStreams != 1) { + throw new IOException("Multi input/output stream coders are not yet supported"); + } + inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR + folder.getUnpackSizeForCoder(coder), coder, password); + } + if (folder.hasCrc) { + inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack, + folder.getUnpackSize(), folder.crc); + } + final byte[] nextHeader = new byte[(int)folder.getUnpackSize()]; + try (DataInputStream nextHeaderInputStream = new DataInputStream(inputStreamStack)) { + nextHeaderInputStream.readFully(nextHeader); + } + return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); + } + + private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { + int nid = getUnsignedByte(header); + + if (nid == NID.kPackInfo) { + readPackInfo(header, archive); + nid = getUnsignedByte(header); + } + + if (nid == NID.kUnpackInfo) { + readUnpackInfo(header, archive); + nid = getUnsignedByte(header); + } else { + // archive without unpack/coders info + archive.folders = new Folder[0]; + } + + if (nid == NID.kSubStreamsInfo) { + readSubStreamsInfo(header, archive); + nid = getUnsignedByte(header); + } + + if (nid != NID.kEnd) { + throw new IOException("Badly terminated StreamsInfo"); + } + } + + private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { + archive.packPos = readUint64(header); + final long numPackStreams = readUint64(header); + int nid = getUnsignedByte(header); + if (nid == NID.kSize) { + archive.packSizes = new long[(int)numPackStreams]; + for (int i = 0; i < archive.packSizes.length; i++) { + archive.packSizes[i] = readUint64(header); + } + nid = getUnsignedByte(header); + } + + if (nid == NID.kCRC) { + archive.packCrcsDefined = 
readAllOrBits(header, (int)numPackStreams); + archive.packCrcs = new long[(int)numPackStreams]; + for (int i = 0; i < (int)numPackStreams; i++) { + if (archive.packCrcsDefined.get(i)) { + archive.packCrcs[i] = 0xffffFFFFL & header.getInt(); + } + } + + nid = getUnsignedByte(header); + } + + if (nid != NID.kEnd) { + throw new IOException("Badly terminated PackInfo (" + nid + ")"); + } + } + + private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { + int nid = getUnsignedByte(header); + if (nid != NID.kFolder) { + throw new IOException("Expected kFolder, got " + nid); + } + final long numFolders = readUint64(header); + final Folder[] folders = new Folder[(int)numFolders]; + archive.folders = folders; + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("External unsupported"); + } + for (int i = 0; i < (int)numFolders; i++) { + folders[i] = readFolder(header); + } + + nid = getUnsignedByte(header); + if (nid != NID.kCodersUnpackSize) { + throw new IOException("Expected kCodersUnpackSize, got " + nid); + } + for (final Folder folder : folders) { + folder.unpackSizes = new long[(int)folder.totalOutputStreams]; + for (int i = 0; i < folder.totalOutputStreams; i++) { + folder.unpackSizes[i] = readUint64(header); + } + } + + nid = getUnsignedByte(header); + if (nid == NID.kCRC) { + final BitSet crcsDefined = readAllOrBits(header, (int)numFolders); + for (int i = 0; i < (int)numFolders; i++) { + if (crcsDefined.get(i)) { + folders[i].hasCrc = true; + folders[i].crc = 0xffffFFFFL & header.getInt(); + } else { + folders[i].hasCrc = false; + } + } + + nid = getUnsignedByte(header); + } + + if (nid != NID.kEnd) { + throw new IOException("Badly terminated UnpackInfo"); + } + } + + private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { + for (final Folder folder : archive.folders) { + folder.numUnpackSubStreams = 1; + } + int totalUnpackStreams = archive.folders.length; + + int nid = getUnsignedByte(header); + if (nid == NID.kNumUnpackStream) { + totalUnpackStreams = 0; + for (final Folder folder : archive.folders) { + final long numStreams = readUint64(header); + folder.numUnpackSubStreams = (int)numStreams; + totalUnpackStreams += numStreams; + } + nid = getUnsignedByte(header); + } + + final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); + subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; + subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); + subStreamsInfo.crcs = new long[totalUnpackStreams]; + + int nextUnpackStream = 0; + for (final Folder folder : archive.folders) { + if (folder.numUnpackSubStreams == 0) { + continue; + } + long sum = 0; + if (nid == NID.kSize) { + for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { + final long size = readUint64(header); + subStreamsInfo.unpackSizes[nextUnpackStream++] = size; + sum += size; + } + } + subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; + } + if (nid == NID.kSize) { + nid = getUnsignedByte(header); + } + + int numDigests = 0; + for (final Folder folder : archive.folders) { + if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { + numDigests += folder.numUnpackSubStreams; + } + } + + if (nid == NID.kCRC) { + final BitSet hasMissingCrc = readAllOrBits(header, numDigests); + final long[] missingCrcs = new long[numDigests]; + for (int i = 0; i < numDigests; i++) { + if (hasMissingCrc.get(i)) { + missingCrcs[i] = 0xffffFFFFL & header.getInt(); + } + } + int nextCrc = 
0; + int nextMissingCrc = 0; + for (final Folder folder: archive.folders) { + if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { + subStreamsInfo.hasCrc.set(nextCrc, true); + subStreamsInfo.crcs[nextCrc] = folder.crc; + ++nextCrc; + } else { + for (int i = 0; i < folder.numUnpackSubStreams; i++) { + subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); + subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; + ++nextCrc; + ++nextMissingCrc; + } + } + } + + nid = getUnsignedByte(header); + } + + if (nid != NID.kEnd) { + throw new IOException("Badly terminated SubStreamsInfo"); + } + + archive.subStreamsInfo = subStreamsInfo; + } + + private Folder readFolder(final ByteBuffer header) throws IOException { + final Folder folder = new Folder(); + + final long numCoders = readUint64(header); + final Coder[] coders = new Coder[(int)numCoders]; + long totalInStreams = 0; + long totalOutStreams = 0; + for (int i = 0; i < coders.length; i++) { + coders[i] = new Coder(); + final int bits = getUnsignedByte(header); + final int idSize = bits & 0xf; + final boolean isSimple = (bits & 0x10) == 0; + final boolean hasAttributes = (bits & 0x20) != 0; + final boolean moreAlternativeMethods = (bits & 0x80) != 0; + + coders[i].decompressionMethodId = new byte[idSize]; + header.get(coders[i].decompressionMethodId); + if (isSimple) { + coders[i].numInStreams = 1; + coders[i].numOutStreams = 1; + } else { + coders[i].numInStreams = readUint64(header); + coders[i].numOutStreams = readUint64(header); + } + totalInStreams += coders[i].numInStreams; + totalOutStreams += coders[i].numOutStreams; + if (hasAttributes) { + final long propertiesSize = readUint64(header); + coders[i].properties = new byte[(int)propertiesSize]; + header.get(coders[i].properties); + } + // would need to keep looping as above: + while (moreAlternativeMethods) { + throw new IOException("Alternative methods are unsupported, please report. 
" + + "The reference implementation doesn't support them either."); + } + } + folder.coders = coders; + folder.totalInputStreams = totalInStreams; + folder.totalOutputStreams = totalOutStreams; + + if (totalOutStreams == 0) { + throw new IOException("Total output streams can't be 0"); + } + final long numBindPairs = totalOutStreams - 1; + final BindPair[] bindPairs = new BindPair[(int)numBindPairs]; + for (int i = 0; i < bindPairs.length; i++) { + bindPairs[i] = new BindPair(); + bindPairs[i].inIndex = readUint64(header); + bindPairs[i].outIndex = readUint64(header); + } + folder.bindPairs = bindPairs; + + if (totalInStreams < numBindPairs) { + throw new IOException("Total input streams can't be less than the number of bind pairs"); + } + final long numPackedStreams = totalInStreams - numBindPairs; + final long packedStreams[] = new long[(int)numPackedStreams]; + if (numPackedStreams == 1) { + int i; + for (i = 0; i < (int)totalInStreams; i++) { + if (folder.findBindPairForInStream(i) < 0) { + break; + } + } + if (i == (int)totalInStreams) { + throw new IOException("Couldn't find stream's bind pair index"); + } + packedStreams[0] = i; + } else { + for (int i = 0; i < (int)numPackedStreams; i++) { + packedStreams[i] = readUint64(header); + } + } + folder.packedStreams = packedStreams; + + return folder; + } + + private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { + final int areAllDefined = getUnsignedByte(header); + final BitSet bits; + if (areAllDefined != 0) { + bits = new BitSet(size); + for (int i = 0; i < size; i++) { + bits.set(i, true); + } + } else { + bits = readBits(header, size); + } + return bits; + } + + private BitSet readBits(final ByteBuffer header, final int size) throws IOException { + final BitSet bits = new BitSet(size); + int mask = 0; + int cache = 0; + for (int i = 0; i < size; i++) { + if (mask == 0) { + mask = 0x80; + cache = getUnsignedByte(header); + } + bits.set(i, (cache & mask) != 0); + mask >>>= 1; + } + return bits; + } + + private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { + final long numFiles = readUint64(header); + final SevenZArchiveEntry[] files = new SevenZArchiveEntry[(int)numFiles]; + for (int i = 0; i < files.length; i++) { + files[i] = new SevenZArchiveEntry(); + } + BitSet isEmptyStream = null; + BitSet isEmptyFile = null; + BitSet isAnti = null; + while (true) { + final int propertyType = getUnsignedByte(header); + if (propertyType == 0) { + break; + } + final long size = readUint64(header); + switch (propertyType) { + case NID.kEmptyStream: { + isEmptyStream = readBits(header, files.length); + break; + } + case NID.kEmptyFile: { + if (isEmptyStream == null) { // protect against NPE + throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); + } + isEmptyFile = readBits(header, isEmptyStream.cardinality()); + break; + } + case NID.kAnti: { + if (isEmptyStream == null) { // protect against NPE + throw new IOException("Header format error: kEmptyStream must appear before kAnti"); + } + isAnti = readBits(header, isEmptyStream.cardinality()); + break; + } + case NID.kName: { + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("Not implemented"); + } + if (((size - 1) & 1) != 0) { + throw new IOException("File names length invalid"); + } + final byte[] names = new byte[(int)(size - 1)]; + header.get(names); + int nextFile = 0; + int nextName = 0; + for (int i = 0; i < names.length; i += 2) { + 
if (names[i] == 0 && names[i+1] == 0) { + files[nextFile++].setName(new String(names, nextName, i-nextName, CharsetNames.UTF_16LE)); + nextName = i + 2; + } + } + if (nextName != names.length || nextFile != files.length) { + throw new IOException("Error parsing file names"); + } + break; + } + case NID.kCTime: { + final BitSet timesDefined = readAllOrBits(header, files.length); + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("Unimplemented"); + } + for (int i = 0; i < files.length; i++) { + files[i].setHasCreationDate(timesDefined.get(i)); + if (files[i].getHasCreationDate()) { + files[i].setCreationDate(header.getLong()); + } + } + break; + } + case NID.kATime: { + final BitSet timesDefined = readAllOrBits(header, files.length); + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("Unimplemented"); + } + for (int i = 0; i < files.length; i++) { + files[i].setHasAccessDate(timesDefined.get(i)); + if (files[i].getHasAccessDate()) { + files[i].setAccessDate(header.getLong()); + } + } + break; + } + case NID.kMTime: { + final BitSet timesDefined = readAllOrBits(header, files.length); + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("Unimplemented"); + } + for (int i = 0; i < files.length; i++) { + files[i].setHasLastModifiedDate(timesDefined.get(i)); + if (files[i].getHasLastModifiedDate()) { + files[i].setLastModifiedDate(header.getLong()); + } + } + break; + } + case NID.kWinAttributes: { + final BitSet attributesDefined = readAllOrBits(header, files.length); + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("Unimplemented"); + } + for (int i = 0; i < files.length; i++) { + files[i].setHasWindowsAttributes(attributesDefined.get(i)); + if (files[i].getHasWindowsAttributes()) { + files[i].setWindowsAttributes(header.getInt()); + } + } + break; + } + case NID.kStartPos: { + throw new IOException("kStartPos is unsupported, please report"); + } + case NID.kDummy: { + // 7z 9.20 asserts the content is all zeros and ignores the property + // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 + + if (skipBytesFully(header, size) < size) { + throw new IOException("Incomplete kDummy property"); + } + break; + } + + default: { + // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 + if (skipBytesFully(header, size) < size) { + throw new IOException("Incomplete property of type " + propertyType); + } + break; + } + } + } + int nonEmptyFileCounter = 0; + int emptyFileCounter = 0; + for (int i = 0; i < files.length; i++) { + files[i].setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); + if (files[i].hasStream()) { + files[i].setDirectory(false); + files[i].setAntiItem(false); + files[i].setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); + files[i].setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); + files[i].setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); + ++nonEmptyFileCounter; + } else { + files[i].setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); + files[i].setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); + files[i].setHasCrc(false); + files[i].setSize(0); + ++emptyFileCounter; + } + } + archive.files = files; + calculateStreamMap(archive); + } + + private void calculateStreamMap(final Archive archive) throws IOException { + final StreamMap streamMap = new StreamMap(); + + 
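        // The stream map built below ties the archive's layers together:
        // - folderFirstPackStreamIndex: index of the first packed stream belonging to each folder
        // - packStreamOffsets: byte offset of each packed stream, relative to the start of the packed data area
        // - folderFirstFileIndex: first archive entry whose content lives in each folder
        // - fileFolderIndex: folder index for every entry (-1 when the entry carries no
        //   stream and no folder is currently open)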
int nextFolderPackStreamIndex = 0; + final int numFolders = archive.folders != null ? archive.folders.length : 0; + streamMap.folderFirstPackStreamIndex = new int[numFolders]; + for (int i = 0; i < numFolders; i++) { + streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; + nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; + } + + long nextPackStreamOffset = 0; + final int numPackSizes = archive.packSizes != null ? archive.packSizes.length : 0; + streamMap.packStreamOffsets = new long[numPackSizes]; + for (int i = 0; i < numPackSizes; i++) { + streamMap.packStreamOffsets[i] = nextPackStreamOffset; + nextPackStreamOffset += archive.packSizes[i]; + } + + streamMap.folderFirstFileIndex = new int[numFolders]; + streamMap.fileFolderIndex = new int[archive.files.length]; + int nextFolderIndex = 0; + int nextFolderUnpackStreamIndex = 0; + for (int i = 0; i < archive.files.length; i++) { + if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { + streamMap.fileFolderIndex[i] = -1; + continue; + } + if (nextFolderUnpackStreamIndex == 0) { + for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { + streamMap.folderFirstFileIndex[nextFolderIndex] = i; + if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { + break; + } + } + if (nextFolderIndex >= archive.folders.length) { + throw new IOException("Too few folders in archive"); + } + } + streamMap.fileFolderIndex[i] = nextFolderIndex; + if (!archive.files[i].hasStream()) { + continue; + } + ++nextFolderUnpackStreamIndex; + if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { + ++nextFolderIndex; + nextFolderUnpackStreamIndex = 0; + } + } + + archive.streamMap = streamMap; + } + + private void buildDecodingStream() throws IOException { + final int folderIndex = archive.streamMap.fileFolderIndex[currentEntryIndex]; + if (folderIndex < 0) { + deferredBlockStreams.clear(); + // TODO: previously it'd return an empty stream? + // new BoundedInputStream(new ByteArrayInputStream(new byte[0]), 0); + return; + } + final SevenZArchiveEntry file = archive.files[currentEntryIndex]; + if (currentFolderIndex == folderIndex) { + // (COMPRESS-320). + // The current entry is within the same (potentially opened) folder. The + // previous stream has to be fully decoded before we can start reading + // but don't do it eagerly -- if the user skips over the entire folder nothing + // is effectively decompressed. + + file.setContentMethods(archive.files[currentEntryIndex - 1].getContentMethods()); + } else { + // We're opening a new folder. Discard any queued streams/ folder stream. 
+ currentFolderIndex = folderIndex; + deferredBlockStreams.clear(); + if (currentFolderInputStream != null) { + currentFolderInputStream.close(); + currentFolderInputStream = null; + } + + final Folder folder = archive.folders[folderIndex]; + final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; + final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + + archive.streamMap.packStreamOffsets[firstPackStreamIndex]; + currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); + } + + InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); + if (file.getHasCrc()) { + fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); + } + + deferredBlockStreams.add(fileStream); + } + + private InputStream buildDecoderStack(final Folder folder, final long folderOffset, + final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { + channel.position(folderOffset); + InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream( + new BoundedSeekableByteChannelInputStream(channel, + archive.packSizes[firstPackStreamIndex]))) { + @Override + public int read() throws IOException { + final int r = in.read(); + if (r >= 0) { + count(1); + } + return r; + } + @Override + public int read(final byte[] b) throws IOException { + return read(b, 0, b.length); + } + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + final int r = in.read(b, off, len); + if (r >= 0) { + count(r); + } + return r; + } + private void count(int c) { + compressedBytesReadFromCurrentEntry += c; + } + }; + final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); + for (final Coder coder : folder.getOrderedCoders()) { + if (coder.numInStreams != 1 || coder.numOutStreams != 1) { + throw new IOException("Multi input/output stream coders are not yet supported"); + } + final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); + inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, + folder.getUnpackSizeForCoder(coder), coder, password); + methods.addFirst(new SevenZMethodConfiguration(method, + Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); + } + entry.setContentMethods(methods); + if (folder.hasCrc) { + return new CRC32VerifyingInputStream(inputStreamStack, + folder.getUnpackSize(), folder.crc); + } + return inputStreamStack; + } + + /** + * Reads a byte of data. + * + * @return the byte read, or -1 if end of input is reached + * @throws IOException + * if an I/O error has occurred + */ + public int read() throws IOException { + int b = getCurrentStream().read(); + if (b >= 0) { + uncompressedBytesReadFromCurrentEntry++; + } + return b; + } + + private InputStream getCurrentStream() throws IOException { + if (archive.files[currentEntryIndex].getSize() == 0) { + return new ByteArrayInputStream(new byte[0]); + } + if (deferredBlockStreams.isEmpty()) { + throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); + } + + while (deferredBlockStreams.size() > 1) { + // In solid compression mode we need to decompress all leading folder' + // streams to get access to an entry. We defer this until really needed + // so that entire blocks can be skipped without wasting time for decompression. 
+ try (final InputStream stream = deferredBlockStreams.remove(0)) { + IOUtils.skip(stream, Long.MAX_VALUE); + } + compressedBytesReadFromCurrentEntry = 0; + } + + return deferredBlockStreams.get(0); + } + + /** + * Reads data into an array of bytes. + * + * @param b the array to write data to + * @return the number of bytes read, or -1 if end of input is reached + * @throws IOException + * if an I/O error has occurred + */ + public int read(final byte[] b) throws IOException { + return read(b, 0, b.length); + } + + /** + * Reads data into an array of bytes. + * + * @param b the array to write data to + * @param off offset into the buffer to start filling at + * @param len of bytes to read + * @return the number of bytes read, or -1 if end of input is reached + * @throws IOException + * if an I/O error has occurred + */ + public int read(final byte[] b, final int off, final int len) throws IOException { + int cnt = getCurrentStream().read(b, off, len); + if (cnt > 0) { + uncompressedBytesReadFromCurrentEntry += cnt; + } + return cnt; + } + + /** + * Provides statistics for bytes read from the current entry. + * + * @return statistics for bytes read from the current entry + * @since 1.17 + */ + public InputStreamStatistics getStatisticsForCurrentEntry() { + return new InputStreamStatistics() { + @Override + public long getCompressedCount() { + return compressedBytesReadFromCurrentEntry; + } + @Override + public long getUncompressedCount() { + return uncompressedBytesReadFromCurrentEntry; + } + }; + } + + private static long readUint64(final ByteBuffer in) throws IOException { + // long rather than int as it might get shifted beyond the range of an int + final long firstByte = getUnsignedByte(in); + int mask = 0x80; + long value = 0; + for (int i = 0; i < 8; i++) { + if ((firstByte & mask) == 0) { + return value | ((firstByte & (mask - 1)) << (8 * i)); + } + final long nextByte = getUnsignedByte(in); + value |= nextByte << (8 * i); + mask >>>= 1; + } + return value; + } + + private static int getUnsignedByte(ByteBuffer buf) { + return buf.get() & 0xff; + } + + /** + * Checks if the signature matches what is expected for a 7z file. + * + * @param signature + * the bytes to check + * @param length + * the number of bytes to check + * @return true, if this is the signature of a 7z archive. 
+ * @since 1.8 + */ + public static boolean matches(final byte[] signature, final int length) { + if (length < sevenZSignature.length) { + return false; + } + + for (int i = 0; i < sevenZSignature.length; i++) { + if (signature[i] != sevenZSignature[i]) { + return false; + } + } + return true; + } + + private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) throws IOException { + if (bytesToSkip < 1) { + return 0; + } + int current = input.position(); + int maxSkip = input.remaining(); + if (maxSkip < bytesToSkip) { + bytesToSkip = maxSkip; + } + input.position(current + (int) bytesToSkip); + return bytesToSkip; + } + + private void readFully(ByteBuffer buf) throws IOException { + buf.rewind(); + IOUtils.readFully(channel, buf); + buf.flip(); + } + + @Override + public String toString() { + return archive.toString(); + } + + private static final CharsetEncoder PASSWORD_ENCODER = StandardCharsets.UTF_16LE.newEncoder(); + + private static byte[] utf16Decode(char[] chars) throws IOException { + if (chars == null) { + return null; + } + ByteBuffer encoded = PASSWORD_ENCODER.encode(CharBuffer.wrap(chars)); + if (encoded.hasArray()) { + return encoded.array(); + } + byte[] e = new byte[encoded.remaining()]; + encoded.get(e); + return e; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethod.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethod.java new file mode 100644 index 000000000..3c446cc2c --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethod.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.util.Arrays; + +/** + * The (partially) supported compression/encryption methods used in 7z archives. + * + * <p>All methods with a _FILTER suffix are used as preprocessors with + * the goal of creating a better compression ratio with the compressor + * that comes next in the chain of methods. 7z will in general only + * allow them to be used together with a "real" compression method but + * Commons Compress doesn't enforce this.</p> + * + * <p>The BCJ_ filters work on executable files for the given platform + * and convert relative addresses to absolute addresses in CALL + * instructions. 
This means they are only useful when applied to + * executables of the chosen platform.</p> + */ +public enum SevenZMethod { + /** no compression at all */ + COPY(new byte[] { (byte)0x00 }), + /** LZMA - only supported when reading */ + LZMA(new byte[] { (byte)0x03, (byte)0x01, (byte)0x01 }), + /** LZMA2 */ + LZMA2(new byte[] { (byte)0x21 }), + /** Deflate */ + DEFLATE(new byte[] { (byte)0x04, (byte)0x01, (byte)0x08 }), + /** + * Deflate64 + * @since 1.16 + */ + DEFLATE64(new byte[] { (byte)0x04, (byte)0x01, (byte)0x09 }), + /** BZIP2 */ + BZIP2(new byte[] { (byte)0x04, (byte)0x02, (byte)0x02 }), + /** + * AES encryption with a key length of 256 bit using SHA256 for + * hashes - only supported when reading + */ + AES256SHA256(new byte[] { (byte)0x06, (byte)0xf1, (byte)0x07, (byte)0x01 }), + /** + * BCJ x86 platform version 1. + * @since 1.8 + */ + BCJ_X86_FILTER(new byte[] { 0x03, 0x03, 0x01, 0x03 }), + /** + * BCJ PowerPC platform. + * @since 1.8 + */ + BCJ_PPC_FILTER(new byte[] { 0x03, 0x03, 0x02, 0x05 }), + /** + * BCJ I64 platform. + * @since 1.8 + */ + BCJ_IA64_FILTER(new byte[] { 0x03, 0x03, 0x04, 0x01 }), + /** + * BCJ ARM platform. + * @since 1.8 + */ + BCJ_ARM_FILTER(new byte[] { 0x03, 0x03, 0x05, 0x01 }), + /** + * BCJ ARM Thumb platform. + * @since 1.8 + */ + BCJ_ARM_THUMB_FILTER(new byte[] { 0x03, 0x03, 0x07, 0x01 }), + /** + * BCJ Sparc platform. + * @since 1.8 + */ + BCJ_SPARC_FILTER(new byte[] { 0x03, 0x03, 0x08, 0x05 }), + /** + * Delta filter. + * @since 1.8 + */ + DELTA_FILTER(new byte[] { 0x03 }); + + private final byte[] id; + + SevenZMethod(final byte[] id) { + this.id = id; + } + + byte[] getId() { + final byte[] copy = new byte[id.length]; + System.arraycopy(id, 0, copy, 0, id.length); + return copy; + } + + static SevenZMethod byId(final byte[] id) { + for (final SevenZMethod m : SevenZMethod.class.getEnumConstants()) { + if (Arrays.equals(m.id, id)) { + return m; + } + } + return null; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethodConfiguration.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethodConfiguration.java new file mode 100644 index 000000000..bc47ee0a6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethodConfiguration.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +/** + * Combines a SevenZMethod with configuration options for the method. + * + * <p>The exact type and interpretation of options depends on the + * method being configured. 
Currently supported are:</p> + * + * <table summary="Options"> + * <tr><th>Method</th><th>Option Type</th><th>Description</th></tr> + * <tr><td>BZIP2</td><td>Number</td><td>Block Size - an number between 1 and 9</td></tr> + * <tr><td>DEFLATE</td><td>Number</td><td>Compression Level - an number between 1 and 9</td></tr> + * <tr><td>LZMA2</td><td>Number</td><td>Dictionary Size - a number between 4096 and 768 MiB (768 << 20)</td></tr> + * <tr><td>LZMA2</td><td>org.tukaani.xz.LZMA2Options</td><td>Whole set of LZMA2 options.</td></tr> + * <tr><td>DELTA_FILTER</td><td>Number</td><td>Delta Distance - a number between 1 and 256</td></tr> + * </table> + * + * @Immutable + * @since 1.8 + */ +public class SevenZMethodConfiguration { + private final SevenZMethod method; + private final Object options; + + /** + * Doesn't configure any additional options. + * @param method the method to use + */ + public SevenZMethodConfiguration(final SevenZMethod method) { + this(method, null); + } + + /** + * Specifies and method plus configuration options. + * @param method the method to use + * @param options the options to use + * @throws IllegalArgumentException if the method doesn't understand the options specified. + */ + public SevenZMethodConfiguration(final SevenZMethod method, final Object options) { + this.method = method; + this.options = options; + if (options != null && !Coders.findByMethod(method).canAcceptOptions(options)) { + throw new IllegalArgumentException("The " + method + " method doesn't support options of type " + + options.getClass()); + } + } + + /** + * The specified method. + * @return the method + */ + public SevenZMethod getMethod() { + return method; + } + + /** + * The specified options. + * @return the options + */ + public Object getOptions() { + return options; + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZOutputFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZOutputFile.java new file mode 100644 index 000000000..db47f44b4 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZOutputFile.java @@ -0,0 +1,811 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
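Based on the options table above, a configuration is just the method plus a Number or an LZMA2Options instance. A minimal sketch; the class name and the concrete values (16 MiB dictionary, delta distance 4) are illustrative choices, not defaults:

    import java.util.Arrays;
    import java.util.List;
    import org.apache.commons.compress.archivers.sevenz.SevenZMethod;
    import org.apache.commons.compress.archivers.sevenz.SevenZMethodConfiguration;

    class MethodConfigSketch {
        // LZMA2 with a 16 MiB dictionary, passed as a plain Number per the table above.
        static final SevenZMethodConfiguration LZMA2_16M =
                new SevenZMethodConfiguration(SevenZMethod.LZMA2, 1 << 24);

        // A delta filter (distance 4) in front of LZMA2; such a list can later be handed
        // to SevenZOutputFile#setContentMethods or SevenZArchiveEntry#setContentMethods.
        static final List<SevenZMethodConfiguration> DELTA_THEN_LZMA2 = Arrays.asList(
                new SevenZMethodConfiguration(SevenZMethod.DELTA_FILTER, 4),
                LZMA2_16M);
    }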
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.ByteArrayOutputStream; +import java.io.Closeable; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collections; +import java.util.Date; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.List; +import java.util.LinkedList; +import java.util.Map; +import java.util.zip.CRC32; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.utils.CountingOutputStream; + +/** + * Writes a 7z file. + * @since 1.6 + */ +public class SevenZOutputFile implements Closeable { + private final SeekableByteChannel channel; + private final List<SevenZArchiveEntry> files = new ArrayList<>(); + private int numNonEmptyStreams = 0; + private final CRC32 crc32 = new CRC32(); + private final CRC32 compressedCrc32 = new CRC32(); + private long fileBytesWritten = 0; + private boolean finished = false; + private CountingOutputStream currentOutputStream; + private CountingOutputStream[] additionalCountingStreams; + private Iterable<? extends SevenZMethodConfiguration> contentMethods = + Collections.singletonList(new SevenZMethodConfiguration(SevenZMethod.LZMA2)); + private final Map<SevenZArchiveEntry, long[]> additionalSizes = new HashMap<>(); + + /** + * Opens file to write a 7z archive to. + * + * @param filename the file to write to + * @throws IOException if opening the file fails + */ + public SevenZOutputFile(final File filename) throws IOException { + this(Files.newByteChannel(filename.toPath(), + EnumSet.of(StandardOpenOption.CREATE, StandardOpenOption.WRITE, + StandardOpenOption.TRUNCATE_EXISTING))); + } + + /** + * Prepares channel to write a 7z archive to. + * + * <p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to write to an in-memory archive.</p> + * + * @param channel the channel to write to + * @throws IOException if the channel cannot be positioned properly + * @since 1.13 + */ + public SevenZOutputFile(final SeekableByteChannel channel) throws IOException { + this.channel = channel; + channel.position(SevenZFile.SIGNATURE_HEADER_SIZE); + } + + /** + * Sets the default compression method to use for entry contents - the + * default is LZMA2. + * + * <p>Currently only {@link SevenZMethod#COPY}, {@link + * SevenZMethod#LZMA2}, {@link SevenZMethod#BZIP2} and {@link + * SevenZMethod#DEFLATE} are supported.</p> + * + * <p>This is a short form for passing a single-element iterable + * to {@link #setContentMethods}.</p> + * @param method the default compression method + */ + public void setContentCompression(final SevenZMethod method) { + setContentMethods(Collections.singletonList(new SevenZMethodConfiguration(method))); + } + + /** + * Sets the default (compression) methods to use for entry contents - the + * default is LZMA2. 
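A minimal sketch of the in-memory variant mentioned in the channel constructor's javadoc, using SeekableInMemoryByteChannel and the COPY method so no external codec is needed (entry name and content invented; the entry lifecycle methods used here are defined just below):

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
    import org.apache.commons.compress.archivers.sevenz.SevenZMethod;
    import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile;
    import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;

    class InMemorySevenZSketch {
        public static void main(String[] args) throws IOException {
            SeekableInMemoryByteChannel channel = new SeekableInMemoryByteChannel();
            try (SevenZOutputFile out = new SevenZOutputFile(channel)) {
                out.setContentCompression(SevenZMethod.COPY); // store entries uncompressed
                SevenZArchiveEntry entry = new SevenZArchiveEntry();
                entry.setName("hello.txt");
                out.putArchiveEntry(entry);
                out.write("hello 7z".getBytes(StandardCharsets.UTF_8));
                out.closeArchiveEntry();
            } // close() calls finish() and writes the signature and header
            byte[] backing = channel.array(); // may be longer than the archive itself
            System.out.println("starts with 7z signature: "
                + (backing[0] == '7' && backing[1] == 'z'));
        }
    }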
+ * + * <p>Currently only {@link SevenZMethod#COPY}, {@link + * SevenZMethod#LZMA2}, {@link SevenZMethod#BZIP2} and {@link + * SevenZMethod#DEFLATE} are supported.</p> + * + * <p>The methods will be consulted in iteration order to create + * the final output.</p> + * + * @since 1.8 + * @param methods the default (compression) methods + */ + public void setContentMethods(final Iterable<? extends SevenZMethodConfiguration> methods) { + this.contentMethods = reverse(methods); + } + + /** + * Closes the archive, calling {@link #finish} if necessary. + * + * @throws IOException on error + */ + @Override + public void close() throws IOException { + try { + if (!finished) { + finish(); + } + } finally { + channel.close(); + } + } + + /** + * Create an archive entry using the inputFile and entryName provided. + * + * @param inputFile file to create an entry from + * @param entryName the name to use + * @return the ArchiveEntry set up with details from the file + * + * @throws IOException on error + */ + public SevenZArchiveEntry createArchiveEntry(final File inputFile, + final String entryName) throws IOException { + final SevenZArchiveEntry entry = new SevenZArchiveEntry(); + entry.setDirectory(inputFile.isDirectory()); + entry.setName(entryName); + entry.setLastModifiedDate(new Date(inputFile.lastModified())); + return entry; + } + + /** + * Records an archive entry to add. + * + * The caller must then write the content to the archive and call + * {@link #closeArchiveEntry()} to complete the process. + * + * @param archiveEntry describes the entry + * @throws IOException on error + */ + public void putArchiveEntry(final ArchiveEntry archiveEntry) throws IOException { + final SevenZArchiveEntry entry = (SevenZArchiveEntry) archiveEntry; + files.add(entry); + } + + /** + * Closes the archive entry. + * @throws IOException on error + */ + public void closeArchiveEntry() throws IOException { + if (currentOutputStream != null) { + currentOutputStream.flush(); + currentOutputStream.close(); + } + + final SevenZArchiveEntry entry = files.get(files.size() - 1); + if (fileBytesWritten > 0) { // this implies currentOutputStream != null + entry.setHasStream(true); + ++numNonEmptyStreams; + entry.setSize(currentOutputStream.getBytesWritten()); //NOSONAR + entry.setCompressedSize(fileBytesWritten); + entry.setCrcValue(crc32.getValue()); + entry.setCompressedCrcValue(compressedCrc32.getValue()); + entry.setHasCrc(true); + if (additionalCountingStreams != null) { + final long[] sizes = new long[additionalCountingStreams.length]; + for (int i = 0; i < additionalCountingStreams.length; i++) { + sizes[i] = additionalCountingStreams[i].getBytesWritten(); + } + additionalSizes.put(entry, sizes); + } + } else { + entry.setHasStream(false); + entry.setSize(0); + entry.setCompressedSize(0); + entry.setHasCrc(false); + } + currentOutputStream = null; + additionalCountingStreams = null; + crc32.reset(); + compressedCrc32.reset(); + fileBytesWritten = 0; + } + + /** + * Writes a byte to the current archive entry. + * @param b The byte to be written. + * @throws IOException on error + */ + public void write(final int b) throws IOException { + getCurrentOutputStream().write(b); + } + + /** + * Writes a byte array to the current archive entry. + * @param b The byte array to be written. + * @throws IOException on error + */ + public void write(final byte[] b) throws IOException { + write(b, 0, b.length); + } + + /** + * Writes part of a byte array to the current archive entry. + * @param b The byte array to be written. 
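Putting the entry lifecycle together, a hedged end-to-end sketch (temporary file names invented; the default LZMA2 codec delegates to XZ for Java, which therefore has to be on the classpath):

    import java.io.File;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
    import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile;

    class AddFileSketch {
        public static void main(String[] args) throws IOException {
            Path payload = Files.createTempFile("payload", ".txt");
            Files.write(payload, "some payload data".getBytes(StandardCharsets.UTF_8));
            File archive = File.createTempFile("sketch", ".7z");

            try (SevenZOutputFile out = new SevenZOutputFile(archive)) {
                // createArchiveEntry copies the directory flag and last-modified time
                // from the file; the content is then streamed between putArchiveEntry
                // and closeArchiveEntry.
                SevenZArchiveEntry entry =
                    out.createArchiveEntry(payload.toFile(), "docs/payload.txt");
                out.putArchiveEntry(entry);
                out.write(Files.readAllBytes(payload));
                out.closeArchiveEntry();
            } // close() finishes the archive
            System.out.println("wrote " + archive.length() + " bytes");
        }
    }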
+ * @param off offset into the array to start writing from + * @param len number of bytes to write + * @throws IOException on error + */ + public void write(final byte[] b, final int off, final int len) throws IOException { + if (len > 0) { + getCurrentOutputStream().write(b, off, len); + } + } + + /** + * Finishes the addition of entries to this archive, without closing it. + * + * @throws IOException if archive is already closed. + */ + public void finish() throws IOException { + if (finished) { + throw new IOException("This archive has already been finished"); + } + finished = true; + + final long headerPosition = channel.position(); + + final ByteArrayOutputStream headerBaos = new ByteArrayOutputStream(); + final DataOutputStream header = new DataOutputStream(headerBaos); + + writeHeader(header); + header.flush(); + final byte[] headerBytes = headerBaos.toByteArray(); + channel.write(ByteBuffer.wrap(headerBytes)); + + final CRC32 crc32 = new CRC32(); + crc32.update(headerBytes); + + ByteBuffer bb = ByteBuffer.allocate(SevenZFile.sevenZSignature.length + + 2 /* version */ + + 4 /* start header CRC */ + + 8 /* next header position */ + + 8 /* next header length */ + + 4 /* next header CRC */) + .order(ByteOrder.LITTLE_ENDIAN); + // signature header + channel.position(0); + bb.put(SevenZFile.sevenZSignature); + // version + bb.put((byte) 0).put((byte) 2); + + // placeholder for start header CRC + bb.putInt(0); + + // start header + bb.putLong(headerPosition - SevenZFile.SIGNATURE_HEADER_SIZE) + .putLong(0xffffFFFFL & headerBytes.length) + .putInt((int) crc32.getValue()); + crc32.reset(); + crc32.update(bb.array(), SevenZFile.sevenZSignature.length + 6, 20); + bb.putInt(SevenZFile.sevenZSignature.length + 2, (int) crc32.getValue()); + bb.flip(); + channel.write(bb); + } + + /* + * Creation of output stream is deferred until data is actually + * written as some codecs might write header information even for + * empty streams and directories otherwise. + */ + private OutputStream getCurrentOutputStream() throws IOException { + if (currentOutputStream == null) { + currentOutputStream = setupFileOutputStream(); + } + return currentOutputStream; + } + + private CountingOutputStream setupFileOutputStream() throws IOException { + if (files.isEmpty()) { + throw new IllegalStateException("No current 7z entry"); + } + + OutputStream out = new OutputStreamWrapper(); + final ArrayList<CountingOutputStream> moreStreams = new ArrayList<>(); + boolean first = true; + for (final SevenZMethodConfiguration m : getContentMethods(files.get(files.size() - 1))) { + if (!first) { + final CountingOutputStream cos = new CountingOutputStream(out); + moreStreams.add(cos); + out = cos; + } + out = Coders.addEncoder(out, m.getMethod(), m.getOptions()); + first = false; + } + if (!moreStreams.isEmpty()) { + additionalCountingStreams = moreStreams.toArray(new CountingOutputStream[moreStreams.size()]); + } + return new CountingOutputStream(out) { + @Override + public void write(final int b) throws IOException { + super.write(b); + crc32.update(b); + } + + @Override + public void write(final byte[] b) throws IOException { + super.write(b); + crc32.update(b); + } + + @Override + public void write(final byte[] b, final int off, final int len) + throws IOException { + super.write(b, off, len); + crc32.update(b, off, len); + } + }; + } + + private Iterable<? extends SevenZMethodConfiguration> getContentMethods(final SevenZArchiveEntry entry) { + final Iterable<? 
extends SevenZMethodConfiguration> ms = entry.getContentMethods(); + return ms == null ? contentMethods : ms; + } + + private void writeHeader(final DataOutput header) throws IOException { + header.write(NID.kHeader); + + header.write(NID.kMainStreamsInfo); + writeStreamsInfo(header); + writeFilesInfo(header); + header.write(NID.kEnd); + } + + private void writeStreamsInfo(final DataOutput header) throws IOException { + if (numNonEmptyStreams > 0) { + writePackInfo(header); + writeUnpackInfo(header); + } + + writeSubStreamsInfo(header); + + header.write(NID.kEnd); + } + + private void writePackInfo(final DataOutput header) throws IOException { + header.write(NID.kPackInfo); + + writeUint64(header, 0); + writeUint64(header, 0xffffFFFFL & numNonEmptyStreams); + + header.write(NID.kSize); + for (final SevenZArchiveEntry entry : files) { + if (entry.hasStream()) { + writeUint64(header, entry.getCompressedSize()); + } + } + + header.write(NID.kCRC); + header.write(1); // "allAreDefined" == true + for (final SevenZArchiveEntry entry : files) { + if (entry.hasStream()) { + header.writeInt(Integer.reverseBytes((int) entry.getCompressedCrcValue())); + } + } + + header.write(NID.kEnd); + } + + private void writeUnpackInfo(final DataOutput header) throws IOException { + header.write(NID.kUnpackInfo); + + header.write(NID.kFolder); + writeUint64(header, numNonEmptyStreams); + header.write(0); + for (final SevenZArchiveEntry entry : files) { + if (entry.hasStream()) { + writeFolder(header, entry); + } + } + + header.write(NID.kCodersUnpackSize); + for (final SevenZArchiveEntry entry : files) { + if (entry.hasStream()) { + final long[] moreSizes = additionalSizes.get(entry); + if (moreSizes != null) { + for (final long s : moreSizes) { + writeUint64(header, s); + } + } + writeUint64(header, entry.getSize()); + } + } + + header.write(NID.kCRC); + header.write(1); // "allAreDefined" == true + for (final SevenZArchiveEntry entry : files) { + if (entry.hasStream()) { + header.writeInt(Integer.reverseBytes((int) entry.getCrcValue())); + } + } + + header.write(NID.kEnd); + } + + private void writeFolder(final DataOutput header, final SevenZArchiveEntry entry) throws IOException { + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int numCoders = 0; + for (final SevenZMethodConfiguration m : getContentMethods(entry)) { + numCoders++; + writeSingleCodec(m, bos); + } + + writeUint64(header, numCoders); + header.write(bos.toByteArray()); + for (long i = 0; i < numCoders - 1; i++) { + writeUint64(header, i + 1); + writeUint64(header, i); + } + } + + private void writeSingleCodec(final SevenZMethodConfiguration m, final OutputStream bos) throws IOException { + final byte[] id = m.getMethod().getId(); + final byte[] properties = Coders.findByMethod(m.getMethod()) + .getOptionsAsProperties(m.getOptions()); + + int codecFlags = id.length; + if (properties.length > 0) { + codecFlags |= 0x20; + } + bos.write(codecFlags); + bos.write(id); + + if (properties.length > 0) { + bos.write(properties.length); + bos.write(properties); + } + } + + private void writeSubStreamsInfo(final DataOutput header) throws IOException { + header.write(NID.kSubStreamsInfo); +// +// header.write(NID.kCRC); +// header.write(1); +// for (final SevenZArchiveEntry entry : files) { +// if (entry.getHasCrc()) { +// header.writeInt(Integer.reverseBytes(entry.getCrc())); +// } +// } +// + header.write(NID.kEnd); + } + + private void writeFilesInfo(final DataOutput header) throws IOException { + header.write(NID.kFilesInfo); + + 
writeUint64(header, files.size()); + + writeFileEmptyStreams(header); + writeFileEmptyFiles(header); + writeFileAntiItems(header); + writeFileNames(header); + writeFileCTimes(header); + writeFileATimes(header); + writeFileMTimes(header); + writeFileWindowsAttributes(header); + header.write(NID.kEnd); + } + + private void writeFileEmptyStreams(final DataOutput header) throws IOException { + boolean hasEmptyStreams = false; + for (final SevenZArchiveEntry entry : files) { + if (!entry.hasStream()) { + hasEmptyStreams = true; + break; + } + } + if (hasEmptyStreams) { + header.write(NID.kEmptyStream); + final BitSet emptyStreams = new BitSet(files.size()); + for (int i = 0; i < files.size(); i++) { + emptyStreams.set(i, !files.get(i).hasStream()); + } + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + writeBits(out, emptyStreams, files.size()); + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeFileEmptyFiles(final DataOutput header) throws IOException { + boolean hasEmptyFiles = false; + int emptyStreamCounter = 0; + final BitSet emptyFiles = new BitSet(0); + for (final SevenZArchiveEntry file1 : files) { + if (!file1.hasStream()) { + final boolean isDir = file1.isDirectory(); + emptyFiles.set(emptyStreamCounter++, !isDir); + hasEmptyFiles |= !isDir; + } + } + if (hasEmptyFiles) { + header.write(NID.kEmptyFile); + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + writeBits(out, emptyFiles, emptyStreamCounter); + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeFileAntiItems(final DataOutput header) throws IOException { + boolean hasAntiItems = false; + final BitSet antiItems = new BitSet(0); + int antiItemCounter = 0; + for (final SevenZArchiveEntry file1 : files) { + if (!file1.hasStream()) { + final boolean isAnti = file1.isAntiItem(); + antiItems.set(antiItemCounter++, isAnti); + hasAntiItems |= isAnti; + } + } + if (hasAntiItems) { + header.write(NID.kAnti); + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + writeBits(out, antiItems, antiItemCounter); + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeFileNames(final DataOutput header) throws IOException { + header.write(NID.kName); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + out.write(0); + for (final SevenZArchiveEntry entry : files) { + out.write(entry.getName().getBytes("UTF-16LE")); + out.writeShort(0); + } + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + + private void writeFileCTimes(final DataOutput header) throws IOException { + int numCreationDates = 0; + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasCreationDate()) { + ++numCreationDates; + } + } + if (numCreationDates > 0) { + header.write(NID.kCTime); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + if (numCreationDates != files.size()) { + out.write(0); + final BitSet cTimes = 
new BitSet(files.size()); + for (int i = 0; i < files.size(); i++) { + cTimes.set(i, files.get(i).getHasCreationDate()); + } + writeBits(out, cTimes, files.size()); + } else { + out.write(1); // "allAreDefined" == true + } + out.write(0); + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasCreationDate()) { + out.writeLong(Long.reverseBytes( + SevenZArchiveEntry.javaTimeToNtfsTime(entry.getCreationDate()))); + } + } + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeFileATimes(final DataOutput header) throws IOException { + int numAccessDates = 0; + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasAccessDate()) { + ++numAccessDates; + } + } + if (numAccessDates > 0) { + header.write(NID.kATime); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + if (numAccessDates != files.size()) { + out.write(0); + final BitSet aTimes = new BitSet(files.size()); + for (int i = 0; i < files.size(); i++) { + aTimes.set(i, files.get(i).getHasAccessDate()); + } + writeBits(out, aTimes, files.size()); + } else { + out.write(1); // "allAreDefined" == true + } + out.write(0); + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasAccessDate()) { + out.writeLong(Long.reverseBytes( + SevenZArchiveEntry.javaTimeToNtfsTime(entry.getAccessDate()))); + } + } + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeFileMTimes(final DataOutput header) throws IOException { + int numLastModifiedDates = 0; + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasLastModifiedDate()) { + ++numLastModifiedDates; + } + } + if (numLastModifiedDates > 0) { + header.write(NID.kMTime); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + if (numLastModifiedDates != files.size()) { + out.write(0); + final BitSet mTimes = new BitSet(files.size()); + for (int i = 0; i < files.size(); i++) { + mTimes.set(i, files.get(i).getHasLastModifiedDate()); + } + writeBits(out, mTimes, files.size()); + } else { + out.write(1); // "allAreDefined" == true + } + out.write(0); + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasLastModifiedDate()) { + out.writeLong(Long.reverseBytes( + SevenZArchiveEntry.javaTimeToNtfsTime(entry.getLastModifiedDate()))); + } + } + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeFileWindowsAttributes(final DataOutput header) throws IOException { + int numWindowsAttributes = 0; + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasWindowsAttributes()) { + ++numWindowsAttributes; + } + } + if (numWindowsAttributes > 0) { + header.write(NID.kWinAttributes); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + if (numWindowsAttributes != files.size()) { + out.write(0); + final BitSet attributes = new BitSet(files.size()); + for (int i = 0; i < files.size(); i++) { + attributes.set(i, files.get(i).getHasWindowsAttributes()); + } + writeBits(out, attributes, files.size()); + } else { + out.write(1); // "allAreDefined" == true + } + out.write(0); + for (final SevenZArchiveEntry entry : files) { + if 
(entry.getHasWindowsAttributes()) { + out.writeInt(Integer.reverseBytes(entry.getWindowsAttributes())); + } + } + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeUint64(final DataOutput header, long value) throws IOException { + int firstByte = 0; + int mask = 0x80; + int i; + for (i = 0; i < 8; i++) { + if (value < ((1L << ( 7 * (i + 1))))) { + firstByte |= (value >>> (8 * i)); + break; + } + firstByte |= mask; + mask >>>= 1; + } + header.write(firstByte); + for (; i > 0; i--) { + header.write((int) (0xff & value)); + value >>>= 8; + } + } + + private void writeBits(final DataOutput header, final BitSet bits, final int length) throws IOException { + int cache = 0; + int shift = 7; + for (int i = 0; i < length; i++) { + cache |= ((bits.get(i) ? 1 : 0) << shift); + if (--shift < 0) { + header.write(cache); + shift = 7; + cache = 0; + } + } + if (shift != 7) { + header.write(cache); + } + } + + private static <T> Iterable<T> reverse(final Iterable<T> i) { + final LinkedList<T> l = new LinkedList<>(); + for (final T t : i) { + l.addFirst(t); + } + return l; + } + + private class OutputStreamWrapper extends OutputStream { + private static final int BUF_SIZE = 8192; + private final ByteBuffer buffer = ByteBuffer.allocate(BUF_SIZE); + @Override + public void write(final int b) throws IOException { + buffer.clear(); + buffer.put((byte) b).flip(); + channel.write(buffer); + compressedCrc32.update(b); + fileBytesWritten++; + } + + @Override + public void write(final byte[] b) throws IOException { + OutputStreamWrapper.this.write(b, 0, b.length); + } + + @Override + public void write(final byte[] b, final int off, final int len) + throws IOException { + if (len > BUF_SIZE) { + channel.write(ByteBuffer.wrap(b, off, len)); + } else { + buffer.clear(); + buffer.put(b, off, len).flip(); + channel.write(buffer); + } + compressedCrc32.update(b, off, len); + fileBytesWritten += len; + } + + @Override + public void flush() throws IOException { + // no reason to flush the channel + } + + @Override + public void close() throws IOException { + // the file will be closed by the containing class's close method + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java new file mode 100644 index 000000000..a33aca70f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
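The writeUint64 method above emits the 7z variable-length integer format: the run of leading one-bits in the first byte says how many extra bytes follow, the remaining bits of the first byte carry the value's top bits, and the extra bytes carry the low bits in little-endian order. A standalone sketch of the same logic (copied here because the original is private; class name invented):

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    class Uint64EncodingSketch {
        // Same algorithm as SevenZOutputFile#writeUint64, reproduced so it can run standalone.
        static void writeUint64(DataOutputStream header, long value) throws IOException {
            int firstByte = 0;
            int mask = 0x80;
            int i;
            for (i = 0; i < 8; i++) {
                if (value < (1L << (7 * (i + 1)))) {
                    firstByte |= (value >>> (8 * i)); // top bits of the value
                    break;
                }
                firstByte |= mask;                    // one more continuation bit
                mask >>>= 1;
            }
            header.write(firstByte);
            for (; i > 0; i--) {                      // low bytes, little-endian
                header.write((int) (0xff & value));
                value >>>= 8;
            }
        }

        public static void main(String[] args) throws IOException {
            for (long v : new long[] { 0x7f, 0x80, 0x3fff, 1_000_000L }) {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                writeUint64(new DataOutputStream(baos), v);
                StringBuilder hex = new StringBuilder();
                for (byte b : baos.toByteArray()) {
                    hex.append(String.format("%02x ", b));
                }
                System.out.println(v + " -> " + hex.toString().trim());
            }
        }
    }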
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +class StartHeader { + long nextHeaderOffset; + long nextHeaderSize; + long nextHeaderCrc; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java new file mode 100644 index 000000000..9a10e1e84 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +/// Map between folders, files and streams. +class StreamMap { + /// The first Archive.packStream index of each folder. + int[] folderFirstPackStreamIndex; + /// Offset to beginning of this pack stream's data, relative to the beginning of the first pack stream. + long[] packStreamOffsets; + /// Index of first file for each folder. + int[] folderFirstFileIndex; + /// Index of folder for each file. + int[] fileFolderIndex; + + @Override + public String toString() { + return "StreamMap with indices of " + folderFirstPackStreamIndex.length + + " folders, offsets of " + packStreamOffsets.length + " packed streams," + + " first files of " + folderFirstFileIndex.length + " folders and" + + " folder indices for " + fileFolderIndex.length + " files"; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java new file mode 100644 index 000000000..95fabc635 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.util.BitSet; + +/// Properties for non-empty files. +class SubStreamsInfo { + /// Unpacked size of each unpacked stream. + long[] unpackSizes; + /// Whether CRC is present for each unpacked stream. 
+ BitSet hasCrc; + /// CRCs of unpacked streams, if present. + long[] crcs; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html b/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html new file mode 100644 index 000000000..975703b38 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html @@ -0,0 +1,24 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides classes for reading and writing archives using + the 7z format.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java new file mode 100644 index 000000000..ac98f0afa --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java @@ -0,0 +1,1440 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.tar; + +import java.io.File; +import java.io.IOException; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.utils.ArchiveUtils; + +/** + * This class represents an entry in a Tar archive. It consists + * of the entry's header, as well as the entry's File. Entries + * can be instantiated in one of three ways, depending on how + * they are to be used. + * <p> + * TarEntries that are created from the header bytes read from + * an archive are instantiated with the TarEntry( byte[] ) + * constructor. These entries will be used when extracting from + * or listing the contents of an archive. These entries have their + * header filled in using the header bytes. They also set the File + * to null, since they reference an archive entry not a file. 
+ * <p> + * TarEntries that are created from Files that are to be written + * into an archive are instantiated with the TarEntry( File ) + * constructor. These entries have their header filled in using + * the File's information. They also keep a reference to the File + * for convenience when writing entries. + * <p> + * Finally, TarEntries can be constructed from nothing but a name. + * This allows the programmer to construct the entry by hand, for + * instance when only an InputStream is available for writing to + * the archive, and the header information is constructed from + * other information. In this case the header fields are set to + * defaults and the File is set to null. + * + * <p> + * The C structure for a Tar Entry's header is: + * <pre> + * struct header { + * char name[100]; // TarConstants.NAMELEN - offset 0 + * char mode[8]; // TarConstants.MODELEN - offset 100 + * char uid[8]; // TarConstants.UIDLEN - offset 108 + * char gid[8]; // TarConstants.GIDLEN - offset 116 + * char size[12]; // TarConstants.SIZELEN - offset 124 + * char mtime[12]; // TarConstants.MODTIMELEN - offset 136 + * char chksum[8]; // TarConstants.CHKSUMLEN - offset 148 + * char linkflag[1]; // - offset 156 + * char linkname[100]; // TarConstants.NAMELEN - offset 157 + * The following fields are only present in new-style POSIX tar archives: + * char magic[6]; // TarConstants.MAGICLEN - offset 257 + * char version[2]; // TarConstants.VERSIONLEN - offset 263 + * char uname[32]; // TarConstants.UNAMELEN - offset 265 + * char gname[32]; // TarConstants.GNAMELEN - offset 297 + * char devmajor[8]; // TarConstants.DEVLEN - offset 329 + * char devminor[8]; // TarConstants.DEVLEN - offset 337 + * char prefix[155]; // TarConstants.PREFIXLEN - offset 345 + * // Used if "name" field is not long enough to hold the path + * char pad[12]; // NULs - offset 500 + * } header; + * All unused bytes are set to null. + * New-style GNU tar files are slightly different from the above. + * For values of size larger than 077777777777L (11 7s) + * or uid and gid larger than 07777777L (7 7s) + * the sign bit of the first byte is set, and the rest of the + * field is the binary representation of the number. + * See TarUtils.parseOctalOrBinary. 
+ * </pre> + * + * <p> + * The C structure for a old GNU Tar Entry's header is: + * <pre> + * struct oldgnu_header { + * char unused_pad1[345]; // TarConstants.PAD1LEN_GNU - offset 0 + * char atime[12]; // TarConstants.ATIMELEN_GNU - offset 345 + * char ctime[12]; // TarConstants.CTIMELEN_GNU - offset 357 + * char offset[12]; // TarConstants.OFFSETLEN_GNU - offset 369 + * char longnames[4]; // TarConstants.LONGNAMESLEN_GNU - offset 381 + * char unused_pad2; // TarConstants.PAD2LEN_GNU - offset 385 + * struct sparse sp[4]; // TarConstants.SPARSELEN_GNU - offset 386 + * char isextended; // TarConstants.ISEXTENDEDLEN_GNU - offset 482 + * char realsize[12]; // TarConstants.REALSIZELEN_GNU - offset 483 + * char unused_pad[17]; // TarConstants.PAD3LEN_GNU - offset 495 + * }; + * </pre> + * Whereas, "struct sparse" is: + * <pre> + * struct sparse { + * char offset[12]; // offset 0 + * char numbytes[12]; // offset 12 + * }; + * </pre> + * + * <p> + * The C structure for a xstar (Jörg Schilling star) Tar Entry's header is: + * <pre> + * struct star_header { + * char name[100]; // offset 0 + * char mode[8]; // offset 100 + * char uid[8]; // offset 108 + * char gid[8]; // offset 116 + * char size[12]; // offset 124 + * char mtime[12]; // offset 136 + * char chksum[8]; // offset 148 + * char typeflag; // offset 156 + * char linkname[100]; // offset 157 + * char magic[6]; // offset 257 + * char version[2]; // offset 263 + * char uname[32]; // offset 265 + * char gname[32]; // offset 297 + * char devmajor[8]; // offset 329 + * char devminor[8]; // offset 337 + * char prefix[131]; // offset 345 + * char atime[12]; // offset 476 + * char ctime[12]; // offset 488 + * char mfill[8]; // offset 500 + * char xmagic[4]; // offset 508 "tar" + * }; + * </pre> + * <p>which is identical to new-style POSIX up to the first 130 bytes of the prefix.</p> + * + * @NotThreadSafe + */ + +public class TarArchiveEntry implements ArchiveEntry, TarConstants { + private static final TarArchiveEntry[] EMPTY_TAR_ARCHIVE_ENTRIES = new TarArchiveEntry[0]; + + /** The entry's name. */ + private String name = ""; + + /** Whether to allow leading slashes or drive names inside the name */ + private final boolean preserveAbsolutePath; + + /** The entry's permission mode. */ + private int mode; + + /** The entry's user id. */ + private long userId = 0; + + /** The entry's group id. */ + private long groupId = 0; + + /** The entry's size. */ + private long size = 0; + + /** The entry's modification time. */ + private long modTime; + + /** If the header checksum is reasonably correct. */ + private boolean checkSumOK; + + /** The entry's link flag. */ + private byte linkFlag; + + /** The entry's link name. */ + private String linkName = ""; + + /** The entry's magic tag. */ + private String magic = MAGIC_POSIX; + /** The version of the format */ + private String version = VERSION_POSIX; + + /** The entry's user name. */ + private String userName; + + /** The entry's group name. */ + private String groupName = ""; + + /** The entry's major device number. */ + private int devMajor = 0; + + /** The entry's minor device number. */ + private int devMinor = 0; + + /** If an extension sparse header follows. */ + private boolean isExtended; + + /** The entry's real size in case of a sparse file. */ + private long realSize; + + /** is this entry a GNU sparse entry using one of the PAX formats? */ + private boolean paxGNUSparse; + + /** is this entry a star sparse entry using the PAX header? 
*/ + private boolean starSparse; + + /** The entry's file reference */ + private final File file; + + /** Extra, user supplied pax headers */ + private final Map<String,String> extraPaxHeaders = new HashMap<>(); + + /** Maximum length of a user's name in the tar file */ + public static final int MAX_NAMELEN = 31; + + /** Default permissions bits for directories */ + public static final int DEFAULT_DIR_MODE = 040755; + + /** Default permissions bits for files */ + public static final int DEFAULT_FILE_MODE = 0100644; + + /** Convert millis to seconds */ + public static final int MILLIS_PER_SECOND = 1000; + + + /** + * Construct an empty entry and prepares the header values. + */ + private TarArchiveEntry(boolean preserveAbsolutePath) { + String user = System.getProperty("user.name", ""); + + if (user.length() > MAX_NAMELEN) { + user = user.substring(0, MAX_NAMELEN); + } + + this.userName = user; + this.file = null; + this.preserveAbsolutePath = preserveAbsolutePath; + } + + /** + * Construct an entry with only a name. This allows the programmer + * to construct the entry's header "by hand". File is set to null. + * + * <p>The entry's name will be the value of the {@code name} + * argument with all file separators replaced by forward slashes + * and leading slashes as well as Windows drive letters stripped.</p> + * + * @param name the entry name + */ + public TarArchiveEntry(final String name) { + this(name, false); + } + + /** + * Construct an entry with only a name. This allows the programmer + * to construct the entry's header "by hand". File is set to null. + * + * <p>The entry's name will be the value of the {@code name} + * argument with all file separators replaced by forward slashes. + * Leading slashes and Windows drive letters are stripped if + * {@code preserveAbsolutePath} is {@code false}.</p> + * + * @param name the entry name + * @param preserveAbsolutePath whether to allow leading slashes + * or drive letters in the name. + * + * @since 1.1 + */ + public TarArchiveEntry(String name, final boolean preserveAbsolutePath) { + this(preserveAbsolutePath); + + name = normalizeFileName(name, preserveAbsolutePath); + final boolean isDir = name.endsWith("/"); + + this.name = name; + this.mode = isDir ? DEFAULT_DIR_MODE : DEFAULT_FILE_MODE; + this.linkFlag = isDir ? LF_DIR : LF_NORMAL; + this.modTime = new Date().getTime() / MILLIS_PER_SECOND; + this.userName = ""; + } + + /** + * Construct an entry with a name and a link flag. + * + * <p>The entry's name will be the value of the {@code name} + * argument with all file separators replaced by forward slashes + * and leading slashes as well as Windows drive letters + * stripped.</p> + * + * @param name the entry name + * @param linkFlag the entry link flag. + */ + public TarArchiveEntry(final String name, final byte linkFlag) { + this(name, linkFlag, false); + } + + /** + * Construct an entry with a name and a link flag. + * + * <p>The entry's name will be the value of the {@code name} + * argument with all file separators replaced by forward slashes. + * Leading slashes and Windows drive letters are stripped if + * {@code preserveAbsolutePath} is {@code false}.</p> + * + * @param name the entry name + * @param linkFlag the entry link flag. + * @param preserveAbsolutePath whether to allow leading slashes + * or drive letters in the name. 
+ * + * @since 1.5 + */ + public TarArchiveEntry(final String name, final byte linkFlag, final boolean preserveAbsolutePath) { + this(name, preserveAbsolutePath); + this.linkFlag = linkFlag; + if (linkFlag == LF_GNUTYPE_LONGNAME) { + magic = MAGIC_GNU; + version = VERSION_GNU_SPACE; + } + } + + /** + * Construct an entry for a file. File is set to file, and the + * header is constructed from information from the file. + * The name is set from the normalized file path. + * + * <p>The entry's name will be the value of the {@code file}'s + * path with all file separators replaced by forward slashes and + * leading slashes as well as Windows drive letters stripped. The + * name will end in a slash if the {@code file} represents a + * directory.</p> + * + * @param file The file that the entry represents. + */ + public TarArchiveEntry(final File file) { + this(file, file.getPath()); + } + + /** + * Construct an entry for a file. File is set to file, and the + * header is constructed from information from the file. + * + * <p>The entry's name will be the value of the {@code fileName} + * argument with all file separators replaced by forward slashes + * and leading slashes as well as Windows drive letters stripped. + * The name will end in a slash if the {@code file} represents a + * directory.</p> + * + * @param file The file that the entry represents. + * @param fileName the name to be used for the entry. + */ + public TarArchiveEntry(final File file, final String fileName) { + final String normalizedName = normalizeFileName(fileName, false); + this.file = file; + + if (file.isDirectory()) { + this.mode = DEFAULT_DIR_MODE; + this.linkFlag = LF_DIR; + + final int nameLength = normalizedName.length(); + if (nameLength == 0 || normalizedName.charAt(nameLength - 1) != '/') { + this.name = normalizedName + "/"; + } else { + this.name = normalizedName; + } + } else { + this.mode = DEFAULT_FILE_MODE; + this.linkFlag = LF_NORMAL; + this.size = file.length(); + this.name = normalizedName; + } + + this.modTime = file.lastModified() / MILLIS_PER_SECOND; + this.userName = ""; + preserveAbsolutePath = false; + } + + /** + * Construct an entry from an archive's header bytes. File is set + * to null. + * + * @param headerBuf The header bytes from a tar archive entry. + * @throws IllegalArgumentException if any of the numeric fields have an invalid format + */ + public TarArchiveEntry(final byte[] headerBuf) { + this(false); + parseTarHeader(headerBuf); + } + + /** + * Construct an entry from an archive's header bytes. File is set + * to null. + * + * @param headerBuf The header bytes from a tar archive entry. + * @param encoding encoding to use for file names + * @since 1.4 + * @throws IllegalArgumentException if any of the numeric fields have an invalid format + * @throws IOException on error + */ + public TarArchiveEntry(final byte[] headerBuf, final ZipEncoding encoding) + throws IOException { + this(false); + parseTarHeader(headerBuf, encoding); + } + + /** + * Determine if the two entries are equal. Equality is determined + * by the header names being equal. + * + * @param it Entry to be checked for equality. + * @return True if the entries are equal. + */ + public boolean equals(final TarArchiveEntry it) { + return it != null && getName().equals(it.getName()); + } + + /** + * Determine if the two entries are equal. Equality is determined + * by the header names being equal. + * + * @param it Entry to be checked for equality. + * @return True if the entries are equal. 
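To make the name normalization described above concrete, a small sketch (paths invented) showing how a leading slash is stripped by default, kept with preserveAbsolutePath, and how a trailing slash marks a directory:

    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;

    class TarEntryNameSketch {
        public static void main(String[] args) {
            TarArchiveEntry file = new TarArchiveEntry("/var/log/syslog");
            System.out.println(file.getName());      // var/log/syslog (leading slash stripped)
            System.out.println(file.isDirectory());  // false

            TarArchiveEntry dir = new TarArchiveEntry("backup/2018/");
            System.out.println(dir.isDirectory());   // true, name ends with '/'

            TarArchiveEntry absolute = new TarArchiveEntry("/var/log/syslog", true);
            System.out.println(absolute.getName());  // /var/log/syslog (preserved)
        }
    }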
+ */ + @Override + public boolean equals(final Object it) { + if (it == null || getClass() != it.getClass()) { + return false; + } + return equals((TarArchiveEntry) it); + } + + /** + * Hashcodes are based on entry names. + * + * @return the entry hashcode + */ + @Override + public int hashCode() { + return getName().hashCode(); + } + + /** + * Determine if the given entry is a descendant of this entry. + * Descendancy is determined by the name of the descendant + * starting with this entry's name. + * + * @param desc Entry to be checked as a descendent of this. + * @return True if entry is a descendant of this. + */ + public boolean isDescendent(final TarArchiveEntry desc) { + return desc.getName().startsWith(getName()); + } + + /** + * Get this entry's name. + * + * <p>This method returns the raw name as it is stored inside of the archive.</p> + * + * @return This entry's name. + */ + @Override + public String getName() { + return name; + } + + /** + * Set this entry's name. + * + * @param name This entry's new name. + */ + public void setName(final String name) { + this.name = normalizeFileName(name, this.preserveAbsolutePath); + } + + /** + * Set the mode for this entry + * + * @param mode the mode for this entry + */ + public void setMode(final int mode) { + this.mode = mode; + } + + /** + * Get this entry's link name. + * + * @return This entry's link name. + */ + public String getLinkName() { + return linkName; + } + + /** + * Set this entry's link name. + * + * @param link the link name to use. + * + * @since 1.1 + */ + public void setLinkName(final String link) { + this.linkName = link; + } + + /** + * Get this entry's user id. + * + * @return This entry's user id. + * @deprecated use #getLongUserId instead as user ids can be + * bigger than {@link Integer#MAX_VALUE} + */ + @Deprecated + public int getUserId() { + return (int) (userId & 0xffffffff); + } + + /** + * Set this entry's user id. + * + * @param userId This entry's new user id. + */ + public void setUserId(final int userId) { + setUserId((long) userId); + } + + /** + * Get this entry's user id. + * + * @return This entry's user id. + * @since 1.10 + */ + public long getLongUserId() { + return userId; + } + + /** + * Set this entry's user id. + * + * @param userId This entry's new user id. + * @since 1.10 + */ + public void setUserId(final long userId) { + this.userId = userId; + } + + /** + * Get this entry's group id. + * + * @return This entry's group id. + * @deprecated use #getLongGroupId instead as group ids can be + * bigger than {@link Integer#MAX_VALUE} + */ + @Deprecated + public int getGroupId() { + return (int) (groupId & 0xffffffff); + } + + /** + * Set this entry's group id. + * + * @param groupId This entry's new group id. + */ + public void setGroupId(final int groupId) { + setGroupId((long) groupId); + } + + /** + * Get this entry's group id. + * + * @since 1.10 + * @return This entry's group id. + */ + public long getLongGroupId() { + return groupId; + } + + /** + * Set this entry's group id. + * + * @since 1.10 + * @param groupId This entry's new group id. + */ + public void setGroupId(final long groupId) { + this.groupId = groupId; + } + + /** + * Get this entry's user name. + * + * @return This entry's user name. + */ + public String getUserName() { + return userName; + } + + /** + * Set this entry's user name. + * + * @param userName This entry's new user name. + */ + public void setUserName(final String userName) { + this.userName = userName; + } + + /** + * Get this entry's group name. 
+ * + * @return This entry's group name. + */ + public String getGroupName() { + return groupName; + } + + /** + * Set this entry's group name. + * + * @param groupName This entry's new group name. + */ + public void setGroupName(final String groupName) { + this.groupName = groupName; + } + + /** + * Convenience method to set this entry's group and user ids. + * + * @param userId This entry's new user id. + * @param groupId This entry's new group id. + */ + public void setIds(final int userId, final int groupId) { + setUserId(userId); + setGroupId(groupId); + } + + /** + * Convenience method to set this entry's group and user names. + * + * @param userName This entry's new user name. + * @param groupName This entry's new group name. + */ + public void setNames(final String userName, final String groupName) { + setUserName(userName); + setGroupName(groupName); + } + + /** + * Set this entry's modification time. The parameter passed + * to this method is in "Java time". + * + * @param time This entry's new modification time. + */ + public void setModTime(final long time) { + modTime = time / MILLIS_PER_SECOND; + } + + /** + * Set this entry's modification time. + * + * @param time This entry's new modification time. + */ + public void setModTime(final Date time) { + modTime = time.getTime() / MILLIS_PER_SECOND; + } + + /** + * Set this entry's modification time. + * + * @return time This entry's new modification time. + */ + public Date getModTime() { + return new Date(modTime * MILLIS_PER_SECOND); + } + + @Override + public Date getLastModifiedDate() { + return getModTime(); + } + + /** + * Get this entry's checksum status. + * + * @return if the header checksum is reasonably correct + * @see TarUtils#verifyCheckSum(byte[]) + * @since 1.5 + */ + public boolean isCheckSumOK() { + return checkSumOK; + } + + /** + * Get this entry's file. + * + * <p>This method is only useful for entries created from a {@code + * File} but not for entries read from an archive.</p> + * + * @return This entry's file. + */ + public File getFile() { + return file; + } + + /** + * Get this entry's mode. + * + * @return This entry's mode. + */ + public int getMode() { + return mode; + } + + /** + * Get this entry's file size. + * + * @return This entry's file size. + */ + @Override + public long getSize() { + return size; + } + + /** + * Set this entry's file size. + * + * @param size This entry's new file size. + * @throws IllegalArgumentException if the size is < 0. + */ + public void setSize(final long size) { + if (size < 0){ + throw new IllegalArgumentException("Size is out of range: "+size); + } + this.size = size; + } + + /** + * Get this entry's major device number. + * + * @return This entry's major device number. + * @since 1.4 + */ + public int getDevMajor() { + return devMajor; + } + + /** + * Set this entry's major device number. + * + * @param devNo This entry's major device number. + * @throws IllegalArgumentException if the devNo is < 0. + * @since 1.4 + */ + public void setDevMajor(final int devNo) { + if (devNo < 0){ + throw new IllegalArgumentException("Major device number is out of " + + "range: " + devNo); + } + this.devMajor = devNo; + } + + /** + * Get this entry's minor device number. + * + * @return This entry's minor device number. + * @since 1.4 + */ + public int getDevMinor() { + return devMinor; + } + + /** + * Set this entry's minor device number. + * + * @param devNo This entry's minor device number. + * @throws IllegalArgumentException if the devNo is < 0. 
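Because the header stores modification times in whole seconds, setModTime() divides by MILLIS_PER_SECOND and getModTime() multiplies back, so sub-second precision is dropped; a brief sketch (timestamp invented):

    import java.util.Date;
    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;

    class TarEntryTimeSketch {
        public static void main(String[] args) {
            TarArchiveEntry entry = new TarArchiveEntry("notes.txt");
            entry.setModTime(1538400000123L);            // milliseconds since the epoch
            Date roundTripped = entry.getModTime();
            System.out.println(roundTripped.getTime());  // 1538400000000, truncated to seconds
            System.out.println(entry.getLastModifiedDate().equals(roundTripped)); // true
        }
    }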
+ * @since 1.4 + */ + public void setDevMinor(final int devNo) { + if (devNo < 0){ + throw new IllegalArgumentException("Minor device number is out of " + + "range: " + devNo); + } + this.devMinor = devNo; + } + + /** + * Indicates in case of an oldgnu sparse file if an extension + * sparse header follows. + * + * @return true if an extension oldgnu sparse header follows. + */ + public boolean isExtended() { + return isExtended; + } + + /** + * Get this entry's real file size in case of a sparse file. + * + * @return This entry's real file size. + */ + public long getRealSize() { + return realSize; + } + + /** + * Indicate if this entry is a GNU sparse block. + * + * @return true if this is a sparse extension provided by GNU tar + */ + public boolean isGNUSparse() { + return isOldGNUSparse() || isPaxGNUSparse(); + } + + /** + * Indicate if this entry is a GNU or star sparse block using the + * oldgnu format. + * + * @return true if this is a sparse extension provided by GNU tar or star + * @since 1.11 + */ + public boolean isOldGNUSparse() { + return linkFlag == LF_GNUTYPE_SPARSE; + } + + /** + * Indicate if this entry is a GNU sparse block using one of the + * PAX formats. + * + * @return true if this is a sparse extension provided by GNU tar + * @since 1.11 + */ + public boolean isPaxGNUSparse() { + return paxGNUSparse; + } + + /** + * Indicate if this entry is a star sparse block using PAX headers. + * + * @return true if this is a sparse extension provided by star + * @since 1.11 + */ + public boolean isStarSparse() { + return starSparse; + } + + /** + * Indicate if this entry is a GNU long linkname block + * + * @return true if this is a long name extension provided by GNU tar + */ + public boolean isGNULongLinkEntry() { + return linkFlag == LF_GNUTYPE_LONGLINK; + } + + /** + * Indicate if this entry is a GNU long name block + * + * @return true if this is a long name extension provided by GNU tar + */ + public boolean isGNULongNameEntry() { + return linkFlag == LF_GNUTYPE_LONGNAME; + } + + /** + * Check if this is a Pax header. + * + * @return {@code true} if this is a Pax header. + * + * @since 1.1 + * + */ + public boolean isPaxHeader() { + return linkFlag == LF_PAX_EXTENDED_HEADER_LC + || linkFlag == LF_PAX_EXTENDED_HEADER_UC; + } + + /** + * Check if this is a Pax header. + * + * @return {@code true} if this is a Pax header. + * + * @since 1.1 + */ + public boolean isGlobalPaxHeader() { + return linkFlag == LF_PAX_GLOBAL_EXTENDED_HEADER; + } + + /** + * Return whether or not this entry represents a directory. + * + * @return True if this entry is a directory. + */ + @Override + public boolean isDirectory() { + if (file != null) { + return file.isDirectory(); + } + + if (linkFlag == LF_DIR) { + return true; + } + + return !isPaxHeader() && !isGlobalPaxHeader() && getName().endsWith("/"); + } + + /** + * Check if this is a "normal file" + * + * @since 1.2 + * @return whether this is a "normal file" + */ + public boolean isFile() { + if (file != null) { + return file.isFile(); + } + if (linkFlag == LF_OLDNORM || linkFlag == LF_NORMAL) { + return true; + } + return !getName().endsWith("/"); + } + + /** + * Check if this is a symbolic link entry. + * + * @since 1.2 + * @return whether this is a symbolic link + */ + public boolean isSymbolicLink() { + return linkFlag == LF_SYMLINK; + } + + /** + * Check if this is a link entry. 
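These predicates are driven by the link flag byte at offset 156; as a small sketch (names invented), constructing entries with explicit flags from TarConstants flips the matching check:

    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarConstants;

    class LinkFlagSketch {
        public static void main(String[] args) {
            TarArchiveEntry symlink = new TarArchiveEntry("etc/motd", TarConstants.LF_SYMLINK);
            symlink.setLinkName("../run/motd");
            System.out.println(symlink.isSymbolicLink()); // true
            System.out.println(symlink.isLink());         // false, LF_LINK would be a hard link

            TarArchiveEntry fifo = new TarArchiveEntry("var/run/pipe", TarConstants.LF_FIFO);
            System.out.println(fifo.isFIFO());             // true
        }
    }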
+ * + * @since 1.2 + * @return whether this is a link entry + */ + public boolean isLink() { + return linkFlag == LF_LINK; + } + + /** + * Check if this is a character device entry. + * + * @since 1.2 + * @return whether this is a character device + */ + public boolean isCharacterDevice() { + return linkFlag == LF_CHR; + } + + /** + * Check if this is a block device entry. + * + * @since 1.2 + * @return whether this is a block device + */ + public boolean isBlockDevice() { + return linkFlag == LF_BLK; + } + + /** + * Check if this is a FIFO (pipe) entry. + * + * @since 1.2 + * @return whether this is a FIFO entry + */ + public boolean isFIFO() { + return linkFlag == LF_FIFO; + } + + /** + * Check whether this is a sparse entry. + * + * @return whether this is a sparse entry + * @since 1.11 + */ + public boolean isSparse() { + return isGNUSparse() || isStarSparse(); + } + + /** + * get extra PAX Headers + * @return read-only map containing any extra PAX Headers + * @since 1.15 + */ + public Map<String, String> getExtraPaxHeaders() { + return Collections.unmodifiableMap(extraPaxHeaders); + } + + /** + * clear all extra PAX headers. + * @since 1.15 + */ + public void clearExtraPaxHeaders() { + extraPaxHeaders.clear(); + } + + /** + * add a PAX header to this entry. If the header corresponds to an existing field in the entry, + * that field will be set; otherwise the header will be added to the extraPaxHeaders Map + * @param name The full name of the header to set. + * @param value value of header. + * @since 1.15 + */ + public void addPaxHeader(String name,String value) { + processPaxHeader(name,value); + } + + /** + * get named extra PAX header + * @param name The full name of an extended PAX header to retrieve + * @return The value of the header, if any. + * @since 1.15 + */ + public String getExtraPaxHeader(String name) { + return extraPaxHeaders.get(name); + } + + /** + * Update the entry using a map of pax headers. + * @param headers + * @since 1.15 + */ + void updateEntryFromPaxHeaders(Map<String, String> headers) { + for (final Map.Entry<String, String> ent : headers.entrySet()) { + final String key = ent.getKey(); + final String val = ent.getValue(); + processPaxHeader(key, val, headers); + } + } + + /** + * process one pax header, using the entries extraPaxHeaders map as source for extra headers + * used when handling entries for sparse files. + * @param key + * @param val + * @since 1.15 + */ + private void processPaxHeader(String key, String val) { + processPaxHeader(key,val,extraPaxHeaders); + } + + /** + * Process one pax header, using the supplied map as source for extra headers to be used when handling + * entries for sparse files + * + * @param key the header name. + * @param val the header value. + * @param headers map of headers used for dealing with sparse file. + * @since 1.15 + */ + private void processPaxHeader(String key, String val, Map<String, String> headers) { + /* + * The following headers are defined for Pax. + * atime, ctime, charset: cannot use these without changing TarArchiveEntry fields + * mtime + * comment + * gid, gname + * linkpath + * size + * uid,uname + * SCHILY.devminor, SCHILY.devmajor: don't have setters/getters for those + * + * GNU sparse files use additional members, we use + * GNU.sparse.size to detect the 0.0 and 0.1 versions and + * GNU.sparse.realsize for 1.0. + * + * star files use additional members of which we use + * SCHILY.filetype in order to detect star sparse files. 
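As the comment below spells out, addPaxHeader() routes recognized keys into the matching entry fields and keeps everything else in the extra-headers map; a brief sketch (header key and values invented):

    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;

    class PaxHeaderSketch {
        public static void main(String[] args) {
            TarArchiveEntry entry = new TarArchiveEntry("data.bin");

            // "uid" is a recognized key, so it updates the entry itself ...
            entry.addPaxHeader("uid", "1000");
            System.out.println(entry.getLongUserId());                      // 1000

            // ... while unknown keys are kept as extra PAX headers.
            entry.addPaxHeader("MYTOOL.checksum", "deadbeef");
            System.out.println(entry.getExtraPaxHeader("MYTOOL.checksum")); // deadbeef
            System.out.println(entry.getExtraPaxHeaders().size());          // 1
        }
    }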
+ * + * If called from addExtraPaxHeader, these additional headers must be already present . + */ + switch (key) { + case "path": + setName(val); + break; + case "linkpath": + setLinkName(val); + break; + case "gid": + setGroupId(Long.parseLong(val)); + break; + case "gname": + setGroupName(val); + break; + case "uid": + setUserId(Long.parseLong(val)); + break; + case "uname": + setUserName(val); + break; + case "size": + setSize(Long.parseLong(val)); + break; + case "mtime": + setModTime((long) (Double.parseDouble(val) * 1000)); + break; + case "SCHILY.devminor": + setDevMinor(Integer.parseInt(val)); + break; + case "SCHILY.devmajor": + setDevMajor(Integer.parseInt(val)); + break; + case "GNU.sparse.size": + fillGNUSparse0xData(headers); + break; + case "GNU.sparse.realsize": + fillGNUSparse1xData(headers); + break; + case "SCHILY.filetype": + if ("sparse".equals(val)) { + fillStarSparseData(headers); + } + break; + default: + extraPaxHeaders.put(key,val); + } + } + + + + /** + * If this entry represents a file, and the file is a directory, return + * an array of TarEntries for this entry's children. + * + * <p>This method is only useful for entries created from a {@code + * File} but not for entries read from an archive.</p> + * + * @return An array of TarEntry's for this entry's children. + */ + public TarArchiveEntry[] getDirectoryEntries() { + if (file == null || !file.isDirectory()) { + return EMPTY_TAR_ARCHIVE_ENTRIES; + } + + final String[] list = file.list(); + if (list == null) { + return EMPTY_TAR_ARCHIVE_ENTRIES; + } + final TarArchiveEntry[] result = new TarArchiveEntry[list.length]; + + for (int i = 0; i < result.length; ++i) { + result[i] = new TarArchiveEntry(new File(file, list[i])); + } + + return result; + } + + /** + * Write an entry's header information to a header buffer. + * + * <p>This method does not use the star/GNU tar/BSD tar extensions.</p> + * + * @param outbuf The tar entry header buffer to fill in. + */ + public void writeEntryHeader(final byte[] outbuf) { + try { + writeEntryHeader(outbuf, TarUtils.DEFAULT_ENCODING, false); + } catch (final IOException ex) { + try { + writeEntryHeader(outbuf, TarUtils.FALLBACK_ENCODING, false); + } catch (final IOException ex2) { + // impossible + throw new RuntimeException(ex2); //NOSONAR + } + } + } + + /** + * Write an entry's header information to a header buffer. + * + * @param outbuf The tar entry header buffer to fill in. + * @param encoding encoding to use when writing the file name. 
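Editor's note: the switch above routes recognised PAX keywords into the typed fields of the entry, while anything else falls through to the extra-headers map. A small sketch of that behaviour through the public addPaxHeader call; the values are made up for illustration:

    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;

    public class PaxKeywordRouting {
        public static void main(String[] args) {
            TarArchiveEntry entry = new TarArchiveEntry("large.bin");
            entry.addPaxHeader("size", "8589934592");       // recognised: updates the size field
            entry.addPaxHeader("uname", "builder");         // recognised: updates the user name
            entry.addPaxHeader("X-custom", "whatever");     // unknown: kept as an extra header
            System.out.println(entry.getSize());                     // 8589934592
            System.out.println(entry.getUserName());                 // builder
            System.out.println(entry.getExtraPaxHeader("X-custom")); // whatever
        }
    }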
+ * @param starMode whether to use the star/GNU tar/BSD tar + * extension for numeric fields if their value doesn't fit in the + * maximum size of standard tar archives + * @since 1.4 + * @throws IOException on error + */ + public void writeEntryHeader(final byte[] outbuf, final ZipEncoding encoding, + final boolean starMode) throws IOException { + int offset = 0; + + offset = TarUtils.formatNameBytes(name, outbuf, offset, NAMELEN, + encoding); + offset = writeEntryHeaderField(mode, outbuf, offset, MODELEN, starMode); + offset = writeEntryHeaderField(userId, outbuf, offset, UIDLEN, + starMode); + offset = writeEntryHeaderField(groupId, outbuf, offset, GIDLEN, + starMode); + offset = writeEntryHeaderField(size, outbuf, offset, SIZELEN, starMode); + offset = writeEntryHeaderField(modTime, outbuf, offset, MODTIMELEN, + starMode); + + final int csOffset = offset; + + for (int c = 0; c < CHKSUMLEN; ++c) { + outbuf[offset++] = (byte) ' '; + } + + outbuf[offset++] = linkFlag; + offset = TarUtils.formatNameBytes(linkName, outbuf, offset, NAMELEN, + encoding); + offset = TarUtils.formatNameBytes(magic, outbuf, offset, MAGICLEN); + offset = TarUtils.formatNameBytes(version, outbuf, offset, VERSIONLEN); + offset = TarUtils.formatNameBytes(userName, outbuf, offset, UNAMELEN, + encoding); + offset = TarUtils.formatNameBytes(groupName, outbuf, offset, GNAMELEN, + encoding); + offset = writeEntryHeaderField(devMajor, outbuf, offset, DEVLEN, + starMode); + offset = writeEntryHeaderField(devMinor, outbuf, offset, DEVLEN, + starMode); + + while (offset < outbuf.length) { + outbuf[offset++] = 0; + } + + final long chk = TarUtils.computeCheckSum(outbuf); + + TarUtils.formatCheckSumOctalBytes(chk, outbuf, csOffset, CHKSUMLEN); + } + + private int writeEntryHeaderField(final long value, final byte[] outbuf, final int offset, + final int length, final boolean starMode) { + if (!starMode && (value < 0 + || value >= 1L << 3 * (length - 1))) { + // value doesn't fit into field when written as octal + // number, will be written to PAX header or causes an + // error + return TarUtils.formatLongOctalBytes(0, outbuf, offset, length); + } + return TarUtils.formatLongOctalOrBinaryBytes(value, outbuf, offset, + length); + } + + /** + * Parse an entry's header information from a header buffer. + * + * @param header The tar entry header buffer to get information from. + * @throws IllegalArgumentException if any of the numeric fields have an invalid format + */ + public void parseTarHeader(final byte[] header) { + try { + parseTarHeader(header, TarUtils.DEFAULT_ENCODING); + } catch (final IOException ex) { + try { + parseTarHeader(header, TarUtils.DEFAULT_ENCODING, true); + } catch (final IOException ex2) { + // not really possible + throw new RuntimeException(ex2); //NOSONAR + } + } + } + + /** + * Parse an entry's header information from a header buffer. + * + * @param header The tar entry header buffer to get information from. + * @param encoding encoding to use for file names + * @since 1.4 + * @throws IllegalArgumentException if any of the numeric fields + * have an invalid format + * @throws IOException on error + */ + public void parseTarHeader(final byte[] header, final ZipEncoding encoding) + throws IOException { + parseTarHeader(header, encoding, false); + } + + private void parseTarHeader(final byte[] header, final ZipEncoding encoding, + final boolean oldStyle) + throws IOException { + int offset = 0; + + name = oldStyle ? 
TarUtils.parseName(header, offset, NAMELEN) + : TarUtils.parseName(header, offset, NAMELEN, encoding); + offset += NAMELEN; + mode = (int) TarUtils.parseOctalOrBinary(header, offset, MODELEN); + offset += MODELEN; + userId = (int) TarUtils.parseOctalOrBinary(header, offset, UIDLEN); + offset += UIDLEN; + groupId = (int) TarUtils.parseOctalOrBinary(header, offset, GIDLEN); + offset += GIDLEN; + size = TarUtils.parseOctalOrBinary(header, offset, SIZELEN); + offset += SIZELEN; + modTime = TarUtils.parseOctalOrBinary(header, offset, MODTIMELEN); + offset += MODTIMELEN; + checkSumOK = TarUtils.verifyCheckSum(header); + offset += CHKSUMLEN; + linkFlag = header[offset++]; + linkName = oldStyle ? TarUtils.parseName(header, offset, NAMELEN) + : TarUtils.parseName(header, offset, NAMELEN, encoding); + offset += NAMELEN; + magic = TarUtils.parseName(header, offset, MAGICLEN); + offset += MAGICLEN; + version = TarUtils.parseName(header, offset, VERSIONLEN); + offset += VERSIONLEN; + userName = oldStyle ? TarUtils.parseName(header, offset, UNAMELEN) + : TarUtils.parseName(header, offset, UNAMELEN, encoding); + offset += UNAMELEN; + groupName = oldStyle ? TarUtils.parseName(header, offset, GNAMELEN) + : TarUtils.parseName(header, offset, GNAMELEN, encoding); + offset += GNAMELEN; + if (linkFlag == LF_CHR || linkFlag == LF_BLK) { + devMajor = (int) TarUtils.parseOctalOrBinary(header, offset, DEVLEN); + offset += DEVLEN; + devMinor = (int) TarUtils.parseOctalOrBinary(header, offset, DEVLEN); + offset += DEVLEN; + } else { + offset += 2 * DEVLEN; + } + + final int type = evaluateType(header); + switch (type) { + case FORMAT_OLDGNU: { + offset += ATIMELEN_GNU; + offset += CTIMELEN_GNU; + offset += OFFSETLEN_GNU; + offset += LONGNAMESLEN_GNU; + offset += PAD2LEN_GNU; + offset += SPARSELEN_GNU; + isExtended = TarUtils.parseBoolean(header, offset); + offset += ISEXTENDEDLEN_GNU; + realSize = TarUtils.parseOctal(header, offset, REALSIZELEN_GNU); + offset += REALSIZELEN_GNU; // NOSONAR - assignment as documentation + break; + } + case FORMAT_XSTAR: { + final String xstarPrefix = oldStyle + ? TarUtils.parseName(header, offset, PREFIXLEN_XSTAR) + : TarUtils.parseName(header, offset, PREFIXLEN_XSTAR, encoding); + if (xstarPrefix.length() > 0) { + name = xstarPrefix + "/" + name; + } + break; + } + case FORMAT_POSIX: + default: { + final String prefix = oldStyle + ? TarUtils.parseName(header, offset, PREFIXLEN) + : TarUtils.parseName(header, offset, PREFIXLEN, encoding); + // SunOS tar -E does not add / to directory names, so fix + // up to be consistent + if (isDirectory() && !name.endsWith("/")){ + name = name + "/"; + } + if (prefix.length() > 0){ + name = prefix + "/" + name; + } + } + } + } + + /** + * Strips Windows' drive letter as well as any leading slashes, + * turns path separators into forward slahes. + */ + private static String normalizeFileName(String fileName, + final boolean preserveAbsolutePath) { + if (!preserveAbsolutePath) { + final String osname = System.getProperty("os.name").toLowerCase(Locale.ENGLISH); + + if (osname != null) { + + // Strip off drive letters! + // REVIEW Would a better check be "(File.separator == '\')"? 
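Editor's note: writeEntryHeader and parseTarHeader are inverses over a single 512 byte record, so a header can be round-tripped in memory. A minimal sketch; the entry name and size are invented:

    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;

    public class HeaderRoundTrip {
        public static void main(String[] args) {
            byte[] record = new byte[512];                 // TarConstants.DEFAULT_RCDSIZE
            TarArchiveEntry original = new TarArchiveEntry("docs/readme.txt");
            original.setSize(1234);
            original.writeEntryHeader(record);             // fills the record, checksum included

            TarArchiveEntry parsed = new TarArchiveEntry(record); // parses the same layout back
            System.out.println(parsed.getName());          // docs/readme.txt
            System.out.println(parsed.getSize());          // 1234
        }
    }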
+ + if (osname.startsWith("windows")) { + if (fileName.length() > 2) { + final char ch1 = fileName.charAt(0); + final char ch2 = fileName.charAt(1); + + if (ch2 == ':' + && (ch1 >= 'a' && ch1 <= 'z' + || ch1 >= 'A' && ch1 <= 'Z')) { + fileName = fileName.substring(2); + } + } + } else if (osname.contains("netware")) { + final int colon = fileName.indexOf(':'); + if (colon != -1) { + fileName = fileName.substring(colon + 1); + } + } + } + } + + fileName = fileName.replace(File.separatorChar, '/'); + + // No absolute pathnames + // Windows (and Posix?) paths can start with "\\NetworkDrive\", + // so we loop on starting /'s. + while (!preserveAbsolutePath && fileName.startsWith("/")) { + fileName = fileName.substring(1); + } + return fileName; + } + + /** + * Evaluate an entry's header format from a header buffer. + * + * @param header The tar entry header buffer to evaluate the format for. + * @return format type + */ + private int evaluateType(final byte[] header) { + if (ArchiveUtils.matchAsciiBuffer(MAGIC_GNU, header, MAGIC_OFFSET, MAGICLEN)) { + return FORMAT_OLDGNU; + } + if (ArchiveUtils.matchAsciiBuffer(MAGIC_POSIX, header, MAGIC_OFFSET, MAGICLEN)) { + if (ArchiveUtils.matchAsciiBuffer(MAGIC_XSTAR, header, XSTAR_MAGIC_OFFSET, + XSTAR_MAGIC_LEN)) { + return FORMAT_XSTAR; + } + return FORMAT_POSIX; + } + return 0; + } + + void fillGNUSparse0xData(final Map<String, String> headers) { + paxGNUSparse = true; + realSize = Integer.parseInt(headers.get("GNU.sparse.size")); + if (headers.containsKey("GNU.sparse.name")) { + // version 0.1 + name = headers.get("GNU.sparse.name"); + } + } + + void fillGNUSparse1xData(final Map<String, String> headers) { + paxGNUSparse = true; + realSize = Integer.parseInt(headers.get("GNU.sparse.realsize")); + name = headers.get("GNU.sparse.name"); + } + + void fillStarSparseData(final Map<String, String> headers) { + starSparse = true; + if (headers.containsKey("SCHILY.realsize")) { + realSize = Long.parseLong(headers.get("SCHILY.realsize")); + } + } +} + diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java new file mode 100644 index 000000000..daaf729f2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java @@ -0,0 +1,714 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * This package is based on the work done by Timothy Gerard Endres + * (time@ice.com) to whom the Ant project is very grateful for his great code. 
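Editor's note: normalizeFileName is why entry names never start with a slash unless the caller explicitly asks for absolute paths. A tiny sketch of the observable effect, assuming the String constructors of this class (defined earlier in the same file) with and without the preserve flag:

    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;

    public class EntryNameNormalization {
        public static void main(String[] args) {
            System.out.println(new TarArchiveEntry("/var/log/syslog").getName());
            // -> var/log/syslog (leading slash stripped)
            System.out.println(new TarArchiveEntry("/var/log/syslog", true).getName());
            // -> /var/log/syslog (preserved on request)
            // On Windows, "C:\\data\\file.txt" would additionally lose the drive letter
            // and have its backslashes turned into forward slashes.
        }
    }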
+ */ + +package org.apache.commons.compress.archivers.tar; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; +import org.apache.commons.compress.utils.ArchiveUtils; +import org.apache.commons.compress.utils.CharsetNames; +import org.apache.commons.compress.utils.IOUtils; + +/** + * The TarInputStream reads a UNIX tar archive as an InputStream. + * methods are provided to position at each successive entry in + * the archive, and the read each entry as a normal input stream + * using read(). + * @NotThreadSafe + */ +public class TarArchiveInputStream extends ArchiveInputStream { + + private static final int SMALL_BUFFER_SIZE = 256; + + private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE]; + + /** The size the TAR header */ + private final int recordSize; + + /** The size of a block */ + private final int blockSize; + + /** True if file has hit EOF */ + private boolean hasHitEOF; + + /** Size of the current entry */ + private long entrySize; + + /** How far into the entry the stream is at */ + private long entryOffset; + + /** An input stream to read from */ + private final InputStream is; + + /** The meta-data about the current entry */ + private TarArchiveEntry currEntry; + + /** The encoding of the file */ + private final ZipEncoding zipEncoding; + + // the provided encoding (for unit tests) + final String encoding; + + // the global PAX header + private Map<String, String> globalPaxHeaders = new HashMap<>(); + + /** + * Constructor for TarInputStream. + * @param is the input stream to use + */ + public TarArchiveInputStream(final InputStream is) { + this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE); + } + + /** + * Constructor for TarInputStream. + * @param is the input stream to use + * @param encoding name of the encoding to use for file names + * @since 1.4 + */ + public TarArchiveInputStream(final InputStream is, final String encoding) { + this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, + encoding); + } + + /** + * Constructor for TarInputStream. + * @param is the input stream to use + * @param blockSize the block size to use + */ + public TarArchiveInputStream(final InputStream is, final int blockSize) { + this(is, blockSize, TarConstants.DEFAULT_RCDSIZE); + } + + /** + * Constructor for TarInputStream. + * @param is the input stream to use + * @param blockSize the block size to use + * @param encoding name of the encoding to use for file names + * @since 1.4 + */ + public TarArchiveInputStream(final InputStream is, final int blockSize, + final String encoding) { + this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding); + } + + /** + * Constructor for TarInputStream. + * @param is the input stream to use + * @param blockSize the block size to use + * @param recordSize the record size to use + */ + public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize) { + this(is, blockSize, recordSize, null); + } + + /** + * Constructor for TarInputStream. 
+ * @param is the input stream to use + * @param blockSize the block size to use + * @param recordSize the record size to use + * @param encoding name of the encoding to use for file names + * @since 1.4 + */ + public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize, + final String encoding) { + this.is = is; + this.hasHitEOF = false; + this.encoding = encoding; + this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); + this.recordSize = recordSize; + this.blockSize = blockSize; + } + + /** + * Closes this stream. Calls the TarBuffer's close() method. + * @throws IOException on error + */ + @Override + public void close() throws IOException { + is.close(); + } + + /** + * Get the record size being used by this stream's buffer. + * + * @return The TarBuffer record size. + */ + public int getRecordSize() { + return recordSize; + } + + /** + * Get the available data that can be read from the current + * entry in the archive. This does not indicate how much data + * is left in the entire archive, only in the current entry. + * This value is determined from the entry's size header field + * and the amount of data already read from the current entry. + * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE + * bytes are left in the current entry in the archive. + * + * @return The number of available bytes for the current entry. + * @throws IOException for signature + */ + @Override + public int available() throws IOException { + if (isDirectory()) { + return 0; + } + if (entrySize - entryOffset > Integer.MAX_VALUE) { + return Integer.MAX_VALUE; + } + return (int) (entrySize - entryOffset); + } + + + /** + * Skips over and discards <code>n</code> bytes of data from this input + * stream. The <code>skip</code> method may, for a variety of reasons, end + * up skipping over some smaller number of bytes, possibly <code>0</code>. + * This may result from any of a number of conditions; reaching end of file + * or end of entry before <code>n</code> bytes have been skipped; are only + * two possibilities. The actual number of bytes skipped is returned. If + * <code>n</code> is negative, no bytes are skipped. + * + * + * @param n + * the number of bytes to be skipped. + * @return the actual number of bytes skipped. + * @throws IOException + * if some other I/O error occurs. + */ + @Override + public long skip(final long n) throws IOException { + if (n <= 0 || isDirectory()) { + return 0; + } + + final long available = entrySize - entryOffset; + final long skipped = IOUtils.skip(is, Math.min(n, available)); + count(skipped); + entryOffset += skipped; + return skipped; + } + + /** + * Since we do not support marking just yet, we return false. + * + * @return False. + */ + @Override + public boolean markSupported() { + return false; + } + + /** + * Since we do not support marking just yet, we do nothing. + * + * @param markLimit The limit to mark. + */ + @Override + public void mark(final int markLimit) { + } + + /** + * Since we do not support marking just yet, we do nothing. + */ + @Override + public synchronized void reset() { + } + + /** + * Get the next entry in this tar archive. This will skip + * over any remaining data in the current entry, if there + * is one, and place the input stream at the header of the + * next entry, and read the header and instantiate a new + * TarEntry from the header bytes and return that entry. 
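Editor's note: the constructors above mainly differ in block size, record size and file-name encoding. A minimal read loop, assuming a local backup.tar (a placeholder) whose names are UTF-8 encoded; read() stops at each entry boundary, so the inner loop never runs into the next header:

    import java.io.BufferedInputStream;
    import java.io.FileInputStream;
    import java.io.IOException;
    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;

    public class CountEntryBytes {
        public static void main(String[] args) throws IOException {
            try (TarArchiveInputStream in = new TarArchiveInputStream(
                    new BufferedInputStream(new FileInputStream("backup.tar")), "UTF-8")) {
                TarArchiveEntry entry;
                byte[] buffer = new byte[8192];
                while ((entry = in.getNextTarEntry()) != null) {
                    if (!entry.isFile()) {
                        continue;                         // skip directories, links, devices
                    }
                    long total = 0;
                    int n;
                    while ((n = in.read(buffer)) != -1) { // bounded by the current entry
                        total += n;
                    }
                    System.out.println(entry.getName() + ": " + total + " bytes");
                }
            }
        }
    }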
+ * If there are no more entries in the archive, null will + * be returned to indicate that the end of the archive has + * been reached. + * + * @return The next TarEntry in the archive, or null. + * @throws IOException on error + */ + public TarArchiveEntry getNextTarEntry() throws IOException { + if (isAtEOF()) { + return null; + } + + if (currEntry != null) { + /* Skip will only go to the end of the current entry */ + IOUtils.skip(this, Long.MAX_VALUE); + + /* skip to the end of the last record */ + skipRecordPadding(); + } + + final byte[] headerBuf = getRecord(); + + if (headerBuf == null) { + /* hit EOF */ + currEntry = null; + return null; + } + + try { + currEntry = new TarArchiveEntry(headerBuf, zipEncoding); + } catch (final IllegalArgumentException e) { + throw new IOException("Error detected parsing the header", e); + } + + entryOffset = 0; + entrySize = currEntry.getSize(); + + if (currEntry.isGNULongLinkEntry()) { + final byte[] longLinkData = getLongNameData(); + if (longLinkData == null) { + // Bugzilla: 40334 + // Malformed tar file - long link entry name not followed by + // entry + return null; + } + currEntry.setLinkName(zipEncoding.decode(longLinkData)); + } + + if (currEntry.isGNULongNameEntry()) { + final byte[] longNameData = getLongNameData(); + if (longNameData == null) { + // Bugzilla: 40334 + // Malformed tar file - long entry name not followed by + // entry + return null; + } + currEntry.setName(zipEncoding.decode(longNameData)); + } + + if (currEntry.isGlobalPaxHeader()){ // Process Global Pax headers + readGlobalPaxHeaders(); + } + + if (currEntry.isPaxHeader()){ // Process Pax headers + paxHeaders(); + } else if (!globalPaxHeaders.isEmpty()) { + applyPaxHeadersToCurrentEntry(globalPaxHeaders); + } + + if (currEntry.isOldGNUSparse()){ // Process sparse files + readOldGNUSparse(); + } + + // If the size of the next element in the archive has changed + // due to a new size being reported in the posix header + // information, we update entrySize here so that it contains + // the correct value. + entrySize = currEntry.getSize(); + + return currEntry; + } + + /** + * The last record block should be written at the full size, so skip any + * additional space used to fill a record after an entry + */ + private void skipRecordPadding() throws IOException { + if (!isDirectory() && this.entrySize > 0 && this.entrySize % this.recordSize != 0) { + final long numRecords = (this.entrySize / this.recordSize) + 1; + final long padding = (numRecords * this.recordSize) - this.entrySize; + final long skipped = IOUtils.skip(is, padding); + count(skipped); + } + } + + /** + * Get the next entry in this tar archive as longname data. + * + * @return The next entry in the archive as longname data, or null. 
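Editor's note: skipRecordPadding relies on entries always occupying whole 512 byte records, so the bytes between the end of the data and the end of the last record are discarded. The same arithmetic spelled out for a 1000 byte entry; the numbers are illustrative only:

    public class RecordPadding {
        public static void main(String[] args) {
            long entrySize = 1000;                                    // size from the header
            int recordSize = 512;                                     // TarConstants.DEFAULT_RCDSIZE
            if (entrySize % recordSize != 0) {
                long numRecords = (entrySize / recordSize) + 1;       // 2 records occupied
                long padding = (numRecords * recordSize) - entrySize; // 1024 - 1000
                System.out.println(padding);                          // 24 bytes skipped after the data
            }
        }
    }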
+ * @throws IOException on error + */ + protected byte[] getLongNameData() throws IOException { + // read in the name + final ByteArrayOutputStream longName = new ByteArrayOutputStream(); + int length = 0; + while ((length = read(smallBuf)) >= 0) { + longName.write(smallBuf, 0, length); + } + getNextEntry(); + if (currEntry == null) { + // Bugzilla: 40334 + // Malformed tar file - long entry name not followed by entry + return null; + } + byte[] longNameData = longName.toByteArray(); + // remove trailing null terminator(s) + length = longNameData.length; + while (length > 0 && longNameData[length - 1] == 0) { + --length; + } + if (length != longNameData.length) { + final byte[] l = new byte[length]; + System.arraycopy(longNameData, 0, l, 0, length); + longNameData = l; + } + return longNameData; + } + + /** + * Get the next record in this tar archive. This will skip + * over any remaining data in the current entry, if there + * is one, and place the input stream at the header of the + * next entry. + * + * <p>If there are no more entries in the archive, null will be + * returned to indicate that the end of the archive has been + * reached. At the same time the {@code hasHitEOF} marker will be + * set to true.</p> + * + * @return The next header in the archive, or null. + * @throws IOException on error + */ + private byte[] getRecord() throws IOException { + byte[] headerBuf = readRecord(); + setAtEOF(isEOFRecord(headerBuf)); + if (isAtEOF() && headerBuf != null) { + tryToConsumeSecondEOFRecord(); + consumeRemainderOfLastBlock(); + headerBuf = null; + } + return headerBuf; + } + + /** + * Determine if an archive record indicate End of Archive. End of + * archive is indicated by a record that consists entirely of null bytes. + * + * @param record The record data to check. + * @return true if the record data is an End of Archive + */ + protected boolean isEOFRecord(final byte[] record) { + return record == null || ArchiveUtils.isArrayZero(record, recordSize); + } + + /** + * Read a record from the input stream and return the data. + * + * @return The record data or null if EOF has been hit. 
+ * @throws IOException on error + */ + protected byte[] readRecord() throws IOException { + + final byte[] record = new byte[recordSize]; + + final int readNow = IOUtils.readFully(is, record); + count(readNow); + if (readNow != recordSize) { + return null; + } + + return record; + } + + private void readGlobalPaxHeaders() throws IOException { + globalPaxHeaders = parsePaxHeaders(this); + getNextEntry(); // Get the actual file entry + } + + private void paxHeaders() throws IOException{ + final Map<String, String> headers = parsePaxHeaders(this); + getNextEntry(); // Get the actual file entry + applyPaxHeadersToCurrentEntry(headers); + } + + // NOTE, using a Map here makes it impossible to ever support GNU + // sparse files using the PAX Format 0.0, see + // https://www.gnu.org/software/tar/manual/html_section/tar_92.html#SEC188 + Map<String, String> parsePaxHeaders(final InputStream i) + throws IOException { + final Map<String, String> headers = new HashMap<>(globalPaxHeaders); + // Format is "length keyword=value\n"; + while(true){ // get length + int ch; + int len = 0; + int read = 0; + while((ch = i.read()) != -1) { + read++; + if (ch == '\n') { // blank line in header + break; + } else if (ch == ' '){ // End of length string + // Get keyword + final ByteArrayOutputStream coll = new ByteArrayOutputStream(); + while((ch = i.read()) != -1) { + read++; + if (ch == '='){ // end of keyword + final String keyword = coll.toString(CharsetNames.UTF_8); + // Get rest of entry + final int restLen = len - read; + if (restLen == 1) { // only NL + headers.remove(keyword); + } else { + final byte[] rest = new byte[restLen]; + final int got = IOUtils.readFully(i, rest); + if (got != restLen) { + throw new IOException("Failed to read " + + "Paxheader. Expected " + + restLen + + " bytes, read " + + got); + } + // Drop trailing NL + final String value = new String(rest, 0, + restLen - 1, CharsetNames.UTF_8); + headers.put(keyword, value); + } + break; + } + coll.write((byte) ch); + } + break; // Processed single header + } + len *= 10; + len += ch - '0'; + } + if (ch == -1){ // EOF + break; + } + } + return headers; + } + + private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers) { + currEntry.updateEntryFromPaxHeaders(headers); + + } + + /** + * Adds the sparse chunks from the current entry to the sparse chunks, + * including any additional sparse entries following the current entry. + * + * @throws IOException on error + * + * @todo Sparse files get not yet really processed. + */ + private void readOldGNUSparse() throws IOException { + /* we do not really process sparse files yet + sparses = new ArrayList(); + sparses.addAll(currEntry.getSparses()); + */ + if (currEntry.isExtended()) { + TarArchiveSparseEntry entry; + do { + final byte[] headerBuf = getRecord(); + if (headerBuf == null) { + currEntry = null; + break; + } + entry = new TarArchiveSparseEntry(headerBuf); + /* we do not really process sparse files yet + sparses.addAll(entry.getSparses()); + */ + } while (entry.isExtended()); + } + } + + private boolean isDirectory() { + return currEntry != null && currEntry.isDirectory(); + } + + /** + * Returns the next Archive Entry in this Stream. + * + * @return the next entry, + * or {@code null} if there are no more entries + * @throws IOException if the next entry could not be read + */ + @Override + public ArchiveEntry getNextEntry() throws IOException { + return getNextTarEntry(); + } + + /** + * Tries to read the next record rewinding the stream if it is not a EOF record. 
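Editor's note: parsePaxHeaders consumes records made of lines of the form "length keyword=value\n", where length counts the entire line including its own digits. A sketch of what one such line looks like; the keyword and value are invented:

    public class PaxRecordFormat {
        public static void main(String[] args) {
            String keyword = "path";
            String value = "a/rather/long/path/name.txt";
            // 3 = blank + '=' + '\n', 2 = the two digits of the length field itself
            int length = keyword.length() + value.length() + 3 + 2;
            System.out.println(length + " " + keyword + "=" + value);
            // prints: 36 path=a/rather/long/path/name.txt
        }
    }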
+ * + * <p>This is meant to protect against cases where a tar + * implementation has written only one EOF record when two are + * expected. Actually this won't help since a non-conforming + * implementation likely won't fill full blocks consisting of - by + * default - ten records either so we probably have already read + * beyond the archive anyway.</p> + */ + private void tryToConsumeSecondEOFRecord() throws IOException { + boolean shouldReset = true; + final boolean marked = is.markSupported(); + if (marked) { + is.mark(recordSize); + } + try { + shouldReset = !isEOFRecord(readRecord()); + } finally { + if (shouldReset && marked) { + pushedBackBytes(recordSize); + is.reset(); + } + } + } + + /** + * Reads bytes from the current tar archive entry. + * + * This method is aware of the boundaries of the current + * entry in the archive and will deal with them as if they + * were this stream's start and EOF. + * + * @param buf The buffer into which to place bytes read. + * @param offset The offset at which to place bytes read. + * @param numToRead The number of bytes to read. + * @return The number of bytes read, or -1 at EOF. + * @throws IOException on error + */ + @Override + public int read(final byte[] buf, final int offset, int numToRead) throws IOException { + int totalRead = 0; + + if (isAtEOF() || isDirectory() || entryOffset >= entrySize) { + return -1; + } + + if (currEntry == null) { + throw new IllegalStateException("No current tar entry"); + } + + numToRead = Math.min(numToRead, available()); + + totalRead = is.read(buf, offset, numToRead); + + if (totalRead == -1) { + if (numToRead > 0) { + throw new IOException("Truncated TAR archive"); + } + setAtEOF(true); + } else { + count(totalRead); + entryOffset += totalRead; + } + + return totalRead; + } + + /** + * Whether this class is able to read the given entry. + * + * <p>May return false if the current entry is a sparse file.</p> + */ + @Override + public boolean canReadEntryData(final ArchiveEntry ae) { + if (ae instanceof TarArchiveEntry) { + final TarArchiveEntry te = (TarArchiveEntry) ae; + return !te.isSparse(); + } + return false; + } + + /** + * Get the current TAR Archive Entry that this input stream is processing + * + * @return The current Archive Entry + */ + public TarArchiveEntry getCurrentEntry() { + return currEntry; + } + + protected final void setCurrentEntry(final TarArchiveEntry e) { + currEntry = e; + } + + protected final boolean isAtEOF() { + return hasHitEOF; + } + + protected final void setAtEOF(final boolean b) { + hasHitEOF = b; + } + + /** + * This method is invoked once the end of the archive is hit, it + * tries to consume the remaining bytes under the assumption that + * the tool creating this archive has padded the last block. + */ + private void consumeRemainderOfLastBlock() throws IOException { + final long bytesReadOfLastBlock = getBytesRead() % blockSize; + if (bytesReadOfLastBlock > 0) { + final long skipped = IOUtils.skip(is, blockSize - bytesReadOfLastBlock); + count(skipped); + } + } + + /** + * Checks if the signature matches what is expected for a tar file. 
+ * + * @param signature + * the bytes to check + * @param length + * the number of bytes to check + * @return true, if this stream is a tar archive stream, false otherwise + */ + public static boolean matches(final byte[] signature, final int length) { + if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) { + return false; + } + + if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, + signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) + && + ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, + signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) + ){ + return true; + } + if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, + signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) + && + ( + ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, + signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) + || + ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, + signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) + ) + ){ + return true; + } + // COMPRESS-107 - recognise Ant tar files + return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT, + signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) + && + ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT, + signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java new file mode 100644 index 000000000..c750e22b1 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java @@ -0,0 +1,699 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.tar; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.StringWriter; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; +import org.apache.commons.compress.utils.CharsetNames; +import org.apache.commons.compress.utils.CountingOutputStream; +import org.apache.commons.compress.utils.FixedLengthBlockOutputStream; + +/** + * The TarOutputStream writes a UNIX tar archive as an OutputStream. Methods are provided to put + * entries, and then write their contents by writing to this stream using write(). 
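Editor's note: matches() only inspects the magic and version fields, so sniffing a stream needs no more than the first 512 bytes. A small sketch, assuming an arbitrary local file; IOUtils.readFully is the helper from this library's utils package:

    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
    import org.apache.commons.compress.utils.IOUtils;

    public class SniffTar {
        public static void main(String[] args) throws IOException {
            byte[] signature = new byte[512];
            try (InputStream in = new FileInputStream("unknown.bin")) {
                int read = IOUtils.readFully(in, signature);
                // needs at least VERSION_OFFSET + VERSIONLEN = 265 bytes to say yes
                System.out.println(TarArchiveInputStream.matches(signature, read));
            }
        }
    }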
+ * + * <p>tar archives consist of a sequence of records of 512 bytes each + * that are grouped into blocks. Prior to Apache Commons Compress 1.14 + * it has been possible to configure a record size different from 512 + * bytes and arbitrary block sizes. Starting with Compress 1.15 512 is + * the only valid option for the record size and the block size must + * be a multiple of 512. Also the default block size changed from + * 10240 bytes prior to Compress 1.15 to 512 bytes with Compress + * 1.15.</p> + * + * @NotThreadSafe + */ +public class TarArchiveOutputStream extends ArchiveOutputStream { + + /** + * Fail if a long file name is required in the archive. + */ + public static final int LONGFILE_ERROR = 0; + + /** + * Long paths will be truncated in the archive. + */ + public static final int LONGFILE_TRUNCATE = 1; + + /** + * GNU tar extensions are used to store long file names in the archive. + */ + public static final int LONGFILE_GNU = 2; + + /** + * POSIX/PAX extensions are used to store long file names in the archive. + */ + public static final int LONGFILE_POSIX = 3; + + /** + * Fail if a big number (e.g. size > 8GiB) is required in the archive. + */ + public static final int BIGNUMBER_ERROR = 0; + + /** + * star/GNU tar/BSD tar extensions are used to store big number in the archive. + */ + public static final int BIGNUMBER_STAR = 1; + + /** + * POSIX/PAX extensions are used to store big numbers in the archive. + */ + public static final int BIGNUMBER_POSIX = 2; + private static final int RECORD_SIZE = 512; + + private long currSize; + private String currName; + private long currBytes; + private final byte[] recordBuf; + private int longFileMode = LONGFILE_ERROR; + private int bigNumberMode = BIGNUMBER_ERROR; + private int recordsWritten; + private final int recordsPerBlock; + + private boolean closed = false; + + /** + * Indicates if putArchiveEntry has been called without closeArchiveEntry + */ + private boolean haveUnclosedEntry = false; + + /** + * indicates if this archive is finished + */ + private boolean finished = false; + + private final FixedLengthBlockOutputStream out; + private final CountingOutputStream countingOut; + + private final ZipEncoding zipEncoding; + + // the provided encoding (for unit tests) + final String encoding; + + private boolean addPaxHeadersForNonAsciiNames = false; + private static final ZipEncoding ASCII = + ZipEncodingHelper.getZipEncoding("ASCII"); + + private static final int BLOCK_SIZE_UNSPECIFIED = -511; + + /** + * Constructor for TarArchiveOutputStream. + * + * <p>Uses a block size of 512 bytes.</p> + * + * @param os the output stream to use + */ + public TarArchiveOutputStream(final OutputStream os) { + this(os, BLOCK_SIZE_UNSPECIFIED); + } + + /** + * Constructor for TarArchiveOutputStream. + * + * <p>Uses a block size of 512 bytes.</p> + * + * @param os the output stream to use + * @param encoding name of the encoding to use for file names + * @since 1.4 + */ + public TarArchiveOutputStream(final OutputStream os, final String encoding) { + this(os, BLOCK_SIZE_UNSPECIFIED, encoding); + } + + /** + * Constructor for TarArchiveOutputStream. + * + * @param os the output stream to use + * @param blockSize the block size to use. Must be a multiple of 512 bytes. + */ + public TarArchiveOutputStream(final OutputStream os, final int blockSize) { + this(os, blockSize, null); + } + + + /** + * Constructor for TarArchiveOutputStream. 
+ * + * @param os the output stream to use + * @param blockSize the block size to use + * @param recordSize the record size to use. Must be 512 bytes. + * @deprecated recordSize must always be 512 bytes. An IllegalArgumentException will be thrown + * if any other value is used + */ + @Deprecated + public TarArchiveOutputStream(final OutputStream os, final int blockSize, + final int recordSize) { + this(os, blockSize, recordSize, null); + } + + /** + * Constructor for TarArchiveOutputStream. + * + * @param os the output stream to use + * @param blockSize the block size to use . Must be a multiple of 512 bytes. + * @param recordSize the record size to use. Must be 512 bytes. + * @param encoding name of the encoding to use for file names + * @since 1.4 + * @deprecated recordSize must always be 512 bytes. An IllegalArgumentException will be thrown + * if any other value is used. + */ + @Deprecated + public TarArchiveOutputStream(final OutputStream os, final int blockSize, + final int recordSize, final String encoding) { + this(os, blockSize, encoding); + if (recordSize != RECORD_SIZE) { + throw new IllegalArgumentException( + "Tar record size must always be 512 bytes. Attempt to set size of " + recordSize); + } + + } + + /** + * Constructor for TarArchiveOutputStream. + * + * @param os the output stream to use + * @param blockSize the block size to use. Must be a multiple of 512 bytes. + * @param encoding name of the encoding to use for file names + * @since 1.4 + */ + public TarArchiveOutputStream(final OutputStream os, final int blockSize, + final String encoding) { + int realBlockSize; + if (BLOCK_SIZE_UNSPECIFIED == blockSize) { + realBlockSize = RECORD_SIZE; + } else { + realBlockSize = blockSize; + } + + if (realBlockSize <=0 || realBlockSize % RECORD_SIZE != 0) { + throw new IllegalArgumentException("Block size must be a multiple of 512 bytes. Attempt to use set size of " + blockSize); + } + out = new FixedLengthBlockOutputStream(countingOut = new CountingOutputStream(os), + RECORD_SIZE); + this.encoding = encoding; + this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); + + this.recordBuf = new byte[RECORD_SIZE]; + this.recordsPerBlock = realBlockSize / RECORD_SIZE; + } + + /** + * Set the long file mode. This can be LONGFILE_ERROR(0), LONGFILE_TRUNCATE(1) or + * LONGFILE_GNU(2). This specifies the treatment of long file names (names >= + * TarConstants.NAMELEN). Default is LONGFILE_ERROR. + * + * @param longFileMode the mode to use + */ + public void setLongFileMode(final int longFileMode) { + this.longFileMode = longFileMode; + } + + /** + * Set the big number mode. This can be BIGNUMBER_ERROR(0), BIGNUMBER_POSIX(1) or + * BIGNUMBER_STAR(2). This specifies the treatment of big files (sizes > + * TarConstants.MAXSIZE) and other numeric values to big to fit into a traditional tar header. + * Default is BIGNUMBER_ERROR. + * + * @param bigNumberMode the mode to use + * @since 1.4 + */ + public void setBigNumberMode(final int bigNumberMode) { + this.bigNumberMode = bigNumberMode; + } + + /** + * Whether to add a PAX extension header for non-ASCII file names. + * + * @param b whether to add a PAX extension header for non-ASCII file names. 
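Editor's note: a sketch of wiring up the modes defined above; LONGFILE_POSIX and BIGNUMBER_POSIX make the writer emit PAX headers instead of failing when a name or number does not fit the classic header. The output path and block size are only examples, and the block size has to be a multiple of 512:

    import java.io.FileOutputStream;
    import java.io.IOException;
    import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;

    public class ConfigureTarWriter {
        public static void main(String[] args) throws IOException {
            try (TarArchiveOutputStream out = new TarArchiveOutputStream(
                    new FileOutputStream("out.tar"), 10240, "UTF-8")) {
                out.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);   // names >= 100 bytes
                out.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX); // e.g. sizes > 8 GiB
                // ... put entries here ...
                out.finish();
            }
        }
    }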
+ * @since 1.4 + */ + public void setAddPaxHeadersForNonAsciiNames(final boolean b) { + addPaxHeadersForNonAsciiNames = b; + } + + @Deprecated + @Override + public int getCount() { + return (int) getBytesWritten(); + } + + @Override + public long getBytesWritten() { + return countingOut.getBytesWritten(); + } + + /** + * Ends the TAR archive without closing the underlying OutputStream. + * + * An archive consists of a series of file entries terminated by an + * end-of-archive entry, which consists of two 512 blocks of zero bytes. + * POSIX.1 requires two EOF records, like some other implementations. + * + * @throws IOException on error + */ + @Override + public void finish() throws IOException { + if (finished) { + throw new IOException("This archive has already been finished"); + } + + if (haveUnclosedEntry) { + throw new IOException("This archive contains unclosed entries."); + } + writeEOFRecord(); + writeEOFRecord(); + padAsNeeded(); + out.flush(); + finished = true; + } + + /** + * Closes the underlying OutputStream. + * + * @throws IOException on error + */ + @Override + public void close() throws IOException { + try { + if (!finished) { + finish(); + } + } finally { + if (!closed) { + out.close(); + closed = true; + } + } + } + + /** + * Get the record size being used by this stream's TarBuffer. + * + * @return The TarBuffer record size. + * @deprecated + */ + @Deprecated + public int getRecordSize() { + return RECORD_SIZE; + } + + /** + * Put an entry on the output stream. This writes the entry's header record and positions the + * output stream for writing the contents of the entry. Once this method is called, the stream + * is ready for calls to write() to write the entry's contents. Once the contents are written, + * closeArchiveEntry() <B>MUST</B> be called to ensure that all buffered data is completely + * written to the output stream. + * + * @param archiveEntry The TarEntry to be written to the archive. 
+ * @throws IOException on error + * @throws ClassCastException if archiveEntry is not an instance of TarArchiveEntry + */ + @Override + public void putArchiveEntry(final ArchiveEntry archiveEntry) throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + final TarArchiveEntry entry = (TarArchiveEntry) archiveEntry; + if (entry.isGlobalPaxHeader()) { + final byte[] data = encodeExtendedPaxHeadersContents(entry.getExtraPaxHeaders()); + entry.setSize(data.length); + entry.writeEntryHeader(recordBuf, zipEncoding, bigNumberMode == BIGNUMBER_STAR); + writeRecord(recordBuf); + currSize= entry.getSize(); + currBytes = 0; + this.haveUnclosedEntry = true; + write(data); + closeArchiveEntry(); + } else { + final Map<String, String> paxHeaders = new HashMap<>(); + final String entryName = entry.getName(); + final boolean paxHeaderContainsPath = handleLongName(entry, entryName, paxHeaders, "path", + TarConstants.LF_GNUTYPE_LONGNAME, "file name"); + + final String linkName = entry.getLinkName(); + final boolean paxHeaderContainsLinkPath = linkName != null && linkName.length() > 0 + && handleLongName(entry, linkName, paxHeaders, "linkpath", + TarConstants.LF_GNUTYPE_LONGLINK, "link name"); + + if (bigNumberMode == BIGNUMBER_POSIX) { + addPaxHeadersForBigNumbers(paxHeaders, entry); + } else if (bigNumberMode != BIGNUMBER_STAR) { + failForBigNumbers(entry); + } + + if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath + && !ASCII.canEncode(entryName)) { + paxHeaders.put("path", entryName); + } + + if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsLinkPath + && (entry.isLink() || entry.isSymbolicLink()) + && !ASCII.canEncode(linkName)) { + paxHeaders.put("linkpath", linkName); + } + paxHeaders.putAll(entry.getExtraPaxHeaders()); + + if (paxHeaders.size() > 0) { + writePaxHeaders(entry, entryName, paxHeaders); + } + + entry.writeEntryHeader(recordBuf, zipEncoding, bigNumberMode == BIGNUMBER_STAR); + writeRecord(recordBuf); + + currBytes = 0; + + if (entry.isDirectory()) { + currSize = 0; + } else { + currSize = entry.getSize(); + } + currName = entryName; + haveUnclosedEntry = true; + } + } + + /** + * Close an entry. This method MUST be called for all file entries that contain data. The reason + * is that we must buffer data written to the stream in order to satisfy the buffer's record + * based writes. Thus, there may be data fragments still being assembled that must be written to + * the output stream before this entry is closed and the next entry written. + * + * @throws IOException on error + */ + @Override + public void closeArchiveEntry() throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + if (!haveUnclosedEntry) { + throw new IOException("No current entry to close"); + } + out.flushBlock(); + if (currBytes < currSize) { + throw new IOException("entry '" + currName + "' closed at '" + + currBytes + + "' before the '" + currSize + + "' bytes specified in the header were written"); + } + recordsWritten += (currSize / RECORD_SIZE); + if (0 != currSize % RECORD_SIZE) { + recordsWritten++; + } + haveUnclosedEntry = false; + } + + /** + * Writes bytes to the current tar archive entry. This method is aware of the current entry and + * will throw an exception if you attempt to write bytes past the length specified for the + * current entry. + * + * @param wBuf The buffer to write to the archive. + * @param wOffset The offset in the buffer from which to get bytes. 
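Editor's note: putArchiveEntry, write and closeArchiveEntry have to be called in exactly that order for every entry that carries data, and the declared size must match what is written. A minimal end-to-end sketch; the file name and content are invented:

    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;

    public class WriteOneEntry {
        public static void main(String[] args) throws IOException {
            byte[] content = "hello, tar\n".getBytes(StandardCharsets.UTF_8);
            try (TarArchiveOutputStream out =
                    new TarArchiveOutputStream(new FileOutputStream("hello.tar"))) {
                TarArchiveEntry entry = new TarArchiveEntry("greeting.txt");
                entry.setSize(content.length);   // must be set before the header is written
                out.putArchiveEntry(entry);
                out.write(content);
                out.closeArchiveEntry();         // verifies that all declared bytes were written
                out.finish();                    // two EOF records plus block padding
            }
        }
    }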
+ * @param numToWrite The number of bytes to write. + * @throws IOException on error + */ + @Override + public void write(final byte[] wBuf, int wOffset, int numToWrite) throws IOException { + if (!haveUnclosedEntry) { + throw new IllegalStateException("No current tar entry"); + } + if (currBytes + numToWrite > currSize) { + throw new IOException("request to write '" + numToWrite + + "' bytes exceeds size in header of '" + + currSize + "' bytes for entry '" + + currName + "'"); + } + out.write(wBuf, wOffset, numToWrite); + currBytes += numToWrite; + } + + /** + * Writes a PAX extended header with the given map as contents. + * + * @since 1.4 + */ + void writePaxHeaders(final TarArchiveEntry entry, + final String entryName, + final Map<String, String> headers) throws IOException { + String name = "./PaxHeaders.X/" + stripTo7Bits(entryName); + if (name.length() >= TarConstants.NAMELEN) { + name = name.substring(0, TarConstants.NAMELEN - 1); + } + final TarArchiveEntry pex = new TarArchiveEntry(name, + TarConstants.LF_PAX_EXTENDED_HEADER_LC); + transferModTime(entry, pex); + + final byte[] data = encodeExtendedPaxHeadersContents(headers); + pex.setSize(data.length); + putArchiveEntry(pex); + write(data); + closeArchiveEntry(); + } + + private byte[] encodeExtendedPaxHeadersContents(Map<String, String> headers) + throws UnsupportedEncodingException { + final StringWriter w = new StringWriter(); + for (final Map.Entry<String, String> h : headers.entrySet()) { + final String key = h.getKey(); + final String value = h.getValue(); + int len = key.length() + value.length() + + 3 /* blank, equals and newline */ + + 2 /* guess 9 < actual length < 100 */; + String line = len + " " + key + "=" + value + "\n"; + int actualLength = line.getBytes(CharsetNames.UTF_8).length; + while (len != actualLength) { + // Adjust for cases where length < 10 or > 100 + // or where UTF-8 encoding isn't a single octet + // per character. + // Must be in loop as size may go from 99 to 100 in + // first pass so we'd need a second. + len = actualLength; + line = len + " " + key + "=" + value + "\n"; + actualLength = line.getBytes(CharsetNames.UTF_8).length; + } + w.write(line); + } + return w.toString().getBytes(CharsetNames.UTF_8); + } + + private String stripTo7Bits(final String name) { + final int length = name.length(); + final StringBuilder result = new StringBuilder(length); + for (int i = 0; i < length; i++) { + final char stripped = (char) (name.charAt(i) & 0x7F); + if (shouldBeReplaced(stripped)) { + result.append("_"); + } else { + result.append(stripped); + } + } + return result.toString(); + } + + /** + * @return true if the character could lead to problems when used inside a TarArchiveEntry name + * for a PAX header. + */ + private boolean shouldBeReplaced(final char c) { + return c == 0 // would be read as Trailing null + || c == '/' // when used as last character TAE will consider the PAX header a directory + || c == '\\'; // same as '/' as slashes get "normalized" on Windows + } + + /** + * Write an EOF (end of archive) record to the tar archive. An EOF record consists of a record + * of all zeros. 
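Editor's note: encodeExtendedPaxHeadersContents has to solve a small fixed point, because the leading length field counts its own digits, so writing the length can change the length. A sketch of the same adjustment loop in isolation; the keyword and value are invented:

    import java.nio.charset.StandardCharsets;

    public class PaxLengthFixedPoint {
        public static void main(String[] args) {
            String key = "mtime";
            String value = "1538496806.000000001";
            int len = key.length() + value.length() + 3 + 2;   // first guess: two length digits
            String line = len + " " + key + "=" + value + "\n";
            int actual = line.getBytes(StandardCharsets.UTF_8).length;
            while (len != actual) {              // re-run when the guess was off (e.g. 99 -> 100,
                len = actual;                    // or multi-byte UTF-8 characters in the value)
                line = len + " " + key + "=" + value + "\n";
                actual = line.getBytes(StandardCharsets.UTF_8).length;
            }
            System.out.print(line);              // 30 mtime=1538496806.000000001
        }
    }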
+ */ + private void writeEOFRecord() throws IOException { + Arrays.fill(recordBuf, (byte) 0); + writeRecord(recordBuf); + } + + @Override + public void flush() throws IOException { + out.flush(); + } + + @Override + public ArchiveEntry createArchiveEntry(final File inputFile, final String entryName) + throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + return new TarArchiveEntry(inputFile, entryName); + } + + /** + * Write an archive record to the archive. + * + * @param record The record data to write to the archive. + * @throws IOException on error + */ + private void writeRecord(final byte[] record) throws IOException { + if (record.length != RECORD_SIZE) { + throw new IOException("record to write has length '" + + record.length + + "' which is not the record size of '" + + RECORD_SIZE + "'"); + } + + out.write(record); + recordsWritten++; + } + + private void padAsNeeded() throws IOException { + final int start = recordsWritten % recordsPerBlock; + if (start != 0) { + for (int i = start; i < recordsPerBlock; i++) { + writeEOFRecord(); + } + } + } + + private void addPaxHeadersForBigNumbers(final Map<String, String> paxHeaders, + final TarArchiveEntry entry) { + addPaxHeaderForBigNumber(paxHeaders, "size", entry.getSize(), + TarConstants.MAXSIZE); + addPaxHeaderForBigNumber(paxHeaders, "gid", entry.getLongGroupId(), + TarConstants.MAXID); + addPaxHeaderForBigNumber(paxHeaders, "mtime", + entry.getModTime().getTime() / 1000, + TarConstants.MAXSIZE); + addPaxHeaderForBigNumber(paxHeaders, "uid", entry.getLongUserId(), + TarConstants.MAXID); + // star extensions by J\u00f6rg Schilling + addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devmajor", + entry.getDevMajor(), TarConstants.MAXID); + addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devminor", + entry.getDevMinor(), TarConstants.MAXID); + // there is no PAX header for file mode + failForBigNumber("mode", entry.getMode(), TarConstants.MAXID); + } + + private void addPaxHeaderForBigNumber(final Map<String, String> paxHeaders, + final String header, final long value, + final long maxValue) { + if (value < 0 || value > maxValue) { + paxHeaders.put(header, String.valueOf(value)); + } + } + + private void failForBigNumbers(final TarArchiveEntry entry) { + failForBigNumber("entry size", entry.getSize(), TarConstants.MAXSIZE); + failForBigNumberWithPosixMessage("group id", entry.getLongGroupId(), TarConstants.MAXID); + failForBigNumber("last modification time", + entry.getModTime().getTime() / 1000, + TarConstants.MAXSIZE); + failForBigNumber("user id", entry.getLongUserId(), TarConstants.MAXID); + failForBigNumber("mode", entry.getMode(), TarConstants.MAXID); + failForBigNumber("major device number", entry.getDevMajor(), + TarConstants.MAXID); + failForBigNumber("minor device number", entry.getDevMinor(), + TarConstants.MAXID); + } + + private void failForBigNumber(final String field, final long value, final long maxValue) { + failForBigNumber(field, value, maxValue, ""); + } + + private void failForBigNumberWithPosixMessage(final String field, final long value, + final long maxValue) { + failForBigNumber(field, value, maxValue, + " Use STAR or POSIX extensions to overcome this limit"); + } + + private void failForBigNumber(final String field, final long value, final long maxValue, + final String additionalMsg) { + if (value < 0 || value > maxValue) { + throw new RuntimeException(field + " '" + value //NOSONAR + + "' is too big ( > " + + maxValue + " )." 
+ additionalMsg); + } + } + + /** + * Handles long file or link names according to the longFileMode setting. + * + * <p>I.e. if the given name is too long to be written to a plain tar header then <ul> <li>it + * creates a pax header who's name is given by the paxHeaderName parameter if longFileMode is + * POSIX</li> <li>it creates a GNU longlink entry who's type is given by the linkType parameter + * if longFileMode is GNU</li> <li>it throws an exception if longFileMode is ERROR</li> <li>it + * truncates the name if longFileMode is TRUNCATE</li> </ul></p> + * + * @param entry entry the name belongs to + * @param name the name to write + * @param paxHeaders current map of pax headers + * @param paxHeaderName name of the pax header to write + * @param linkType type of the GNU entry to write + * @param fieldName the name of the field + * @return whether a pax header has been written. + */ + private boolean handleLongName(final TarArchiveEntry entry, final String name, + final Map<String, String> paxHeaders, + final String paxHeaderName, final byte linkType, final String fieldName) + throws IOException { + final ByteBuffer encodedName = zipEncoding.encode(name); + final int len = encodedName.limit() - encodedName.position(); + if (len >= TarConstants.NAMELEN) { + + if (longFileMode == LONGFILE_POSIX) { + paxHeaders.put(paxHeaderName, name); + return true; + } else if (longFileMode == LONGFILE_GNU) { + // create a TarEntry for the LongLink, the contents + // of which are the link's name + final TarArchiveEntry longLinkEntry = new TarArchiveEntry(TarConstants.GNU_LONGLINK, + linkType); + + longLinkEntry.setSize(len + 1L); // +1 for NUL + transferModTime(entry, longLinkEntry); + putArchiveEntry(longLinkEntry); + write(encodedName.array(), encodedName.arrayOffset(), len); + write(0); // NUL terminator + closeArchiveEntry(); + } else if (longFileMode != LONGFILE_TRUNCATE) { + throw new RuntimeException(fieldName + " '" + name //NOSONAR + + "' is too long ( > " + + TarConstants.NAMELEN + " bytes)"); + } + } + return false; + } + + private void transferModTime(final TarArchiveEntry from, final TarArchiveEntry to) { + Date fromModTime = from.getModTime(); + final long fromModTimeSeconds = fromModTime.getTime() / 1000; + if (fromModTimeSeconds < 0 || fromModTimeSeconds > TarConstants.MAXSIZE) { + fromModTime = new Date(0); + } + to.setModTime(fromModTime); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java new file mode 100644 index 000000000..a49e7180c --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.tar; + +import java.io.IOException; + +/** + * This class represents a sparse entry in a Tar archive. + * + * <p> + * The C structure for a sparse entry is: + * <pre> + * struct posix_header { + * struct sparse sp[21]; // TarConstants.SPARSELEN_GNU_SPARSE - offset 0 + * char isextended; // TarConstants.ISEXTENDEDLEN_GNU_SPARSE - offset 504 + * }; + * </pre> + * Whereas, "struct sparse" is: + * <pre> + * struct sparse { + * char offset[12]; // offset 0 + * char numbytes[12]; // offset 12 + * }; + * </pre> + */ + +public class TarArchiveSparseEntry implements TarConstants { + /** If an extension sparse header follows. */ + private final boolean isExtended; + + /** + * Construct an entry from an archive's header bytes. File is set + * to null. + * + * @param headerBuf The header bytes from a tar archive entry. + * @throws IOException on unknown format + */ + public TarArchiveSparseEntry(final byte[] headerBuf) throws IOException { + int offset = 0; + offset += SPARSELEN_GNU_SPARSE; + isExtended = TarUtils.parseBoolean(headerBuf, offset); + } + + public boolean isExtended() { + return isExtended; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java new file mode 100644 index 000000000..751840d44 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java @@ -0,0 +1,358 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.tar; + +/** + * This interface contains all the definitions used in the package. + * + * For tar formats (FORMAT_OLDGNU, FORMAT_POSIX, etc.) see GNU tar + * <I>tar.h</I> type <I>enum archive_format</I> + */ +// CheckStyle:InterfaceIsTypeCheck OFF (bc) +public interface TarConstants { + + /** Default record size */ + int DEFAULT_RCDSIZE = 512; + + /** Default block size */ + int DEFAULT_BLKSIZE = DEFAULT_RCDSIZE * 20; + + /** + * GNU format as per before tar 1.12. + */ + int FORMAT_OLDGNU = 2; + + /** + * Pure Posix format. + */ + int FORMAT_POSIX = 3; + + /** + * xstar format used by Jörg Schilling's star. + */ + int FORMAT_XSTAR = 4; + + /** + * The length of the name field in a header buffer. + */ + int NAMELEN = 100; + + /** + * The length of the mode field in a header buffer. + */ + int MODELEN = 8; + + /** + * The length of the user id field in a header buffer. + */ + int UIDLEN = 8; + + /** + * The length of the group id field in a header buffer. 
+ */ + int GIDLEN = 8; + + /** + * The maximum value of gid/uid in a tar archive which can + * be expressed in octal char notation (that's 7 sevens, octal). + */ + long MAXID = 07777777L; + + /** + * The length of the checksum field in a header buffer. + */ + int CHKSUMLEN = 8; + + /** + * Offset of the checksum field within header record. + * @since 1.5 + */ + int CHKSUM_OFFSET = 148; + + /** + * The length of the size field in a header buffer. + * Includes the trailing space or NUL. + */ + int SIZELEN = 12; + + /** + * The maximum size of a file in a tar archive + * which can be expressed in octal char notation (that's 11 sevens, octal). + */ + long MAXSIZE = 077777777777L; + + /** Offset of start of magic field within header record */ + int MAGIC_OFFSET = 257; + /** + * The length of the magic field in a header buffer. + */ + int MAGICLEN = 6; + + /** Offset of start of magic field within header record */ + int VERSION_OFFSET = 263; + /** + * Previously this was regarded as part of "magic" field, but it is separate. + */ + int VERSIONLEN = 2; + + /** + * The length of the modification time field in a header buffer. + */ + int MODTIMELEN = 12; + + /** + * The length of the user name field in a header buffer. + */ + int UNAMELEN = 32; + + /** + * The length of the group name field in a header buffer. + */ + int GNAMELEN = 32; + + /** + * The length of each of the device fields (major and minor) in a header buffer. + */ + int DEVLEN = 8; + + /** + * Length of the prefix field. + * + */ + int PREFIXLEN = 155; + + /** + * The length of the access time field in an old GNU header buffer. + * + */ + int ATIMELEN_GNU = 12; + + /** + * The length of the created time field in an old GNU header buffer. + * + */ + int CTIMELEN_GNU = 12; + + /** + * The length of the multivolume start offset field in an old GNU header buffer. + * + */ + int OFFSETLEN_GNU = 12; + + /** + * The length of the long names field in an old GNU header buffer. + * + */ + int LONGNAMESLEN_GNU = 4; + + /** + * The length of the padding field in an old GNU header buffer. + * + */ + int PAD2LEN_GNU = 1; + + /** + * The sum of the length of all sparse headers in an old GNU header buffer. + * + */ + int SPARSELEN_GNU = 96; + + /** + * The length of the is extension field in an old GNU header buffer. + * + */ + int ISEXTENDEDLEN_GNU = 1; + + /** + * The length of the real size field in an old GNU header buffer. + * + */ + int REALSIZELEN_GNU = 12; + + /** + * The sum of the length of all sparse headers in a sparse header buffer. + * + */ + int SPARSELEN_GNU_SPARSE = 504; + + /** + * The length of the is extension field in a sparse header buffer. + * + */ + int ISEXTENDEDLEN_GNU_SPARSE = 1; + + /** + * LF_ constants represent the "link flag" of an entry, or more commonly, + * the "entry type". This is the "old way" of indicating a normal file. + */ + byte LF_OLDNORM = 0; + + /** + * Normal file type. + */ + byte LF_NORMAL = (byte) '0'; + + /** + * Link file type. + */ + byte LF_LINK = (byte) '1'; + + /** + * Symbolic link file type. + */ + byte LF_SYMLINK = (byte) '2'; + + /** + * Character device file type. + */ + byte LF_CHR = (byte) '3'; + + /** + * Block device file type. + */ + byte LF_BLK = (byte) '4'; + + /** + * Directory file type. + */ + byte LF_DIR = (byte) '5'; + + /** + * FIFO (pipe) file type. + */ + byte LF_FIFO = (byte) '6'; + + /** + * Contiguous file type. + */ + byte LF_CONTIG = (byte) '7'; + + /** + * Identifies the *next* file on the tape as having a long linkname. 
+ */ + byte LF_GNUTYPE_LONGLINK = (byte) 'K'; + + /** + * Identifies the *next* file on the tape as having a long name. + */ + byte LF_GNUTYPE_LONGNAME = (byte) 'L'; + + /** + * Sparse file type. + * @since 1.1.1 + */ + byte LF_GNUTYPE_SPARSE = (byte) 'S'; + + // See "http://www.opengroup.org/onlinepubs/009695399/utilities/pax.html#tag_04_100_13_02" + + /** + * Identifies the entry as a Pax extended header. + * @since 1.1 + */ + byte LF_PAX_EXTENDED_HEADER_LC = (byte) 'x'; + + /** + * Identifies the entry as a Pax extended header (SunOS tar -E). + * + * @since 1.1 + */ + byte LF_PAX_EXTENDED_HEADER_UC = (byte) 'X'; + + /** + * Identifies the entry as a Pax global extended header. + * + * @since 1.1 + */ + byte LF_PAX_GLOBAL_EXTENDED_HEADER = (byte) 'g'; + + /** + * The magic tag representing a POSIX tar archive. + */ + String MAGIC_POSIX = "ustar\0"; + String VERSION_POSIX = "00"; + + /** + * The magic tag representing a GNU tar archive. + */ + String MAGIC_GNU = "ustar "; + // Appear to be two possible GNU versions + String VERSION_GNU_SPACE = " \0"; + String VERSION_GNU_ZERO = "0\0"; + + /** + * The magic tag representing an Ant tar archive. + * + * @since 1.1 + */ + String MAGIC_ANT = "ustar\0"; + + /** + * The "version" representing an Ant tar archive. + * + * @since 1.1 + */ + // Does not appear to have a version, however Ant does write 8 bytes, + // so assume the version is 2 nulls + String VERSION_ANT = "\0\0"; + + /** + * The name of the GNU tar entry which contains a long name. + */ + String GNU_LONGLINK = "././@LongLink"; // TODO rename as LONGLINK_GNU ? + + /** + * The magix string used in the last four bytes of the header to + * identify the xstar format. + * @since 1.11 + */ + String MAGIC_XSTAR = "tar\0"; + + /** + * Offset inside the header for the xstar magic bytes. + * @since 1.11 + */ + int XSTAR_MAGIC_OFFSET = 508; + + /** + * Length of the XSTAR magic. + * @since 1.11 + */ + int XSTAR_MAGIC_LEN = 4; + + /** + * Length of the prefix field in xstar archives. + * + * @since 1.11 + */ + int PREFIXLEN_XSTAR = 131; + + /** + * The length of the access time field in a xstar header buffer. + * + * @since 1.11 + */ + int ATIMELEN_XSTAR = 12; + + /** + * The length of the created time field in a xstar header buffer. + * + * @since 1.11 + */ + int CTIMELEN_XSTAR = 12; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java new file mode 100644 index 000000000..c83bcf966 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java @@ -0,0 +1,614 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers.tar; + +import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN; +import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET; + +import java.io.IOException; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; + +/** + * This class provides static utility methods to work with byte streams. + * + * @Immutable + */ +// CheckStyle:HideUtilityClassConstructorCheck OFF (bc) +public class TarUtils { + + private static final int BYTE_MASK = 255; + + static final ZipEncoding DEFAULT_ENCODING = + ZipEncodingHelper.getZipEncoding(null); + + /** + * Encapsulates the algorithms used up to Commons Compress 1.3 as + * ZipEncoding. + */ + static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() { + @Override + public boolean canEncode(final String name) { return true; } + + @Override + public ByteBuffer encode(final String name) { + final int length = name.length(); + final byte[] buf = new byte[length]; + + // copy until end of input or output is reached. + for (int i = 0; i < length; ++i) { + buf[i] = (byte) name.charAt(i); + } + return ByteBuffer.wrap(buf); + } + + @Override + public String decode(final byte[] buffer) { + final int length = buffer.length; + final StringBuilder result = new StringBuilder(length); + + for (final byte b : buffer) { + if (b == 0) { // Trailing null + break; + } + result.append((char) (b & 0xFF)); // Allow for sign-extension + } + + return result.toString(); + } + }; + + /** Private constructor to prevent instantiation of this utility class. */ + private TarUtils(){ + } + + /** + * Parse an octal string from a buffer. + * + * <p>Leading spaces are ignored. + * The buffer must contain a trailing space or NUL, + * and may contain an additional trailing space or NUL.</p> + * + * <p>The input buffer is allowed to contain all NULs, + * in which case the method returns 0L + * (this allows for missing fields).</p> + * + * <p>To work-around some tar implementations that insert a + * leading NUL this method returns 0 if it detects a leading NUL + * since Commons Compress 1.4.</p> + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @param length The maximum number of bytes to parse - must be at least 2 bytes. + * @return The long value of the octal string. + * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected. + */ + public static long parseOctal(final byte[] buffer, final int offset, final int length) { + long result = 0; + int end = offset + length; + int start = offset; + + if (length < 2){ + throw new IllegalArgumentException("Length "+length+" must be at least 2"); + } + + if (buffer[start] == 0) { + return 0L; + } + + // Skip leading spaces + while (start < end){ + if (buffer[start] == ' '){ + start++; + } else { + break; + } + } + + // Trim all trailing NULs and spaces. + // The ustar and POSIX tar specs require a trailing NUL or + // space but some implementations use the extra digit for big + // sizes/uids/gids ... 
+ byte trailer = buffer[end - 1]; + while (start < end && (trailer == 0 || trailer == ' ')) { + end--; + trailer = buffer[end - 1]; + } + + for ( ;start < end; start++) { + final byte currentByte = buffer[start]; + // CheckStyle:MagicNumber OFF + if (currentByte < '0' || currentByte > '7'){ + throw new IllegalArgumentException( + exceptionMessage(buffer, offset, length, start, currentByte)); + } + result = (result << 3) + (currentByte - '0'); // convert from ASCII + // CheckStyle:MagicNumber ON + } + + return result; + } + + /** + * Compute the value contained in a byte buffer. If the most + * significant bit of the first byte in the buffer is set, this + * bit is ignored and the rest of the buffer is interpreted as a + * binary number. Otherwise, the buffer is interpreted as an + * octal number as per the parseOctal function above. + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @param length The maximum number of bytes to parse. + * @return The long value of the octal or binary string. + * @throws IllegalArgumentException if the trailing space/NUL is + * missing or an invalid byte is detected in an octal number, or + * if a binary number would exceed the size of a signed long + * 64-bit integer. + * @since 1.4 + */ + public static long parseOctalOrBinary(final byte[] buffer, final int offset, + final int length) { + + if ((buffer[offset] & 0x80) == 0) { + return parseOctal(buffer, offset, length); + } + final boolean negative = buffer[offset] == (byte) 0xff; + if (length < 9) { + return parseBinaryLong(buffer, offset, length, negative); + } + return parseBinaryBigInteger(buffer, offset, length, negative); + } + + private static long parseBinaryLong(final byte[] buffer, final int offset, + final int length, + final boolean negative) { + if (length >= 9) { + throw new IllegalArgumentException("At offset " + offset + ", " + + length + " byte binary number" + + " exceeds maximum signed long" + + " value"); + } + long val = 0; + for (int i = 1; i < length; i++) { + val = (val << 8) + (buffer[offset + i] & 0xff); + } + if (negative) { + // 2's complement + val--; + val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1; + } + return negative ? -val : val; + } + + private static long parseBinaryBigInteger(final byte[] buffer, + final int offset, + final int length, + final boolean negative) { + final byte[] remainder = new byte[length - 1]; + System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); + BigInteger val = new BigInteger(remainder); + if (negative) { + // 2's complement + val = val.add(BigInteger.valueOf(-1)).not(); + } + if (val.bitLength() > 63) { + throw new IllegalArgumentException("At offset " + offset + ", " + + length + " byte binary number" + + " exceeds maximum signed long" + + " value"); + } + return negative ? -val.longValue() : val.longValue(); + } + + /** + * Parse a boolean byte from a buffer. + * Leading spaces and NUL are ignored. + * The buffer may contain trailing spaces or NULs. + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @return The boolean value of the bytes. + * @throws IllegalArgumentException if an invalid byte is detected. 
+ */ + public static boolean parseBoolean(final byte[] buffer, final int offset) { + return buffer[offset] == 1; + } + + // Helper method to generate the exception message + private static String exceptionMessage(final byte[] buffer, final int offset, + final int length, final int current, final byte currentByte) { + // default charset is good enough for an exception message, + // + // the alternative was to modify parseOctal and + // parseOctalOrBinary to receive the ZipEncoding of the + // archive (deprecating the existing public methods, of + // course) and dealing with the fact that ZipEncoding#decode + // can throw an IOException which parseOctal* doesn't declare + String string = new String(buffer, offset, length); + + string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed + return "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length; + } + + /** + * Parse an entry name from a buffer. + * Parsing stops when a NUL is found + * or the buffer length is reached. + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @param length The maximum number of bytes to parse. + * @return The entry name. + */ + public static String parseName(final byte[] buffer, final int offset, final int length) { + try { + return parseName(buffer, offset, length, DEFAULT_ENCODING); + } catch (final IOException ex) { + try { + return parseName(buffer, offset, length, FALLBACK_ENCODING); + } catch (final IOException ex2) { + // impossible + throw new RuntimeException(ex2); //NOSONAR + } + } + } + + /** + * Parse an entry name from a buffer. + * Parsing stops when a NUL is found + * or the buffer length is reached. + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @param length The maximum number of bytes to parse. + * @param encoding name of the encoding to use for file names + * @since 1.4 + * @return The entry name. + * @throws IOException on error + */ + public static String parseName(final byte[] buffer, final int offset, + final int length, + final ZipEncoding encoding) + throws IOException { + + int len = 0; + for (int i = offset; len < length && buffer[i] != 0; i++) { + len++; + } + if (len > 0) { + final byte[] b = new byte[len]; + System.arraycopy(buffer, offset, b, 0, len); + return encoding.decode(b); + } + return ""; + } + + /** + * Copy a name into a buffer. + * Copies characters from the name into the buffer + * starting at the specified offset. + * If the buffer is longer than the name, the buffer + * is filled with trailing NULs. + * If the name is longer than the buffer, + * the output is truncated. + * + * @param name The header name from which to copy the characters. + * @param buf The buffer where the name is to be stored. + * @param offset The starting offset into the buffer + * @param length The maximum number of header bytes to copy. + * @return The updated offset, i.e. offset + length + */ + public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) { + try { + return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); + } catch (final IOException ex) { + try { + return formatNameBytes(name, buf, offset, length, + FALLBACK_ENCODING); + } catch (final IOException ex2) { + // impossible + throw new RuntimeException(ex2); //NOSONAR + } + } + } + + /** + * Copy a name into a buffer. 
+ * Copies characters from the name into the buffer + * starting at the specified offset. + * If the buffer is longer than the name, the buffer + * is filled with trailing NULs. + * If the name is longer than the buffer, + * the output is truncated. + * + * @param name The header name from which to copy the characters. + * @param buf The buffer where the name is to be stored. + * @param offset The starting offset into the buffer + * @param length The maximum number of header bytes to copy. + * @param encoding name of the encoding to use for file names + * @since 1.4 + * @return The updated offset, i.e. offset + length + * @throws IOException on error + */ + public static int formatNameBytes(final String name, final byte[] buf, final int offset, + final int length, + final ZipEncoding encoding) + throws IOException { + int len = name.length(); + ByteBuffer b = encoding.encode(name); + while (b.limit() > length && len > 0) { + b = encoding.encode(name.substring(0, --len)); + } + final int limit = b.limit() - b.position(); + System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); + + // Pad any remaining output bytes with NUL + for (int i = limit; i < length; ++i) { + buf[offset + i] = 0; + } + + return offset + length; + } + + /** + * Fill buffer with unsigned octal number, padded with leading zeroes. + * + * @param value number to convert to octal - treated as unsigned + * @param buffer destination buffer + * @param offset starting offset in buffer + * @param length length of buffer to fill + * @throws IllegalArgumentException if the value will not fit in the buffer + */ + public static void formatUnsignedOctalString(final long value, final byte[] buffer, + final int offset, final int length) { + int remaining = length; + remaining--; + if (value == 0) { + buffer[offset + remaining--] = (byte) '0'; + } else { + long val = value; + for (; remaining >= 0 && val != 0; --remaining) { + // CheckStyle:MagicNumber OFF + buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); + val = val >>> 3; + // CheckStyle:MagicNumber ON + } + if (val != 0){ + throw new IllegalArgumentException + (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length); + } + } + + for (; remaining >= 0; --remaining) { // leading zeros + buffer[offset + remaining] = (byte) '0'; + } + } + + /** + * Write an octal integer into a buffer. + * + * Uses {@link #formatUnsignedOctalString} to format + * the value as an octal string with leading zeros. + * The converted number is followed by space and NUL + * + * @param value The value to write + * @param buf The buffer to receive the output + * @param offset The starting offset into the buffer + * @param length The size of the output buffer + * @return The updated offset, i.e. offset+length + * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer + */ + public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) { + + int idx=length-2; // For space and trailing null + formatUnsignedOctalString(value, buf, offset, idx); + + buf[offset + idx++] = (byte) ' '; // Trailing space + buf[offset + idx] = 0; // Trailing null + + return offset + length; + } + + /** + * Write an octal long integer into a buffer. + * + * Uses {@link #formatUnsignedOctalString} to format + * the value as an octal string with leading zeros. + * The converted number is followed by a space. + * + * @param value The value to write as octal + * @param buf The destination buffer.
+ * @param offset The starting offset into the buffer. + * @param length The length of the buffer + * @return The updated offset + * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer + */ + public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) { + + final int idx=length-1; // For space + + formatUnsignedOctalString(value, buf, offset, idx); + buf[offset + idx] = (byte) ' '; // Trailing space + + return offset + length; + } + + /** + * Write an long integer into a buffer as an octal string if this + * will fit, or as a binary number otherwise. + * + * Uses {@link #formatUnsignedOctalString} to format + * the value as an octal string with leading zeros. + * The converted number is followed by a space. + * + * @param value The value to write into the buffer. + * @param buf The destination buffer. + * @param offset The starting offset into the buffer. + * @param length The length of the buffer. + * @return The updated offset. + * @throws IllegalArgumentException if the value (and trailer) + * will not fit in the buffer. + * @since 1.4 + */ + public static int formatLongOctalOrBinaryBytes( + final long value, final byte[] buf, final int offset, final int length) { + + // Check whether we are dealing with UID/GID or SIZE field + final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; + + final boolean negative = value < 0; + if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars + return formatLongOctalBytes(value, buf, offset, length); + } + + if (length < 9) { + formatLongBinary(value, buf, offset, length, negative); + } else { + formatBigIntegerBinary(value, buf, offset, length, negative); + } + + buf[offset] = (byte) (negative ? 0xff : 0x80); + return offset + length; + } + + private static void formatLongBinary(final long value, final byte[] buf, + final int offset, final int length, + final boolean negative) { + final int bits = (length - 1) * 8; + final long max = 1L << bits; + long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE + if (val < 0 || val >= max) { + throw new IllegalArgumentException("Value " + value + + " is too large for " + length + " byte field."); + } + if (negative) { + val ^= max - 1; + val++; + val |= 0xffL << bits; + } + for (int i = offset + length - 1; i >= offset; i--) { + buf[i] = (byte) val; + val >>= 8; + } + } + + private static void formatBigIntegerBinary(final long value, final byte[] buf, + final int offset, + final int length, + final boolean negative) { + final BigInteger val = BigInteger.valueOf(value); + final byte[] b = val.toByteArray(); + final int len = b.length; + if (len > length - 1) { + throw new IllegalArgumentException("Value " + value + + " is too large for " + length + " byte field."); + } + final int off = offset + length - len; + System.arraycopy(b, 0, buf, off, len); + final byte fill = (byte) (negative ? 0xff : 0); + for (int i = offset + 1; i < off; i++) { + buf[i] = fill; + } + } + + /** + * Writes an octal value into a buffer. + * + * Uses {@link #formatUnsignedOctalString} to format + * the value as an octal string with leading zeros. + * The converted number is followed by NUL and then space. + * + * @param value The value to convert + * @param buf The destination buffer + * @param offset The starting offset into the buffer. + * @param length The size of the buffer. + * @return The updated value of offset, i.e. 
offset+length + * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer + */ + public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) { + + int idx=length-2; // for NUL and space + formatUnsignedOctalString(value, buf, offset, idx); + + buf[offset + idx++] = 0; // Trailing null + buf[offset + idx] = (byte) ' '; // Trailing space + + return offset + length; + } + + /** + * Compute the checksum of a tar entry header. + * + * @param buf The tar entry's header buffer. + * @return The computed checksum. + */ + public static long computeCheckSum(final byte[] buf) { + long sum = 0; + + for (final byte element : buf) { + sum += BYTE_MASK & element; + } + + return sum; + } + + /** + * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>: + * <blockquote> + * The checksum is calculated by taking the sum of the unsigned byte values + * of the header block with the eight checksum bytes taken to be ascii + * spaces (decimal value 32). It is stored as a six digit octal number with + * leading zeroes followed by a NUL and then a space. Various + * implementations do not adhere to this format. For better compatibility, + * ignore leading and trailing whitespace, and get the first six digits. In + * addition, some historic tar implementations treated bytes as signed. + * Implementations typically calculate the checksum both ways, and treat it + * as good if either the signed or unsigned sum matches the included + * checksum. + * </blockquote> + * <p> + * The return value of this method should be treated as a best-effort + * heuristic rather than an absolute and final truth. The checksum + * verification logic may well evolve over time as more special cases + * are encountered. + * + * @param header tar header + * @return whether the checksum is reasonably good + * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a> + * @since 1.5 + */ + public static boolean verifyCheckSum(final byte[] header) { + final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN); + long unsignedSum = 0; + long signedSum = 0; + + for (int i = 0; i < header.length; i++) { + byte b = header[i]; + if (CHKSUM_OFFSET <= i && i < CHKSUM_OFFSET + CHKSUMLEN) { + b = ' '; + } + unsignedSum += 0xff & b; + signedSum += b; + } + return storedSum == unsignedSum || storedSum == signedSum; + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/package.html b/src/main/java/org/apache/commons/compress/archivers/tar/package.html new file mode 100644 index 000000000..141f33b61 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/tar/package.html @@ -0,0 +1,30 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +--> + <body> + <p>Provides stream classes for reading and writing archives using + the TAR format.</p> + + <p>There are many different format dialects that call themselves + TAR. The classes of this package can read and write archives in + the traditional pre-POSIX <b>ustar</b> format and support GNU + specific extensions for long filenames that GNU tar itself by + now refers to as <b>oldgnu</b>.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/AbstractUnicodeExtraField.java b/src/main/java/org/apache/commons/compress/archivers/zip/AbstractUnicodeExtraField.java new file mode 100644 index 000000000..846c9e1e9 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/AbstractUnicodeExtraField.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import java.io.UnsupportedEncodingException; +import java.util.zip.CRC32; +import java.util.zip.ZipException; + +import org.apache.commons.compress.utils.CharsetNames; + +/** + * A common base class for Unicode extra information extra fields. + * @NotThreadSafe + */ +public abstract class AbstractUnicodeExtraField implements ZipExtraField { + private long nameCRC32; + private byte[] unicodeName; + private byte[] data; + + protected AbstractUnicodeExtraField() { + } + + /** + * Assemble a unicode extension from the name/comment and + * encoding of the original zip entry. + * + * @param text The file name or comment. + * @param bytes The encoded form of the filename or comment in the zip + * file. + * @param off The offset of the encoded filename or comment in + * <code>bytes</code>. + * @param len The length of the encoded filename or comment in + * <code>bytes</code>. + */ + protected AbstractUnicodeExtraField(final String text, final byte[] bytes, final int off, final int len) { + final CRC32 crc32 = new CRC32(); + crc32.update(bytes, off, len); + nameCRC32 = crc32.getValue(); + + try { + unicodeName = text.getBytes(CharsetNames.UTF_8); + } catch (final UnsupportedEncodingException e) { + throw new RuntimeException("FATAL: UTF-8 encoding not supported.", e); //NOSONAR + } + } + + /** + * Assemble a unicode extension from the name/comment and + * encoding of the original zip entry. + * + * @param text The file name or comment. + * @param bytes The encoded form of the filename or comment in the zip + * file.
+ */ + protected AbstractUnicodeExtraField(final String text, final byte[] bytes) { + this(text, bytes, 0, bytes.length); + } + + private void assembleData() { + if (unicodeName == null) { + return; + } + + data = new byte[5 + unicodeName.length]; + // version 1 + data[0] = 0x01; + System.arraycopy(ZipLong.getBytes(nameCRC32), 0, data, 1, 4); + System.arraycopy(unicodeName, 0, data, 5, unicodeName.length); + } + + /** + * @return The CRC32 checksum of the filename or comment as + * encoded in the central directory of the zip file. + */ + public long getNameCRC32() { + return nameCRC32; + } + + /** + * @param nameCRC32 The CRC32 checksum of the filename as encoded + * in the central directory of the zip file to set. + */ + public void setNameCRC32(final long nameCRC32) { + this.nameCRC32 = nameCRC32; + data = null; + } + + /** + * @return The UTF-8 encoded name. + */ + public byte[] getUnicodeName() { + byte[] b = null; + if (unicodeName != null) { + b = new byte[unicodeName.length]; + System.arraycopy(unicodeName, 0, b, 0, b.length); + } + return b; + } + + /** + * @param unicodeName The UTF-8 encoded name to set. + */ + public void setUnicodeName(final byte[] unicodeName) { + if (unicodeName != null) { + this.unicodeName = new byte[unicodeName.length]; + System.arraycopy(unicodeName, 0, this.unicodeName, 0, + unicodeName.length); + } else { + this.unicodeName = null; + } + data = null; + } + + @Override + public byte[] getCentralDirectoryData() { + if (data == null) { + this.assembleData(); + } + byte[] b = null; + if (data != null) { + b = new byte[data.length]; + System.arraycopy(data, 0, b, 0, b.length); + } + return b; + } + + @Override + public ZipShort getCentralDirectoryLength() { + if (data == null) { + assembleData(); + } + return new ZipShort(data != null ? data.length : 0); + } + + @Override + public byte[] getLocalFileDataData() { + return getCentralDirectoryData(); + } + + @Override + public ZipShort getLocalFileDataLength() { + return getCentralDirectoryLength(); + } + + @Override + public void parseFromLocalFileData(final byte[] buffer, final int offset, final int length) + throws ZipException { + + if (length < 5) { + throw new ZipException("UniCode path extra data must have at least 5 bytes."); + } + + final int version = buffer[offset]; + + if (version != 0x01) { + throw new ZipException("Unsupported version [" + version + + "] for UniCode path extra data."); + } + + nameCRC32 = ZipLong.getValue(buffer, offset + 1); + unicodeName = new byte[length - 5]; + System.arraycopy(buffer, offset + 5, unicodeName, 0, length - 5); + data = null; + } + + /** + * Doesn't do anything special since this class always uses the + * same data in central directory and local file data. + */ + @Override + public void parseFromCentralDirectoryData(final byte[] buffer, final int offset, + final int length) + throws ZipException { + parseFromLocalFileData(buffer, offset, length); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/AsiExtraField.java b/src/main/java/org/apache/commons/compress/archivers/zip/AsiExtraField.java new file mode 100644 index 000000000..e4afa95cd --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/AsiExtraField.java @@ -0,0 +1,337 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +import java.util.zip.CRC32; +import java.util.zip.ZipException; + +/** + * Adds Unix file permission and UID/GID fields as well as symbolic + * link handling. + * + * <p>This class uses the ASi extra field in the format:</p> + * <pre> + * Value Size Description + * ----- ---- ----------- + * (Unix3) 0x756e Short tag for this extra block type + * TSize Short total data size for this block + * CRC Long CRC-32 of the remaining data + * Mode Short file permissions + * SizDev Long symlink'd size OR major/minor dev num + * UID Short user ID + * GID Short group ID + * (var.) variable symbolic link filename + * </pre> + * <p>taken from appnote.iz (Info-ZIP note, 981119) found at <a + * href="ftp://ftp.uu.net/pub/archiving/zip/doc/">ftp://ftp.uu.net/pub/archiving/zip/doc/</a></p> + * + * <p>Short is two bytes and Long is four bytes in big endian byte and + * word order, device numbers are currently not supported.</p> + * @NotThreadSafe + * + * <p>Since the documentation this class is based upon doesn't mention + * the character encoding of the file name at all, it is assumed that + * it uses the current platform's default encoding.</p> + */ +public class AsiExtraField implements ZipExtraField, UnixStat, Cloneable { + + private static final ZipShort HEADER_ID = new ZipShort(0x756E); + private static final int WORD = 4; + /** + * Standard Unix stat(2) file mode. + */ + private int mode = 0; + /** + * User ID. + */ + private int uid = 0; + /** + * Group ID. + */ + private int gid = 0; + /** + * File this entry points to, if it is a symbolic link. + * + * <p>empty string - if entry is not a symbolic link.</p> + */ + private String link = ""; + /** + * Is this an entry for a directory? + */ + private boolean dirFlag = false; + + /** + * Instance used to calculate checksums. + */ + private CRC32 crc = new CRC32(); + + /** Constructor for AsiExtraField. */ + public AsiExtraField() { + } + + /** + * The Header-ID. + * @return the value for the header id for this extrafield + */ + @Override + public ZipShort getHeaderId() { + return HEADER_ID; + } + + /** + * Length of the extra field in the local file data - without + * Header-ID or length specifier. + * @return a <code>ZipShort</code> for the length of the data of this extra field + */ + @Override + public ZipShort getLocalFileDataLength() { + return new ZipShort(WORD // CRC + + 2 // Mode + + WORD // SizDev + + 2 // UID + + 2 // GID + + getLinkedFile().getBytes().length); + // Uses default charset - see class Javadoc + } + + /** + * Delegate to local file data. + * @return the centralDirectory length + */ + @Override + public ZipShort getCentralDirectoryLength() { + return getLocalFileDataLength(); + } + + /** + * The actual data to put into local file data - without Header-ID + * or length specifier. 
+ * @return get the data + */ + @Override + public byte[] getLocalFileDataData() { + // CRC will be added later + final byte[] data = new byte[getLocalFileDataLength().getValue() - WORD]; + System.arraycopy(ZipShort.getBytes(getMode()), 0, data, 0, 2); + + final byte[] linkArray = getLinkedFile().getBytes(); // Uses default charset - see class Javadoc + // CheckStyle:MagicNumber OFF + System.arraycopy(ZipLong.getBytes(linkArray.length), + 0, data, 2, WORD); + + System.arraycopy(ZipShort.getBytes(getUserId()), + 0, data, 6, 2); + System.arraycopy(ZipShort.getBytes(getGroupId()), + 0, data, 8, 2); + + System.arraycopy(linkArray, 0, data, 10, linkArray.length); + // CheckStyle:MagicNumber ON + + crc.reset(); + crc.update(data); + final long checksum = crc.getValue(); + + final byte[] result = new byte[data.length + WORD]; + System.arraycopy(ZipLong.getBytes(checksum), 0, result, 0, WORD); + System.arraycopy(data, 0, result, WORD, data.length); + return result; + } + + /** + * Delegate to local file data. + * @return the local file data + */ + @Override + public byte[] getCentralDirectoryData() { + return getLocalFileDataData(); + } + + /** + * Set the user id. + * @param uid the user id + */ + public void setUserId(final int uid) { + this.uid = uid; + } + + /** + * Get the user id. + * @return the user id + */ + public int getUserId() { + return uid; + } + + /** + * Set the group id. + * @param gid the group id + */ + public void setGroupId(final int gid) { + this.gid = gid; + } + + /** + * Get the group id. + * @return the group id + */ + public int getGroupId() { + return gid; + } + + /** + * Indicate that this entry is a symbolic link to the given filename. + * + * @param name Name of the file this entry links to, empty String + * if it is not a symbolic link. + */ + public void setLinkedFile(final String name) { + link = name; + mode = getMode(mode); + } + + /** + * Name of linked file + * + * @return name of the file this entry links to if it is a + * symbolic link, the empty string otherwise. + */ + public String getLinkedFile() { + return link; + } + + /** + * Is this entry a symbolic link? + * @return true if this is a symbolic link + */ + public boolean isLink() { + return getLinkedFile().length() != 0; + } + + /** + * File mode of this file. + * @param mode the file mode + */ + public void setMode(final int mode) { + this.mode = getMode(mode); + } + + /** + * File mode of this file. + * @return the file mode + */ + public int getMode() { + return mode; + } + + /** + * Indicate whether this entry is a directory. + * @param dirFlag if true, this entry is a directory + */ + public void setDirectory(final boolean dirFlag) { + this.dirFlag = dirFlag; + mode = getMode(mode); + } + + /** + * Is this entry a directory? + * @return true if this entry is a directory + */ + public boolean isDirectory() { + return dirFlag && !isLink(); + } + + /** + * Populate data from this array as if it was in local file data. 
+ * @param data an array of bytes + * @param offset the start offset + * @param length the number of bytes in the array from offset + * @throws ZipException on error + */ + @Override + public void parseFromLocalFileData(final byte[] data, final int offset, final int length) + throws ZipException { + + final long givenChecksum = ZipLong.getValue(data, offset); + final byte[] tmp = new byte[length - WORD]; + System.arraycopy(data, offset + WORD, tmp, 0, length - WORD); + crc.reset(); + crc.update(tmp); + final long realChecksum = crc.getValue(); + if (givenChecksum != realChecksum) { + throw new ZipException("bad CRC checksum " + + Long.toHexString(givenChecksum) + + " instead of " + + Long.toHexString(realChecksum)); + } + + final int newMode = ZipShort.getValue(tmp, 0); + // CheckStyle:MagicNumber OFF + final byte[] linkArray = new byte[(int) ZipLong.getValue(tmp, 2)]; + uid = ZipShort.getValue(tmp, 6); + gid = ZipShort.getValue(tmp, 8); + + if (linkArray.length == 0) { + link = ""; + } else { + System.arraycopy(tmp, 10, linkArray, 0, linkArray.length); + link = new String(linkArray); // Uses default charset - see class Javadoc + } + // CheckStyle:MagicNumber ON + setDirectory((newMode & DIR_FLAG) != 0); + setMode(newMode); + } + + /** + * Doesn't do anything special since this class always uses the + * same data in central directory and local file data. + */ + @Override + public void parseFromCentralDirectoryData(final byte[] buffer, final int offset, + final int length) + throws ZipException { + parseFromLocalFileData(buffer, offset, length); + } + + /** + * Get the file mode for given permissions with the correct file type. + * @param mode the mode + * @return the type with the mode + */ + protected int getMode(final int mode) { + int type = FILE_FLAG; + if (isLink()) { + type = LINK_FLAG; + } else if (isDirectory()) { + type = DIR_FLAG; + } + return type | (mode & PERM_MASK); + } + + @Override + public Object clone() { + try { + final AsiExtraField cloned = (AsiExtraField) super.clone(); + cloned.crc = new CRC32(); + return cloned; + } catch (final CloneNotSupportedException cnfe) { + // impossible + throw new RuntimeException(cnfe); //NOSONAR + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/BinaryTree.java b/src/main/java/org/apache/commons/compress/archivers/zip/BinaryTree.java new file mode 100644 index 000000000..9b3c37756 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/BinaryTree.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers.zip; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +/** + * Binary tree of positive values. 
+ * + * @author Emmanuel Bourg + * @since 1.7 + */ +class BinaryTree { + + /** Value in the array indicating an undefined node */ + private static final int UNDEFINED = -1; + + /** Value in the array indicating a non leaf node */ + private static final int NODE = -2; + + /** + * The array representing the binary tree. The root is at index 0, + * the left children are at 2*i+1 and the right children at 2*i+2. + */ + private final int[] tree; + + public BinaryTree(final int depth) { + tree = new int[(1 << (depth + 1)) - 1]; + Arrays.fill(tree, UNDEFINED); + } + + /** + * Adds a leaf to the tree. + * + * @param node the index of the node where the path is appended + * @param path the path to the leaf (bits are parsed from the right to the left) + * @param depth the number of nodes in the path + * @param value the value of the leaf (must be positive) + */ + public void addLeaf(final int node, final int path, final int depth, final int value) { + if (depth == 0) { + // end of the path reached, add the value to the current node + if (tree[node] == UNDEFINED) { + tree[node] = value; + } else { + throw new IllegalArgumentException("Tree value at index " + node + " has already been assigned (" + tree[node] + ")"); + } + } else { + // mark the current node as a non leaf node + tree[node] = NODE; + + // move down the path recursively + final int nextChild = 2 * node + 1 + (path & 1); + addLeaf(nextChild, path >>> 1, depth - 1, value); + } + } + + /** + * Reads a value from the specified bit stream. + * + * @param stream + * @return the value decoded, or -1 if the end of the stream is reached + */ + public int read(final BitStream stream) throws IOException { + int currentIndex = 0; + + while (true) { + final int bit = stream.nextBit(); + if (bit == -1) { + return -1; + } + + final int childIndex = 2 * currentIndex + 1 + bit; + final int value = tree[childIndex]; + if (value == NODE) { + // consume the next bit + currentIndex = childIndex; + } else if (value != UNDEFINED) { + return value; + } else { + throw new IOException("The child " + bit + " of node at index " + currentIndex + " is not defined"); + } + } + } + + + /** + * Decodes the packed binary tree from the specified stream. 
+ */ + static BinaryTree decode(final InputStream in, final int totalNumberOfValues) throws IOException { + // the first byte contains the size of the structure minus one + final int size = in.read() + 1; + if (size == 0) { + throw new IOException("Cannot read the size of the encoded tree, unexpected end of stream"); + } + + final byte[] encodedTree = new byte[size]; + new DataInputStream(in).readFully(encodedTree); + + /** The maximum bit length for a value (16 or lower) */ + int maxLength = 0; + + final int[] originalBitLengths = new int[totalNumberOfValues]; + int pos = 0; + for (final byte b : encodedTree) { + // each byte encodes the number of values (upper 4 bits) for a bit length (lower 4 bits) + final int numberOfValues = ((b & 0xF0) >> 4) + 1; + final int bitLength = (b & 0x0F) + 1; + + for (int j = 0; j < numberOfValues; j++) { + originalBitLengths[pos++] = bitLength; + } + + maxLength = Math.max(maxLength, bitLength); + } + + // sort the array of bit lengths and memorize the permutation used to restore the order of the codes + final int[] permutation = new int[originalBitLengths.length]; + for (int k = 0; k < permutation.length; k++) { + permutation[k] = k; + } + + int c = 0; + final int[] sortedBitLengths = new int[originalBitLengths.length]; + for (int k = 0; k < originalBitLengths.length; k++) { + // iterate over the values + for (int l = 0; l < originalBitLengths.length; l++) { + // look for the value in the original array + if (originalBitLengths[l] == k) { + // put the value at the current position in the sorted array... + sortedBitLengths[c] = k; + + // ...and memorize the permutation + permutation[c] = l; + + c++; + } + } + } + + // decode the values of the tree + int code = 0; + int codeIncrement = 0; + int lastBitLength = 0; + + final int[] codes = new int[totalNumberOfValues]; + + for (int i = totalNumberOfValues - 1; i >= 0; i--) { + code = code + codeIncrement; + if (sortedBitLengths[i] != lastBitLength) { + lastBitLength = sortedBitLengths[i]; + codeIncrement = 1 << (16 - lastBitLength); + } + codes[permutation[i]] = code; + } + + // build the tree + final BinaryTree tree = new BinaryTree(maxLength); + + for (int k = 0; k < codes.length; k++) { + final int bitLength = originalBitLengths[k]; + if (bitLength > 0) { + tree.addLeaf(0, Integer.reverse(codes[k] << 16), bitLength, k); + } + } + + return tree; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/BitStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/BitStream.java new file mode 100644 index 000000000..fb737b797 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/BitStream.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.compress.archivers.zip; + +import java.io.IOException; +import java.io.InputStream; +import org.apache.commons.compress.utils.BitInputStream; +import java.nio.ByteOrder; + +/** + * Iterates over the bits of an InputStream. For each byte the bits + * are read from the right to the left. + * + * @since 1.7 + */ +class BitStream extends BitInputStream { + + BitStream(final InputStream in) { + super(in, ByteOrder.LITTLE_ENDIAN); + } + + /** + * Returns the next bit. + * + * @return The next bit (0 or 1) or -1 if the end of the stream has been reached + */ + int nextBit() throws IOException { + return (int) readBits(1); + } + + /** + * Returns the integer value formed by the n next bits (up to 8 bits). + * + * @param n the number of bits read (up to 8) + * @return The value formed by the n bits, or -1 if the end of the stream has been reached + */ + long nextBits(final int n) throws IOException { + return readBits(n); + } + + int nextByte() throws IOException { + return (int) readBits(8); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/CharsetAccessor.java b/src/main/java/org/apache/commons/compress/archivers/zip/CharsetAccessor.java new file mode 100644 index 000000000..e5342bec7 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/CharsetAccessor.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers.zip; + +import java.nio.charset.Charset; + +/** + * An interface added to allow access to the character set associated with an {@link NioZipEncoding}, + * without requiring a new method to be added to {@link ZipEncoding}. + * <p> + * This avoids introducing a + * potentially breaking change, or making {@link NioZipEncoding} a public class. + * </p> + * @since 1.15 + */ +public interface CharsetAccessor { + + /** + * Provides access to the character set associated with an object. + * <p> + * This allows nio oriented code to use more natural character encoding/decoding methods, + * whilst allowing existing code to continue to rely on special-case error handling for UTF-8. + * </p> + * @return the character set associated with this object + */ + Charset getCharset(); +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/CircularBuffer.java b/src/main/java/org/apache/commons/compress/archivers/zip/CircularBuffer.java new file mode 100644 index 000000000..8502e46e5 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/CircularBuffer.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers.zip; + +/** + * Circular byte buffer. + * + * @author Emmanuel Bourg + * @since 1.7 + */ +class CircularBuffer { + + /** Size of the buffer */ + private final int size; + + /** The buffer */ + private final byte[] buffer; + + /** Index of the next data to be read from the buffer */ + private int readIndex; + + /** Index of the next data written in the buffer */ + private int writeIndex; + + CircularBuffer(final int size) { + this.size = size; + buffer = new byte[size]; + } + + /** + * Tells if a new byte can be read from the buffer. + */ + public boolean available() { + return readIndex != writeIndex; + } + + /** + * Writes a byte to the buffer. + */ + public void put(final int value) { + buffer[writeIndex] = (byte) value; + writeIndex = (writeIndex + 1) % size; + } + + /** + * Reads a byte from the buffer. + */ + public int get() { + if (available()) { + final int value = buffer[readIndex]; + readIndex = (readIndex + 1) % size; + return value & 0xFF; + } + return -1; + } + + /** + * Copy a previous interval in the buffer to the current position. + * + * @param distance the distance from the current write position + * @param length the number of bytes to copy + */ + public void copy(final int distance, final int length) { + final int pos1 = writeIndex - distance; + final int pos2 = pos1 + length; + for (int i = pos1; i < pos2; i++) { + buffer[writeIndex] = buffer[(i + size) % size]; + writeIndex = (writeIndex + 1) % size; + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ExplodingInputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ExplodingInputStream.java new file mode 100644 index 000000000..70eb30833 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ExplodingInputStream.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.compress.archivers.zip; + +import org.apache.commons.compress.utils.CountingInputStream; +import org.apache.commons.compress.utils.InputStreamStatistics; + +import java.io.IOException; +import java.io.InputStream; + +/** + * The implode compression method was added to PKZIP 1.01 released in 1989. + * It was then dropped from PKZIP 2.0 released in 1993 in favor of the deflate + * method. + * <p> + * The algorithm is described in the ZIP File Format Specification. + * + * @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">ZIP File Format Specification</a> + * + * @author Emmanuel Bourg + * @since 1.7 + */ +class ExplodingInputStream extends InputStream implements InputStreamStatistics { + + /** The underlying stream containing the compressed data */ + private final InputStream in; + + /** The stream of bits read from the input stream */ + private BitStream bits; + + /** The size of the sliding dictionary (4K or 8K) */ + private final int dictionarySize; + + /** The number of Shannon-Fano trees (2 or 3) */ + private final int numberOfTrees; + + private final int minimumMatchLength; + + /** The binary tree containing the 256 encoded literals (null when only two trees are used) */ + private BinaryTree literalTree; + + /** The binary tree containing the 64 encoded lengths */ + private BinaryTree lengthTree; + + /** The binary tree containing the 64 encoded distances */ + private BinaryTree distanceTree; + + /** Output buffer holding the decompressed data */ + private final CircularBuffer buffer = new CircularBuffer(32 * 1024); + + private long uncompressedCount = 0; + + private long treeSizes = 0; + + /** + * Create a new stream decompressing the content of the specified stream + * using the explode algorithm. + * + * @param dictionarySize the size of the sliding dictionary (4096 or 8192) + * @param numberOfTrees the number of trees (2 or 3) + * @param in the compressed data stream + */ + public ExplodingInputStream(final int dictionarySize, final int numberOfTrees, final InputStream in) { + if (dictionarySize != 4096 && dictionarySize != 8192) { + throw new IllegalArgumentException("The dictionary size must be 4096 or 8192"); + } + if (numberOfTrees != 2 && numberOfTrees != 3) { + throw new IllegalArgumentException("The number of trees must be 2 or 3"); + } + this.dictionarySize = dictionarySize; + this.numberOfTrees = numberOfTrees; + this.minimumMatchLength = numberOfTrees; + this.in = in; + } + + /** + * Reads the encoded binary trees and prepares the bit stream. 
+ * + * @throws IOException + */ + private void init() throws IOException { + if (bits == null) { + try (CountingInputStream i = new CountingInputStream(in) { + @Override + public void close() { + // we do not want to close in + } + }) { + if (numberOfTrees == 3) { + literalTree = BinaryTree.decode(i, 256); + } + + lengthTree = BinaryTree.decode(i, 64); + distanceTree = BinaryTree.decode(i, 64); + treeSizes += i.getBytesRead(); + } + + bits = new BitStream(in); + } + } + + @Override + public int read() throws IOException { + if (!buffer.available()) { + fillBuffer(); + } + + final int ret = buffer.get(); + if (ret > -1) { + uncompressedCount++; + } + return ret; + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return bits.getBytesRead() + treeSizes; + } + + /** + * @since 1.17 + */ + @Override + public long getUncompressedCount() { + return uncompressedCount; + } + + /** + * @since 1.17 + */ + @Override + public void close() throws IOException { + in.close(); + } + + /** + * Fill the sliding dictionary with more data. + * @throws IOException + */ + private void fillBuffer() throws IOException { + init(); + + final int bit = bits.nextBit(); + if (bit == 1) { + // literal value + int literal; + if (literalTree != null) { + literal = literalTree.read(bits); + } else { + literal = bits.nextByte(); + } + + if (literal == -1) { + // end of stream reached, nothing left to decode + return; + } + + buffer.put(literal); + + } else if (bit == 0) { + // back reference + final int distanceLowSize = dictionarySize == 4096 ? 6 : 7; + final int distanceLow = (int) bits.nextBits(distanceLowSize); + final int distanceHigh = distanceTree.read(bits); + if (distanceHigh == -1 && distanceLow <= 0) { + // end of stream reached, nothing left to decode + return; + } + final int distance = distanceHigh << distanceLowSize | distanceLow; + + int length = lengthTree.read(bits); + if (length == 63) { + length += bits.nextBits(8); + } + length += minimumMatchLength; + + buffer.copy(distance + 1, length); + } + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ExtraFieldUtils.java b/src/main/java/org/apache/commons/compress/archivers/zip/ExtraFieldUtils.java new file mode 100644 index 000000000..eed6cb9e3 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ExtraFieldUtils.java @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.zip.ZipException; + +/** + * ZipExtraField related methods + * @NotThreadSafe because the HashMap is not synch. 
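+ *
+ * <p>A short usage sketch (the {@code extra} byte array is assumed to hold raw
+ * extra field data taken from a local file header):</p>
+ * <pre>
+ * ZipExtraField[] fields = ExtraFieldUtils.parse(extra, true);
+ * byte[] merged = ExtraFieldUtils.mergeLocalFileDataData(fields);
+ * </pre>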
+ */ +// CheckStyle:HideUtilityClassConstructorCheck OFF (bc) +public class ExtraFieldUtils { + + private static final int WORD = 4; + + /** + * Static registry of known extra fields. + */ + private static final Map<ZipShort, Class<?>> implementations; + + static { + implementations = new ConcurrentHashMap<>(); + register(AsiExtraField.class); + register(X5455_ExtendedTimestamp.class); + register(X7875_NewUnix.class); + register(JarMarker.class); + register(UnicodePathExtraField.class); + register(UnicodeCommentExtraField.class); + register(Zip64ExtendedInformationExtraField.class); + register(X000A_NTFS.class); + register(X0014_X509Certificates.class); + register(X0015_CertificateIdForFile.class); + register(X0016_CertificateIdForCentralDirectory.class); + register(X0017_StrongEncryptionHeader.class); + register(X0019_EncryptionRecipientCertificateList.class); + register(ResourceAlignmentExtraField.class); + } + + /** + * Register a ZipExtraField implementation. + * + * <p>The given class must have a no-arg constructor and implement + * the {@link ZipExtraField ZipExtraField interface}.</p> + * @param c the class to register + */ + public static void register(final Class<?> c) { + try { + final ZipExtraField ze = (ZipExtraField) c.newInstance(); + implementations.put(ze.getHeaderId(), c); + } catch (final ClassCastException cc) { + throw new RuntimeException(c + " doesn\'t implement ZipExtraField"); //NOSONAR + } catch (final InstantiationException ie) { + throw new RuntimeException(c + " is not a concrete class"); //NOSONAR + } catch (final IllegalAccessException ie) { + throw new RuntimeException(c + "\'s no-arg constructor is not public"); //NOSONAR + } + } + + /** + * Create an instance of the appropriate ExtraField, falls back to + * {@link UnrecognizedExtraField UnrecognizedExtraField}. + * @param headerId the header identifier + * @return an instance of the appropriate ExtraField + * @throws InstantiationException if unable to instantiate the class + * @throws IllegalAccessException if not allowed to instantiate the class + */ + public static ZipExtraField createExtraField(final ZipShort headerId) + throws InstantiationException, IllegalAccessException { + final Class<?> c = implementations.get(headerId); + if (c != null) { + return (ZipExtraField) c.newInstance(); + } + final UnrecognizedExtraField u = new UnrecognizedExtraField(); + u.setHeaderId(headerId); + return u; + } + + /** + * Split the array into ExtraFields and populate them with the + * given data as local file data, throwing an exception if the + * data cannot be parsed. + * @param data an array of bytes as it appears in local file data + * @return an array of ExtraFields + * @throws ZipException on error + */ + public static ZipExtraField[] parse(final byte[] data) throws ZipException { + return parse(data, true, UnparseableExtraField.THROW); + } + + /** + * Split the array into ExtraFields and populate them with the + * given data, throwing an exception if the data cannot be parsed. + * @param data an array of bytes + * @param local whether data originates from the local file data + * or the central directory + * @return an array of ExtraFields + * @throws ZipException on error + */ + public static ZipExtraField[] parse(final byte[] data, final boolean local) + throws ZipException { + return parse(data, local, UnparseableExtraField.THROW); + } + + /** + * Split the array into ExtraFields and populate them with the + * given data. 
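+     * <p>For example, passing {@link UnparseableExtraField#SKIP} silently ignores
+     * trailing bytes that do not form a complete header-id/length/data triple,
+     * while {@link UnparseableExtraField#READ} preserves them in an
+     * {@link UnparseableExtraFieldData} instance.</p>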
+ * @param data an array of bytes + * @param local whether data originates from the local file data + * or the central directory + * @param onUnparseableData what to do if the extra field data + * cannot be parsed. + * @return an array of ExtraFields + * @throws ZipException on error + * + * @since 1.1 + */ + public static ZipExtraField[] parse(final byte[] data, final boolean local, + final UnparseableExtraField onUnparseableData) + throws ZipException { + final List<ZipExtraField> v = new ArrayList<>(); + int start = 0; + LOOP: + while (start <= data.length - WORD) { + final ZipShort headerId = new ZipShort(data, start); + final int length = new ZipShort(data, start + 2).getValue(); + if (start + WORD + length > data.length) { + switch(onUnparseableData.getKey()) { + case UnparseableExtraField.THROW_KEY: + throw new ZipException("bad extra field starting at " + + start + ". Block length of " + + length + " bytes exceeds remaining" + + " data of " + + (data.length - start - WORD) + + " bytes."); + case UnparseableExtraField.READ_KEY: + final UnparseableExtraFieldData field = + new UnparseableExtraFieldData(); + if (local) { + field.parseFromLocalFileData(data, start, + data.length - start); + } else { + field.parseFromCentralDirectoryData(data, start, + data.length - start); + } + v.add(field); + //$FALL-THROUGH$ + case UnparseableExtraField.SKIP_KEY: + // since we cannot parse the data we must assume + // the extra field consumes the whole rest of the + // available data + break LOOP; + default: + throw new ZipException("unknown UnparseableExtraField key: " + + onUnparseableData.getKey()); + } + } + try { + final ZipExtraField ze = createExtraField(headerId); + try { + if (local) { + ze.parseFromLocalFileData(data, start + WORD, length); + } else { + ze.parseFromCentralDirectoryData(data, start + WORD, length); + } + } catch (ArrayIndexOutOfBoundsException aiobe) { + throw (ZipException) new ZipException("Failed to parse corrupt ZIP extra field of type " + + Integer.toHexString(headerId.getValue())).initCause(aiobe); + } + v.add(ze); + } catch (final InstantiationException | IllegalAccessException ie) { + throw (ZipException) new ZipException(ie.getMessage()).initCause(ie); + } + start += length + WORD; + } + + final ZipExtraField[] result = new ZipExtraField[v.size()]; + return v.toArray(result); + } + + /** + * Merges the local file data fields of the given ZipExtraFields. + * @param data an array of ExtraFiles + * @return an array of bytes + */ + public static byte[] mergeLocalFileDataData(final ZipExtraField[] data) { + final boolean lastIsUnparseableHolder = data.length > 0 + && data[data.length - 1] instanceof UnparseableExtraFieldData; + final int regularExtraFieldCount = + lastIsUnparseableHolder ? 
data.length - 1 : data.length; + + int sum = WORD * regularExtraFieldCount; + for (final ZipExtraField element : data) { + sum += element.getLocalFileDataLength().getValue(); + } + + final byte[] result = new byte[sum]; + int start = 0; + for (int i = 0; i < regularExtraFieldCount; i++) { + System.arraycopy(data[i].getHeaderId().getBytes(), + 0, result, start, 2); + System.arraycopy(data[i].getLocalFileDataLength().getBytes(), + 0, result, start + 2, 2); + start += WORD; + final byte[] local = data[i].getLocalFileDataData(); + if (local != null) { + System.arraycopy(local, 0, result, start, local.length); + start += local.length; + } + } + if (lastIsUnparseableHolder) { + final byte[] local = data[data.length - 1].getLocalFileDataData(); + if (local != null) { + System.arraycopy(local, 0, result, start, local.length); + } + } + return result; + } + + /** + * Merges the central directory fields of the given ZipExtraFields. + * @param data an array of ExtraFields + * @return an array of bytes + */ + public static byte[] mergeCentralDirectoryData(final ZipExtraField[] data) { + final boolean lastIsUnparseableHolder = data.length > 0 + && data[data.length - 1] instanceof UnparseableExtraFieldData; + final int regularExtraFieldCount = + lastIsUnparseableHolder ? data.length - 1 : data.length; + + int sum = WORD * regularExtraFieldCount; + for (final ZipExtraField element : data) { + sum += element.getCentralDirectoryLength().getValue(); + } + final byte[] result = new byte[sum]; + int start = 0; + for (int i = 0; i < regularExtraFieldCount; i++) { + System.arraycopy(data[i].getHeaderId().getBytes(), + 0, result, start, 2); + System.arraycopy(data[i].getCentralDirectoryLength().getBytes(), + 0, result, start + 2, 2); + start += WORD; + final byte[] local = data[i].getCentralDirectoryData(); + if (local != null) { + System.arraycopy(local, 0, result, start, local.length); + start += local.length; + } + } + if (lastIsUnparseableHolder) { + final byte[] local = data[data.length - 1].getCentralDirectoryData(); + if (local != null) { + System.arraycopy(local, 0, result, start, local.length); + } + } + return result; + } + + /** + * "enum" for the possible actions to take if the extra field + * cannot be parsed. + * + * @since 1.1 + */ + public static final class UnparseableExtraField { + /** + * Key for "throw an exception" action. + */ + public static final int THROW_KEY = 0; + /** + * Key for "skip" action. + */ + public static final int SKIP_KEY = 1; + /** + * Key for "read" action. + */ + public static final int READ_KEY = 2; + + /** + * Throw an exception if field cannot be parsed. + */ + public static final UnparseableExtraField THROW + = new UnparseableExtraField(THROW_KEY); + + /** + * Skip the extra field entirely and don't make its data + * available - effectively removing the extra field data. + */ + public static final UnparseableExtraField SKIP + = new UnparseableExtraField(SKIP_KEY); + + /** + * Read the extra field data into an instance of {@link + * UnparseableExtraFieldData UnparseableExtraFieldData}. + */ + public static final UnparseableExtraField READ + = new UnparseableExtraField(READ_KEY); + + private final int key; + + private UnparseableExtraField(final int k) { + key = k; + } + + /** + * Key of the action to take. 
+ * @return the key + */ + public int getKey() { return key; } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/GeneralPurposeBit.java b/src/main/java/org/apache/commons/compress/archivers/zip/GeneralPurposeBit.java new file mode 100644 index 000000000..dd363aa93 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/GeneralPurposeBit.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +/** + * Parser/encoder for the "general purpose bit" field in ZIP's local + * file and central directory headers. + * + * @since 1.1 + * @NotThreadSafe + */ +public final class GeneralPurposeBit implements Cloneable { + + /** + * Indicates that the file is encrypted. + */ + private static final int ENCRYPTION_FLAG = 1 << 0; + + /** + * Indicates the size of the sliding dictionary used by the compression method 6 (imploding). + * <ul> + * <li>0: 4096 bytes</li> + * <li>1: 8192 bytes</li> + * </ul> + */ + private static final int SLIDING_DICTIONARY_SIZE_FLAG = 1 << 1; + + /** + * Indicates the number of Shannon-Fano trees used by the compression method 6 (imploding). + * <ul> + * <li>0: 2 trees (lengths, distances)</li> + * <li>1: 3 trees (literals, lengths, distances)</li> + * </ul> + */ + private static final int NUMBER_OF_SHANNON_FANO_TREES_FLAG = 1 << 2; + + /** + * Indicates that a data descriptor stored after the file contents + * will hold CRC and size information. + */ + private static final int DATA_DESCRIPTOR_FLAG = 1 << 3; + + /** + * Indicates strong encryption. + */ + private static final int STRONG_ENCRYPTION_FLAG = 1 << 6; + + /** + * Indicates that filenames are written in UTF-8. + * + * <p>The only reason this is public is that {@link + * ZipArchiveOutputStream#EFS_FLAG} was public in Apache Commons + * Compress 1.0 and we needed a substitute for it.</p> + */ + public static final int UFT8_NAMES_FLAG = 1 << 11; + + private boolean languageEncodingFlag = false; + private boolean dataDescriptorFlag = false; + private boolean encryptionFlag = false; + private boolean strongEncryptionFlag = false; + private int slidingDictionarySize; + private int numberOfShannonFanoTrees; + + public GeneralPurposeBit() { + } + + /** + * whether the current entry uses UTF8 for file name and comment. + * @return whether the current entry uses UTF8 for file name and comment. + */ + public boolean usesUTF8ForNames() { + return languageEncodingFlag; + } + + /** + * whether the current entry will use UTF8 for file name and comment. + * @param b whether the current entry will use UTF8 for file name and comment. 
+ */ + public void useUTF8ForNames(final boolean b) { + languageEncodingFlag = b; + } + + /** + * whether the current entry uses the data descriptor to store CRC + * and size information. + * @return whether the current entry uses the data descriptor to store CRC + * and size information + */ + public boolean usesDataDescriptor() { + return dataDescriptorFlag; + } + + /** + * whether the current entry will use the data descriptor to store + * CRC and size information. + * @param b whether the current entry will use the data descriptor to store + * CRC and size information + */ + public void useDataDescriptor(final boolean b) { + dataDescriptorFlag = b; + } + + /** + * whether the current entry is encrypted. + * @return whether the current entry is encrypted + */ + public boolean usesEncryption() { + return encryptionFlag; + } + + /** + * whether the current entry will be encrypted. + * @param b whether the current entry will be encrypted + */ + public void useEncryption(final boolean b) { + encryptionFlag = b; + } + + /** + * whether the current entry is encrypted using strong encryption. + * @return whether the current entry is encrypted using strong encryption + */ + public boolean usesStrongEncryption() { + return encryptionFlag && strongEncryptionFlag; + } + + /** + * whether the current entry will be encrypted using strong encryption. + * @param b whether the current entry will be encrypted using strong encryption + */ + public void useStrongEncryption(final boolean b) { + strongEncryptionFlag = b; + if (b) { + useEncryption(true); + } + } + + /** + * Returns the sliding dictionary size used by the compression method 6 (imploding). + */ + int getSlidingDictionarySize() { + return slidingDictionarySize; + } + + /** + * Returns the number of trees used by the compression method 6 (imploding). + */ + int getNumberOfShannonFanoTrees() { + return numberOfShannonFanoTrees; + } + + /** + * Encodes the set bits in a form suitable for ZIP archives. + * @return the encoded general purpose bits + */ + public byte[] encode() { + final byte[] result = new byte[2]; + encode(result, 0); + return result; + } + + + /** + * Encodes the set bits in a form suitable for ZIP archives. + * + * @param buf the output buffer + * @param offset + * The offset within the output buffer of the first byte to be written. + * must be non-negative and no larger than <tt>buf.length-2</tt> + */ + public void encode(final byte[] buf, final int offset) { + ZipShort.putShort((dataDescriptorFlag ? DATA_DESCRIPTOR_FLAG : 0) + | + (languageEncodingFlag ? UFT8_NAMES_FLAG : 0) + | + (encryptionFlag ? ENCRYPTION_FLAG : 0) + | + (strongEncryptionFlag ? STRONG_ENCRYPTION_FLAG : 0) + , buf, offset); + } + + /** + * Parses the supported flags from the given archive data. + * + * @param data local file header or a central directory entry. + * @param offset offset at which the general purpose bit starts + * @return parsed flags + */ + public static GeneralPurposeBit parse(final byte[] data, final int offset) { + final int generalPurposeFlag = ZipShort.getValue(data, offset); + final GeneralPurposeBit b = new GeneralPurposeBit(); + b.useDataDescriptor((generalPurposeFlag & DATA_DESCRIPTOR_FLAG) != 0); + b.useUTF8ForNames((generalPurposeFlag & UFT8_NAMES_FLAG) != 0); + b.useStrongEncryption((generalPurposeFlag & STRONG_ENCRYPTION_FLAG) != 0); + b.useEncryption((generalPurposeFlag & ENCRYPTION_FLAG) != 0); + b.slidingDictionarySize = (generalPurposeFlag & SLIDING_DICTIONARY_SIZE_FLAG) != 0 ? 
8192 : 4096; + b.numberOfShannonFanoTrees = (generalPurposeFlag & NUMBER_OF_SHANNON_FANO_TREES_FLAG) != 0 ? 3 : 2; + return b; + } + + @Override + public int hashCode() { + return 3 * (7 * (13 * (17 * (encryptionFlag ? 1 : 0) + + (strongEncryptionFlag ? 1 : 0)) + + (languageEncodingFlag ? 1 : 0)) + + (dataDescriptorFlag ? 1 : 0)); + } + + @Override + public boolean equals(final Object o) { + if (!(o instanceof GeneralPurposeBit)) { + return false; + } + final GeneralPurposeBit g = (GeneralPurposeBit) o; + return g.encryptionFlag == encryptionFlag + && g.strongEncryptionFlag == strongEncryptionFlag + && g.languageEncodingFlag == languageEncodingFlag + && g.dataDescriptorFlag == dataDescriptorFlag; + } + + @Override + public Object clone() { + try { + return super.clone(); + } catch (final CloneNotSupportedException ex) { + // impossible + throw new RuntimeException("GeneralPurposeBit is not Cloneable?", ex); //NOSONAR + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/InflaterInputStreamWithStatistics.java b/src/main/java/org/apache/commons/compress/archivers/zip/InflaterInputStreamWithStatistics.java new file mode 100644 index 000000000..b10590fa6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/InflaterInputStreamWithStatistics.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress.archivers.zip; + +import org.apache.commons.compress.utils.InputStreamStatistics; + +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.Inflater; +import java.util.zip.InflaterInputStream; + +/** + * Helper class to provide statistics + * + * @since 1.17 + */ +/* package */ class InflaterInputStreamWithStatistics extends InflaterInputStream + implements InputStreamStatistics { + private long compressedCount = 0; + private long uncompressedCount = 0; + + public InflaterInputStreamWithStatistics(InputStream in) { + super(in); + } + + public InflaterInputStreamWithStatistics(InputStream in, Inflater inf) { + super(in, inf); + } + + public InflaterInputStreamWithStatistics(InputStream in, Inflater inf, int size) { + super(in, inf, size); + } + + @Override + protected void fill() throws IOException { + super.fill(); + compressedCount += inf.getRemaining(); + } + + @Override + public int read() throws IOException { + final int b = super.read(); + if (b > -1) { + uncompressedCount++; + } + return b; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + final int bytes = super.read(b, off, len); + if (bytes > -1) { + uncompressedCount += bytes; + } + return bytes; + } + + @Override + public long getCompressedCount() { + return compressedCount; + } + + @Override + public long getUncompressedCount() { + return uncompressedCount; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/JarMarker.java b/src/main/java/org/apache/commons/compress/archivers/zip/JarMarker.java new file mode 100644 index 000000000..ad12f37e2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/JarMarker.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import java.util.zip.ZipException; + +/** + * If this extra field is added as the very first extra field of the + * archive, Solaris will consider it an executable jar file. + * @Immutable + */ +public final class JarMarker implements ZipExtraField { + + private static final ZipShort ID = new ZipShort(0xCAFE); + private static final ZipShort NULL = new ZipShort(0); + private static final byte[] NO_BYTES = new byte[0]; + private static final JarMarker DEFAULT = new JarMarker(); + + /** No-arg constructor */ + public JarMarker() { + // empty + } + + /** + * Since JarMarker is stateless we can always use the same instance. + * @return the DEFAULT jarmaker. + */ + public static JarMarker getInstance() { + return DEFAULT; + } + + /** + * The Header-ID. 
+ * @return the header id + */ + @Override + public ZipShort getHeaderId() { + return ID; + } + + /** + * Length of the extra field in the local file data - without + * Header-ID or length specifier. + * @return 0 + */ + @Override + public ZipShort getLocalFileDataLength() { + return NULL; + } + + /** + * Length of the extra field in the central directory - without + * Header-ID or length specifier. + * @return 0 + */ + @Override + public ZipShort getCentralDirectoryLength() { + return NULL; + } + + /** + * The actual data to put into local file data - without Header-ID + * or length specifier. + * @return the data + */ + @Override + public byte[] getLocalFileDataData() { + return NO_BYTES; + } + + /** + * The actual data to put central directory - without Header-ID or + * length specifier. + * @return the data + */ + @Override + public byte[] getCentralDirectoryData() { + return NO_BYTES; + } + + /** + * Populate data from this array as if it was in local file data. + * @param data an array of bytes + * @param offset the start offset + * @param length the number of bytes in the array from offset + * + * @throws ZipException on error + */ + @Override + public void parseFromLocalFileData(final byte[] data, final int offset, final int length) + throws ZipException { + if (length != 0) { + throw new ZipException("JarMarker doesn't expect any data"); + } + } + + /** + * Doesn't do anything special since this class always uses the + * same data in central directory and local file data. + */ + @Override + public void parseFromCentralDirectoryData(final byte[] buffer, final int offset, + final int length) + throws ZipException { + parseFromLocalFileData(buffer, offset, length); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java new file mode 100644 index 000000000..0a7581acf --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers.zip; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; + +/** + * A ZipEncoding, which uses a java.nio {@link + * java.nio.charset.Charset Charset} to encode names. 
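+ *
+ * <p>A minimal sketch of how such an encoding is used (the charset choice is
+ * illustrative):</p>
+ * <pre>
+ * ZipEncoding enc = new NioZipEncoding(StandardCharsets.UTF_8, false);
+ * if (enc.canEncode(name)) {
+ *     ByteBuffer encoded = enc.encode(name);
+ * }
+ * </pre>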
+ * <p>The methods of this class are reentrant.</p> + * @Immutable + */ +class NioZipEncoding implements ZipEncoding, CharsetAccessor { + + private final Charset charset; + private final boolean useReplacement; + private static final char REPLACEMENT = '?'; + private static final byte[] REPLACEMENT_BYTES = { (byte) REPLACEMENT }; + private static final String REPLACEMENT_STRING = String.valueOf(REPLACEMENT); + private static final char[] HEX_CHARS = new char[] { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' + }; + + + /** + * Construct an NioZipEncoding using the given charset. + * @param charset The character set to use. + * @param useReplacement should invalid characters be replaced, or reported. + */ + NioZipEncoding(final Charset charset, boolean useReplacement) { + this.charset = charset; + this.useReplacement = useReplacement; + } + + @Override + public Charset getCharset() { + return charset; + } + + /** + * @see ZipEncoding#canEncode(java.lang.String) + */ + @Override + public boolean canEncode(final String name) { + final CharsetEncoder enc = newEncoder(); + + return enc.canEncode(name); + } + + /** + * @see ZipEncoding#encode(java.lang.String) + */ + @Override + public ByteBuffer encode(final String name) { + final CharsetEncoder enc = newEncoder(); + + final CharBuffer cb = CharBuffer.wrap(name); + CharBuffer tmp = null; + ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining())); + + while (cb.remaining() > 0) { + final CoderResult res = enc.encode(cb, out, false); + + if (res.isUnmappable() || res.isMalformed()) { + + // write the unmappable characters in utf-16 + // pseudo-URL encoding style to ByteBuffer. + + int spaceForSurrogate = estimateIncrementalEncodingSize(enc, 6 * res.length()); + if (spaceForSurrogate > out.remaining()) { + // if the destination buffer isn't over sized, assume that the presence of one + // unmappable character makes it likely that there will be more. Find all the + // un-encoded characters and allocate space based on those estimates. + int charCount = 0; + for (int i = cb.position() ; i < cb.limit(); i++) { + charCount += !enc.canEncode(cb.get(i)) ? 
6 : 1; + } + int totalExtraSpace = estimateIncrementalEncodingSize(enc, charCount); + out = ZipEncodingHelper.growBufferBy(out, totalExtraSpace - out.remaining()); + } + if (tmp == null) { + tmp = CharBuffer.allocate(6); + } + for (int i = 0; i < res.length(); ++i) { + out = encodeFully(enc, encodeSurrogate(tmp, cb.get()), out); + } + + } else if (res.isOverflow()) { + int increment = estimateIncrementalEncodingSize(enc, cb.remaining()); + out = ZipEncodingHelper.growBufferBy(out, increment); + } + } + // tell the encoder we are done + enc.encode(cb, out, true); + // may have caused underflow, but that's been ignored traditionally + + out.limit(out.position()); + out.rewind(); + return out; + } + + /** + * @see + * ZipEncoding#decode(byte[]) + */ + @Override + public String decode(final byte[] data) throws IOException { + return newDecoder() + .decode(ByteBuffer.wrap(data)).toString(); + } + + private static ByteBuffer encodeFully(CharsetEncoder enc, CharBuffer cb, ByteBuffer out) { + ByteBuffer o = out; + while (cb.hasRemaining()) { + CoderResult result = enc.encode(cb, o, false); + if (result.isOverflow()) { + int increment = estimateIncrementalEncodingSize(enc, cb.remaining()); + o = ZipEncodingHelper.growBufferBy(o, increment); + } + } + return o; + } + + private static CharBuffer encodeSurrogate(CharBuffer cb, char c) { + cb.position(0).limit(6); + cb.put('%'); + cb.put('U'); + + cb.put(HEX_CHARS[(c >> 12) & 0x0f]); + cb.put(HEX_CHARS[(c >> 8) & 0x0f]); + cb.put(HEX_CHARS[(c >> 4) & 0x0f]); + cb.put(HEX_CHARS[c & 0x0f]); + cb.flip(); + return cb; + } + + private CharsetEncoder newEncoder() { + if (useReplacement) { + return charset.newEncoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE) + .replaceWith(REPLACEMENT_BYTES); + } else { + return charset.newEncoder() + .onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT); + } + } + + private CharsetDecoder newDecoder() { + if (!useReplacement) { + return this.charset.newDecoder() + .onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT); + } else { + return charset.newDecoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE) + .replaceWith(REPLACEMENT_STRING); + } + } + + /** + * Estimate the initial encoded size (in bytes) for a character buffer. + * <p> + * The estimate assumes that one character consumes uses the maximum length encoding, + * whilst the rest use an average size encoding. This accounts for any BOM for UTF-16, at + * the expense of a couple of extra bytes for UTF-8 encoded ASCII. + * </p> + * + * @param enc encoder to use for estimates + * @param charChount number of characters in string + * @return estimated size in bytes. + */ + private static int estimateInitialBufferSize(CharsetEncoder enc, int charChount) { + float first = enc.maxBytesPerChar(); + float rest = (charChount - 1) * enc.averageBytesPerChar(); + return (int) Math.ceil(first + rest); + } + + /** + * Estimate the size needed for remaining characters + * + * @param enc encoder to use for estimates + * @param charCount number of characters remaining + * @return estimated size in bytes. 
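+     *         For example, 10 remaining characters at an (assumed) average of
+     *         1.1 bytes per character yield {@code ceil(11.0) = 11} bytes.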
+ */ + private static int estimateIncrementalEncodingSize(CharsetEncoder enc, int charCount) { + return (int) Math.ceil(charCount * enc.averageBytesPerChar()); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/PKWareExtraHeader.java b/src/main/java/org/apache/commons/compress/archivers/zip/PKWareExtraHeader.java new file mode 100644 index 000000000..7177c8759 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/PKWareExtraHeader.java @@ -0,0 +1,308 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * Base class for all PKWare strong crypto extra headers. + * + * <p>This base class acts as a marker so you know you can ignore all + * extra fields that extend this class if you are not interested in + * the meta data of PKWare strong encryption.</p> + * + * <b>Algorithm IDs</b> - integer identifier of the encryption algorithm from + * the following range + * + * <ul> + * <li>0x6601 - DES</li> + * <li>0x6602 - RC2 (version needed to extract < 5.2)</li> + * <li>0x6603 - 3DES 168</li> + * <li>0x6609 - 3DES 112</li> + * <li>0x660E - AES 128</li> + * <li>0x660F - AES 192</li> + * <li>0x6610 - AES 256</li> + * <li>0x6702 - RC2 (version needed to extract >= 5.2)</li> + * <li>0x6720 - Blowfish</li> + * <li>0x6721 - Twofish</li> + * <li>0x6801 - RC4</li> + * <li>0xFFFF - Unknown algorithm</li> + * </ul> + * + * <b>Hash Algorithms</b> - integer identifier of the hash algorithm from the + * following range + * + * <ul> + * <li>0x0000 - none</li> + * <li>0x0001 - CRC32</li> + * <li>0x8003 - MD5</li> + * <li>0x8004 - SHA1</li> + * <li>0x8007 - RIPEMD160</li> + * <li>0x800C - SHA256</li> + * <li>0x800D - SHA384</li> + * <li>0x800E - SHA512</li> + * </ul> + * + * @since 1.11 + */ +public abstract class PKWareExtraHeader implements ZipExtraField { + + private final ZipShort headerId; + /** + * Extra field data in local file data - without Header-ID or length + * specifier. + */ + private byte[] localData; + /** + * Extra field data in central directory - without Header-ID or length + * specifier. + */ + private byte[] centralData; + + protected PKWareExtraHeader(final ZipShort headerId) { + this.headerId = headerId; + } + + /** + * Get the header id. + * + * @return the header id + */ + @Override + public ZipShort getHeaderId() { + return headerId; + } + + /** + * Set the extra field data in the local file data - without Header-ID or + * length specifier. + * + * @param data + * the field data to use + */ + public void setLocalFileDataData(final byte[] data) { + localData = ZipUtil.copy(data); + } + + /** + * Get the length of the local data. 
+ * + * @return the length of the local data + */ + @Override + public ZipShort getLocalFileDataLength() { + return new ZipShort(localData != null ? localData.length : 0); + } + + /** + * Get the local data. + * + * @return the local data + */ + @Override + public byte[] getLocalFileDataData() { + return ZipUtil.copy(localData); + } + + /** + * Set the extra field data in central directory. + * + * @param data + * the data to use + */ + public void setCentralDirectoryData(final byte[] data) { + centralData = ZipUtil.copy(data); + } + + /** + * Get the central data length. If there is no central data, get the local + * file data length. + * + * @return the central data length + */ + @Override + public ZipShort getCentralDirectoryLength() { + if (centralData != null) { + return new ZipShort(centralData.length); + } + return getLocalFileDataLength(); + } + + /** + * Get the central data. + * + * @return the central data if present, else return the local file data + */ + @Override + public byte[] getCentralDirectoryData() { + if (centralData != null) { + return ZipUtil.copy(centralData); + } + return getLocalFileDataData(); + } + + /** + * @param data + * the array of bytes. + * @param offset + * the source location in the data array. + * @param length + * the number of bytes to use in the data array. + * @see ZipExtraField#parseFromLocalFileData(byte[], int, int) + */ + @Override + public void parseFromLocalFileData(final byte[] data, final int offset, final int length) { + final byte[] tmp = new byte[length]; + System.arraycopy(data, offset, tmp, 0, length); + setLocalFileDataData(tmp); + } + + /** + * @param data + * the array of bytes. + * @param offset + * the source location in the data array. + * @param length + * the number of bytes to use in the data array. + * @see ZipExtraField#parseFromCentralDirectoryData(byte[], int, int) + */ + @Override + public void parseFromCentralDirectoryData(final byte[] data, final int offset, final int length) { + final byte[] tmp = new byte[length]; + System.arraycopy(data, offset, tmp, 0, length); + setCentralDirectoryData(tmp); + if (localData == null) { + setLocalFileDataData(tmp); + } + } + + /** + * Encryption algorithm. + * + * @since 1.11 + */ + public enum EncryptionAlgorithm { + DES(0x6601), + RC2pre52(0x6602), + TripleDES168(0x6603), + TripleDES192(0x6609), + AES128(0x660E), + AES192(0x660F), + AES256(0x6610), + RC2(0x6702), + RC4(0x6801), + UNKNOWN(0xFFFF); + + private final int code; + + private static final Map<Integer, EncryptionAlgorithm> codeToEnum; + + static { + final Map<Integer, EncryptionAlgorithm> cte = new HashMap<>(); + for (final EncryptionAlgorithm method : values()) { + cte.put(method.getCode(), method); + } + codeToEnum = Collections.unmodifiableMap(cte); + } + + /** + * private constructor for enum style class. + */ + EncryptionAlgorithm(final int code) { + this.code = code; + } + + /** + * the algorithm id. + * + * @return the PKWare AlgorithmId + */ + public int getCode() { + return code; + } + + /** + * Returns the EncryptionAlgorithm for the given code or null if the + * method is not known. 
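+         * For example, {@code getAlgorithmByCode(0x6610)} returns {@link #AES256}.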
+ * @param code the code of the algorithm + * @return the EncryptionAlgorithm for the given code or null + * if the method is not known + */ + public static EncryptionAlgorithm getAlgorithmByCode(final int code) { + return codeToEnum.get(code); + } + } + + /** + * Hash Algorithm + * + * @since 1.11 + */ + public enum HashAlgorithm { + NONE(0), + CRC32(1), + MD5(0x8003), + SHA1(0x8004), + RIPEND160(0x8007), + SHA256(0x800C), + SHA384(0x800D), + SHA512(0x800E); + + private final int code; + + private static final Map<Integer, HashAlgorithm> codeToEnum; + + static { + final Map<Integer, HashAlgorithm> cte = new HashMap<>(); + for (final HashAlgorithm method : values()) { + cte.put(method.getCode(), method); + } + codeToEnum = Collections.unmodifiableMap(cte); + } + + /** + * private constructor for enum style class. + */ + HashAlgorithm(final int code) { + this.code = code; + } + + /** + * the hash algorithm ID. + * + * @return the PKWare hashAlg + */ + public int getCode() { + return code; + } + + /** + * Returns the HashAlgorithm for the given code or null if the method is + * not known. + * @param code the code of the algorithm + * @return the HashAlgorithm for the given code or null + * if the method is not known + */ + public static HashAlgorithm getAlgorithmByCode(final int code) { + return codeToEnum.get(code); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreator.java b/src/main/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreator.java new file mode 100644 index 000000000..a381d0a28 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreator.java @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.zip; + +import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore; +import org.apache.commons.compress.parallel.InputStreamSupplier; +import org.apache.commons.compress.parallel.ScatterGatherBackingStore; +import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.zip.Deflater; + +import static java.util.Collections.synchronizedList; +import static org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest.createZipArchiveEntryRequest; + +/** + * Creates a zip in parallel by using multiple threadlocal {@link ScatterZipOutputStream} instances. + * <p> + * Note that this class generally makes no guarantees about the order of things written to + * the output file. Things that need to come in a specific order (manifests, directories) + * must be handled by the client of this class, usually by writing these things to the + * {@link ZipArchiveOutputStream} <em>before</em> calling {@link #writeTo writeTo} on this class.</p> + * <p> + * The client can supply an {@link java.util.concurrent.ExecutorService}, but for reasons of + * memory model consistency, this will be shut down by this class prior to completion. + * </p> + * @since 1.10 + */ +public class ParallelScatterZipCreator { + private final List<ScatterZipOutputStream> streams = synchronizedList(new ArrayList<ScatterZipOutputStream>()); + private final ExecutorService es; + private final ScatterGatherBackingStoreSupplier backingStoreSupplier; + private final List<Future<Object>> futures = new ArrayList<>(); + + private final long startedAt = System.currentTimeMillis(); + private long compressionDoneAt = 0; + private long scatterDoneAt; + + private static class DefaultBackingStoreSupplier implements ScatterGatherBackingStoreSupplier { + final AtomicInteger storeNum = new AtomicInteger(0); + + @Override + public ScatterGatherBackingStore get() throws IOException { + final File tempFile = File.createTempFile("parallelscatter", "n" + storeNum.incrementAndGet()); + return new FileBasedScatterGatherBackingStore(tempFile); + } + } + + private ScatterZipOutputStream createDeferred(final ScatterGatherBackingStoreSupplier scatterGatherBackingStoreSupplier) + throws IOException { + final ScatterGatherBackingStore bs = scatterGatherBackingStoreSupplier.get(); + // lifecycle is bound to the ScatterZipOutputStream returned + final StreamCompressor sc = StreamCompressor.create(Deflater.DEFAULT_COMPRESSION, bs); //NOSONAR + return new ScatterZipOutputStream(bs, sc); + } + + private final ThreadLocal<ScatterZipOutputStream> tlScatterStreams = new ThreadLocal<ScatterZipOutputStream>() { + @Override + protected ScatterZipOutputStream initialValue() { + try { + final ScatterZipOutputStream scatterStream = createDeferred(backingStoreSupplier); + streams.add(scatterStream); + return scatterStream; + } catch (final IOException e) { + throw new RuntimeException(e); //NOSONAR + } + } + }; + + /** + * Create a ParallelScatterZipCreator with default threads, which is set to the number of available + * processors, as defined by {@link 
java.lang.Runtime#availableProcessors} + */ + public ParallelScatterZipCreator() { + this(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors())); + } + + /** + * Create a ParallelScatterZipCreator + * + * @param executorService The executorService to use for parallel scheduling. For technical reasons, + * this will be shut down by this class. + */ + public ParallelScatterZipCreator(final ExecutorService executorService) { + this(executorService, new DefaultBackingStoreSupplier()); + } + + /** + * Create a ParallelScatterZipCreator + * + * @param executorService The executorService to use. For technical reasons, this will be shut down + * by this class. + * @param backingStoreSupplier The supplier of backing store which shall be used + */ + public ParallelScatterZipCreator(final ExecutorService executorService, + final ScatterGatherBackingStoreSupplier backingStoreSupplier) { + this.backingStoreSupplier = backingStoreSupplier; + es = executorService; + } + + /** + * Adds an archive entry to this archive. + * <p> + * This method is expected to be called from a single client thread + * </p> + * + * @param zipArchiveEntry The entry to add. + * @param source The source input stream supplier + */ + + public void addArchiveEntry(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) { + submit(createCallable(zipArchiveEntry, source)); + } + + /** + * Adds an archive entry to this archive. + * <p> + * This method is expected to be called from a single client thread + * </p> + * + * @param zipArchiveEntryRequestSupplier Should supply the entry to be added. + * @since 1.13 + */ + public void addArchiveEntry(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) { + submit(createCallable(zipArchiveEntryRequestSupplier)); + } + + /** + * Submit a callable for compression. + * + * @see ParallelScatterZipCreator#createCallable for details of if/when to use this. + * + * @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller. + */ + public final void submit(final Callable<Object> callable) { + futures.add(es.submit(callable)); + } + + /** + * Create a callable that will compress the given archive entry. + * + * <p>This method is expected to be called from a single client thread.</p> + * + * Consider using {@link #addArchiveEntry addArchiveEntry}, which wraps this method and {@link #submit submit}. + * The most common use case for using {@link #createCallable createCallable} and {@link #submit submit} from a + * client is if you want to wrap the callable in something that can be prioritized by the supplied + * {@link ExecutorService}, for instance to process large or slow files first. + * Since the creation of the {@link ExecutorService} is handled by the client, all of this is up to the client. + * + * @param zipArchiveEntry The entry to add. + * @param source The source input stream supplier + * @return A callable that should subsequently passed to #submit, possibly in a wrapped/adapted from. The + * value of this callable is not used, but any exceptions happening inside the compression + * will be propagated through the callable. 
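+     *
+     * <p>A typical end-to-end sketch (variable names and the target file are
+     * illustrative; {@code supplier} is an {@link InputStreamSupplier} for the
+     * entry's content):</p>
+     * <pre>
+     * ParallelScatterZipCreator creator = new ParallelScatterZipCreator();
+     * ZipArchiveEntry entry = new ZipArchiveEntry("data.bin");
+     * entry.setMethod(ZipEntry.DEFLATED);
+     * creator.addArchiveEntry(entry, supplier);
+     * try (ZipArchiveOutputStream out = new ZipArchiveOutputStream(targetFile)) {
+     *     creator.writeTo(out);
+     * }
+     * </pre>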
+ */ + + public final Callable<Object> createCallable(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) { + final int method = zipArchiveEntry.getMethod(); + if (method == ZipMethod.UNKNOWN_CODE) { + throw new IllegalArgumentException("Method must be set on zipArchiveEntry: " + zipArchiveEntry); + } + final ZipArchiveEntryRequest zipArchiveEntryRequest = createZipArchiveEntryRequest(zipArchiveEntry, source); + return new Callable<Object>() { + @Override + public Object call() throws Exception { + tlScatterStreams.get().addArchiveEntry(zipArchiveEntryRequest); + return null; + } + }; + } + + /** + * Create a callable that will compress archive entry supplied by {@link ZipArchiveEntryRequestSupplier}. + * + * <p>This method is expected to be called from a single client thread.</p> + * + * The same as {@link #createCallable(ZipArchiveEntry, InputStreamSupplier)}, but the archive entry + * to be added is supplied by a {@link ZipArchiveEntryRequestSupplier}. + * + * @see #createCallable(ZipArchiveEntry, InputStreamSupplier) + * + * @param zipArchiveEntryRequestSupplier Should supply the entry to be added. + * @return A callable that should subsequently passed to #submit, possibly in a wrapped/adapted from. The + * value of this callable is not used, but any exceptions happening inside the compression + * will be propagated through the callable. + * @since 1.13 + */ + public final Callable<Object> createCallable(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) { + return new Callable<Object>() { + @Override + public Object call() throws Exception { + tlScatterStreams.get().addArchiveEntry(zipArchiveEntryRequestSupplier.get()); + return null; + } + }; + } + + /** + * Write the contents this to the target {@link ZipArchiveOutputStream}. + * <p> + * It may be beneficial to write things like directories and manifest files to the targetStream + * before calling this method. + * </p> + * + * <p>Calling this method will shut down the {@link ExecutorService} used by this class. If any of the {@link + * Callable}s {@link #submit}ted to this instance throws an exception, the archive can not be created properly and + * this method will throw an exception.</p> + * + * @param targetStream The {@link ZipArchiveOutputStream} to receive the contents of the scatter streams + * @throws IOException If writing fails + * @throws InterruptedException If we get interrupted + * @throws ExecutionException If something happens in the parallel execution + */ + public void writeTo(final ZipArchiveOutputStream targetStream) + throws IOException, InterruptedException, ExecutionException { + + // Make sure we catch any exceptions from parallel phase + try { + for (final Future<?> future : futures) { + future.get(); + } + } finally { + es.shutdown(); + } + + es.awaitTermination(1000 * 60L, TimeUnit.SECONDS); // == Infinity. 
We really *must* wait for this to complete
+
+        // It is important that all threads terminate before we go on, ensure happens-before relationship
+        compressionDoneAt = System.currentTimeMillis();
+
+        synchronized (streams) {
+            for (final ScatterZipOutputStream scatterStream : streams) {
+                scatterStream.writeTo(targetStream);
+                scatterStream.close();
+            }
+        }
+
+        scatterDoneAt = System.currentTimeMillis();
+    }
+
+    /**
+     * Returns a message describing the overall statistics of the compression run.
+     *
+     * @return A string
+     */
+    public ScatterStatistics getStatisticsMessage() {
+        return new ScatterStatistics(compressionDoneAt - startedAt, scatterDoneAt - compressionDoneAt);
+    }
+}
+
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ResourceAlignmentExtraField.java b/src/main/java/org/apache/commons/compress/archivers/zip/ResourceAlignmentExtraField.java
new file mode 100644
index 000000000..3d0741c04
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ResourceAlignmentExtraField.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.archivers.zip;
+
+
+import java.util.zip.ZipException;
+
+/**
+ * An extra field whose sole purpose is to align and pad the local file header
+ * so that the entry's data starts at a certain position.
+ *
+ * <p>The content of the padding is ignored and not retained
+ * when reading a padding field.</p>
+ *
+ * <p>This enables Commons Compress to create "aligned" archives
+ * similar to Android's zipalign command line tool.</p>
+ *
+ * @since 1.14
+ * @see "https://developer.android.com/studio/command-line/zipalign.html"
+ * @see ZipArchiveEntry#setAlignment
+ */
+public class ResourceAlignmentExtraField implements ZipExtraField {
+
+    /**
+     * Extra field id used for storing alignment and padding.
+ */ + public static final ZipShort ID = new ZipShort(0xa11e); + + public static final int BASE_SIZE = 2; + + private static final int ALLOW_METHOD_MESSAGE_CHANGE_FLAG = 0x8000; + + private short alignment; + + private boolean allowMethodChange; + + private int padding = 0; + + public ResourceAlignmentExtraField() { + } + + public ResourceAlignmentExtraField(int alignment) { + this(alignment, false); + } + + public ResourceAlignmentExtraField(int alignment, boolean allowMethodChange) { + this(alignment, allowMethodChange, 0); + } + + public ResourceAlignmentExtraField(int alignment, boolean allowMethodChange, int padding) { + if (alignment < 0 || alignment > 0x7fff) { + throw new IllegalArgumentException("Alignment must be between 0 and 0x7fff, was: " + alignment); + } + this.alignment = (short) alignment; + this.allowMethodChange = allowMethodChange; + this.padding = padding; + } + + /** + * Gets requested alignment. + * + * @return + * requested alignment. + */ + public short getAlignment() { + return alignment; + } + + /** + * Indicates whether method change is allowed when re-compressing the zip file. + * + * @return + * true if method change is allowed, false otherwise. + */ + public boolean allowMethodChange() { + return allowMethodChange; + } + + @Override + public ZipShort getHeaderId() { + return ID; + } + + @Override + public ZipShort getLocalFileDataLength() { + return new ZipShort(BASE_SIZE + padding); + } + + @Override + public ZipShort getCentralDirectoryLength() { + return new ZipShort(BASE_SIZE); + } + + @Override + public byte[] getLocalFileDataData() { + byte[] content = new byte[BASE_SIZE + padding]; + ZipShort.putShort(alignment | (allowMethodChange ? ALLOW_METHOD_MESSAGE_CHANGE_FLAG : 0), + content, 0); + return content; + } + + @Override + public byte[] getCentralDirectoryData() { + return ZipShort.getBytes(alignment | (allowMethodChange ? ALLOW_METHOD_MESSAGE_CHANGE_FLAG : 0)); + } + + @Override + public void parseFromLocalFileData(byte[] buffer, int offset, int length) throws ZipException { + parseFromCentralDirectoryData(buffer, offset, length); + this.padding = length - BASE_SIZE; + } + + @Override + public void parseFromCentralDirectoryData(byte[] buffer, int offset, int length) throws ZipException { + if (length < BASE_SIZE) { + throw new ZipException("Too short content for ResourceAlignmentExtraField (0xa11e): " + length); + } + int alignmentValue = ZipShort.getValue(buffer, offset); + this.alignment = (short) (alignmentValue & (ALLOW_METHOD_MESSAGE_CHANGE_FLAG - 1)); + this.allowMethodChange = (alignmentValue & ALLOW_METHOD_MESSAGE_CHANGE_FLAG) != 0; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ScatterStatistics.java b/src/main/java/org/apache/commons/compress/archivers/zip/ScatterStatistics.java new file mode 100644 index 000000000..83c5bb5e9 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ScatterStatistics.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.commons.compress.archivers.zip; + +/** + * Provides information about a scatter compression run. + * + * @since 1.10 + */ +public class ScatterStatistics { + private final long compressionElapsed; + private final long mergingElapsed; + + ScatterStatistics(final long compressionElapsed, final long mergingElapsed) { + this.compressionElapsed = compressionElapsed; + this.mergingElapsed = mergingElapsed; + } + + /** + * The number of milliseconds elapsed in the parallel compression phase + * @return The number of milliseconds elapsed + */ + public long getCompressionElapsed() { + return compressionElapsed; + } + + /** + * The number of milliseconds elapsed in merging the results of the parallel compression, the IO phase + * @return The number of milliseconds elapsed + */ + public long getMergingElapsed() { + return mergingElapsed; + } + + @Override + public String toString() { + return "compressionElapsed=" + compressionElapsed + "ms, mergingElapsed=" + mergingElapsed + "ms"; + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStream.java new file mode 100644 index 000000000..7001c84a1 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStream.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + + +import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore; +import org.apache.commons.compress.parallel.ScatterGatherBackingStore; +import org.apache.commons.compress.utils.BoundedInputStream; + +import java.io.Closeable; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.zip.Deflater; + +/** + * A zip output stream that is optimized for multi-threaded scatter/gather construction of zip files. + * <p> + * The internal data format of the entries used by this class are entirely private to this class + * and are not part of any public api whatsoever. 
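// Hypothetical helper around the ScatterStatistics accessors above. Instances come back
// from the scatter creator's getStatisticsMessage() seen earlier; the constructor is
// package-private, so callers only consume them. The wording of the report is made up.
static String describeScatterRun(final ScatterStatistics stats) {
    // getCompressionElapsed() covers the parallel deflate phase,
    // getMergingElapsed() the single-threaded copy into the final archive.
    return "compressed in " + stats.getCompressionElapsed()
            + " ms, merged in " + stats.getMergingElapsed() + " ms";
}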
+ * </p> + * <p>It is possible to extend this class to support different kinds of backing storage; the default + * implementation only supports file-based backing. + * </p> + * Thread safety: This class supports multiple threads, but the "writeTo" method must be called + * by the thread that originally created the {@link ZipArchiveEntry}. + * + * @since 1.10 + */ +public class ScatterZipOutputStream implements Closeable { + private final Queue<CompressedEntry> items = new ConcurrentLinkedQueue<>(); + private final ScatterGatherBackingStore backingStore; + private final StreamCompressor streamCompressor; + + private static class CompressedEntry { + final ZipArchiveEntryRequest zipArchiveEntryRequest; + final long crc; + final long compressedSize; + final long size; + + public CompressedEntry(final ZipArchiveEntryRequest zipArchiveEntryRequest, final long crc, final long compressedSize, final long size) { + this.zipArchiveEntryRequest = zipArchiveEntryRequest; + this.crc = crc; + this.compressedSize = compressedSize; + this.size = size; + } + + /** + * Updates the original {@link ZipArchiveEntry} with sizes/crc. + * Do not use this method from threads that did not create the instance itself. + * @return the zipArchiveEntry that is the basis for this request + */ + + public ZipArchiveEntry transferToArchiveEntry(){ + final ZipArchiveEntry entry = zipArchiveEntryRequest.getZipArchiveEntry(); + entry.setCompressedSize(compressedSize); + entry.setSize(size); + entry.setCrc(crc); + entry.setMethod(zipArchiveEntryRequest.getMethod()); + return entry; + } + } + + public ScatterZipOutputStream(final ScatterGatherBackingStore backingStore, + final StreamCompressor streamCompressor) { + this.backingStore = backingStore; + this.streamCompressor = streamCompressor; + } + + /** + * Add an archive entry to this scatter stream. + * + * @param zipArchiveEntryRequest The entry to write. + * @throws IOException If writing fails + */ + public void addArchiveEntry(final ZipArchiveEntryRequest zipArchiveEntryRequest) throws IOException { + try (final InputStream payloadStream = zipArchiveEntryRequest.getPayloadStream()) { + streamCompressor.deflate(payloadStream, zipArchiveEntryRequest.getMethod()); + } + items.add(new CompressedEntry(zipArchiveEntryRequest, streamCompressor.getCrc32(), + streamCompressor.getBytesWrittenForLastEntry(), streamCompressor.getBytesRead())); + } + + /** + * Write the contents of this scatter stream to a target archive. + * + * @param target The archive to receive the contents of this {@link ScatterZipOutputStream}. + * @throws IOException If writing fails + */ + public void writeTo(final ZipArchiveOutputStream target) throws IOException { + backingStore.closeForWriting(); + try (final InputStream data = backingStore.getInputStream()) { + for (final CompressedEntry compressedEntry : items) { + try (final BoundedInputStream rawStream = new BoundedInputStream(data, + compressedEntry.compressedSize)) { + target.addRawArchiveEntry(compressedEntry.transferToArchiveEntry(), rawStream); + } + } + } + } + + + /** + * Closes this stream, freeing all resources involved in the creation of this stream. + * @throws IOException If closing fails + */ + @Override + public void close() throws IOException { + try { + backingStore.close(); + } finally { + streamCompressor.close(); + } + } + + /** + * Create a {@link ScatterZipOutputStream} with the default compression level that is backed by a file + * + * @param file The file to offload compressed data into. 
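// Hypothetical end-to-end sketch of the scatter workflow on a single thread: compress an
// entry into a file-backed ScatterZipOutputStream, then replay it into the final archive.
// ZipArchiveEntryRequest.createZipArchiveEntryRequest and InputStreamSupplier are assumed
// from the parallel support added in this change; the file names and payload are made up.
//
// import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException;
// import java.nio.charset.StandardCharsets; import java.util.zip.ZipEntry;
// import org.apache.commons.compress.archivers.zip.*;
// import org.apache.commons.compress.parallel.InputStreamSupplier;
static void scatterThenGather(final File scratch, final File zip) throws IOException {
    try (ScatterZipOutputStream scatter = ScatterZipOutputStream.fileBased(scratch);
         ZipArchiveOutputStream target = new ZipArchiveOutputStream(zip)) {
        final ZipArchiveEntry entry = new ZipArchiveEntry("hello.txt");   // made-up entry
        entry.setMethod(ZipEntry.DEFLATED);                               // method must be set
        final InputStreamSupplier payload = () ->
                new ByteArrayInputStream("hello".getBytes(StandardCharsets.UTF_8));
        scatter.addArchiveEntry(ZipArchiveEntryRequest.createZipArchiveEntryRequest(entry, payload));
        scatter.writeTo(target);   // replays the pre-compressed data as raw entries
    }
}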
+ * @return A ScatterZipOutputStream that is ready for use. + * @throws FileNotFoundException if the file cannot be found + */ + public static ScatterZipOutputStream fileBased(final File file) throws FileNotFoundException { + return fileBased(file, Deflater.DEFAULT_COMPRESSION); + } + + /** + * Create a {@link ScatterZipOutputStream} that is backed by a file + * + * @param file The file to offload compressed data into. + * @param compressionLevel The compression level to use, @see #Deflater + * @return A ScatterZipOutputStream that is ready for use. + * @throws FileNotFoundException if the file cannot be found + */ + public static ScatterZipOutputStream fileBased(final File file, final int compressionLevel) throws FileNotFoundException { + final ScatterGatherBackingStore bs = new FileBasedScatterGatherBackingStore(file); + // lifecycle is bound to the ScatterZipOutputStream returned + final StreamCompressor sc = StreamCompressor.create(compressionLevel, bs); //NOSONAR + return new ScatterZipOutputStream(bs, sc); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/StreamCompressor.java b/src/main/java/org/apache/commons/compress/archivers/zip/StreamCompressor.java new file mode 100644 index 000000000..1e8d68b64 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/StreamCompressor.java @@ -0,0 +1,340 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import org.apache.commons.compress.parallel.ScatterGatherBackingStore; + +import java.io.Closeable; +import java.io.DataOutput; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; +import java.util.zip.CRC32; +import java.util.zip.Deflater; +import java.util.zip.ZipEntry; + +/** + * Encapsulates a {@link Deflater} and crc calculator, handling multiple types of output streams. + * Currently {@link java.util.zip.ZipEntry#DEFLATED} and {@link java.util.zip.ZipEntry#STORED} are the only + * supported compression methods. + * + * @since 1.10 + */ +public abstract class StreamCompressor implements Closeable { + + /* + * Apparently Deflater.setInput gets slowed down a lot on Sun JVMs + * when it gets handed a really big buffer. 
See + * https://issues.apache.org/bugzilla/show_bug.cgi?id=45396 + * + * Using a buffer size of 8 kB proved to be a good compromise + */ + private static final int DEFLATER_BLOCK_SIZE = 8192; + + private final Deflater def; + + private final CRC32 crc = new CRC32(); + + private long writtenToOutputStreamForLastEntry = 0; + private long sourcePayloadLength = 0; + private long totalWrittenToOutputStream = 0; + + private static final int BUFFER_SIZE = 4096; + private final byte[] outputBuffer = new byte[BUFFER_SIZE]; + private final byte[] readerBuf = new byte[BUFFER_SIZE]; + + StreamCompressor(final Deflater deflater) { + this.def = deflater; + } + + /** + * Create a stream compressor with the given compression level. + * + * @param os The stream to receive output + * @param deflater The deflater to use + * @return A stream compressor + */ + static StreamCompressor create(final OutputStream os, final Deflater deflater) { + return new OutputStreamCompressor(deflater, os); + } + + /** + * Create a stream compressor with the default compression level. + * + * @param os The stream to receive output + * @return A stream compressor + */ + static StreamCompressor create(final OutputStream os) { + return create(os, new Deflater(Deflater.DEFAULT_COMPRESSION, true)); + } + + /** + * Create a stream compressor with the given compression level. + * + * @param os The DataOutput to receive output + * @param deflater The deflater to use for the compressor + * @return A stream compressor + */ + static StreamCompressor create(final DataOutput os, final Deflater deflater) { + return new DataOutputCompressor(deflater, os); + } + + /** + * Create a stream compressor with the given compression level. + * + * @param os The SeekableByteChannel to receive output + * @param deflater The deflater to use for the compressor + * @return A stream compressor + * @since 1.13 + */ + static StreamCompressor create(final SeekableByteChannel os, final Deflater deflater) { + return new SeekableByteChannelCompressor(deflater, os); + } + + /** + * Create a stream compressor with the given compression level. + * + * @param compressionLevel The {@link Deflater} compression level + * @param bs The ScatterGatherBackingStore to receive output + * @return A stream compressor + */ + public static StreamCompressor create(final int compressionLevel, final ScatterGatherBackingStore bs) { + final Deflater deflater = new Deflater(compressionLevel, true); + return new ScatterGatherBackingStoreCompressor(deflater, bs); + } + + /** + * Create a stream compressor with the default compression level. 
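// Hypothetical sketch of the public StreamCompressor factories above: compress one payload
// into a file-backed ScatterGatherBackingStore and read back the bookkeeping counters. The
// spool-file name is an assumption; the package-private factories taking OutputStream,
// DataOutput or SeekableByteChannel are used internally by ZipArchiveOutputStream and are
// not exercised here.
//
// import java.io.File; import java.io.IOException; import java.io.InputStream;
// import java.util.zip.Deflater; import java.util.zip.ZipEntry;
// import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore;
// import org.apache.commons.compress.parallel.ScatterGatherBackingStore;
static void compressOnce(final File spool, final InputStream payload) throws IOException {
    final ScatterGatherBackingStore store = new FileBasedScatterGatherBackingStore(spool);
    try (StreamCompressor compressor = StreamCompressor.create(Deflater.BEST_COMPRESSION, store)) {
        compressor.deflate(payload, ZipEntry.DEFLATED);
        final long crc = compressor.getCrc32();                       // CRC of the uncompressed data
        final long raw = compressor.getBytesRead();                   // uncompressed size
        final long packed = compressor.getBytesWrittenForLastEntry(); // compressed size
        System.out.printf("crc=%08x, %d -> %d bytes%n", crc, raw, packed);
    } finally {
        store.close();
    }
}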
+ * + * @param bs The ScatterGatherBackingStore to receive output + * @return A stream compressor + */ + public static StreamCompressor create(final ScatterGatherBackingStore bs) { + return create(Deflater.DEFAULT_COMPRESSION, bs); + } + + /** + * The crc32 of the last deflated file + * + * @return the crc32 + */ + + public long getCrc32() { + return crc.getValue(); + } + + /** + * Return the number of bytes read from the source stream + * + * @return The number of bytes read, never negative + */ + public long getBytesRead() { + return sourcePayloadLength; + } + + /** + * The number of bytes written to the output for the last entry + * + * @return The number of bytes, never negative + */ + public long getBytesWrittenForLastEntry() { + return writtenToOutputStreamForLastEntry; + } + + /** + * The total number of bytes written to the output for all files + * + * @return The number of bytes, never negative + */ + public long getTotalBytesWritten() { + return totalWrittenToOutputStream; + } + + + /** + * Deflate the given source using the supplied compression method + * + * @param source The source to compress + * @param method The #ZipArchiveEntry compression method + * @throws IOException When failures happen + */ + + public void deflate(final InputStream source, final int method) throws IOException { + reset(); + int length; + + while ((length = source.read(readerBuf, 0, readerBuf.length)) >= 0) { + write(readerBuf, 0, length, method); + } + if (method == ZipEntry.DEFLATED) { + flushDeflater(); + } + } + + /** + * Writes bytes to ZIP entry. + * + * @param b the byte array to write + * @param offset the start position to write from + * @param length the number of bytes to write + * @param method the comrpession method to use + * @return the number of bytes written to the stream this time + * @throws IOException on error + */ + long write(final byte[] b, final int offset, final int length, final int method) throws IOException { + final long current = writtenToOutputStreamForLastEntry; + crc.update(b, offset, length); + if (method == ZipEntry.DEFLATED) { + writeDeflated(b, offset, length); + } else { + writeCounted(b, offset, length); + } + sourcePayloadLength += length; + return writtenToOutputStreamForLastEntry - current; + } + + + void reset() { + crc.reset(); + def.reset(); + sourcePayloadLength = 0; + writtenToOutputStreamForLastEntry = 0; + } + + @Override + public void close() throws IOException { + def.end(); + } + + void flushDeflater() throws IOException { + def.finish(); + while (!def.finished()) { + deflate(); + } + } + + private void writeDeflated(final byte[] b, final int offset, final int length) + throws IOException { + if (length > 0 && !def.finished()) { + if (length <= DEFLATER_BLOCK_SIZE) { + def.setInput(b, offset, length); + deflateUntilInputIsNeeded(); + } else { + final int fullblocks = length / DEFLATER_BLOCK_SIZE; + for (int i = 0; i < fullblocks; i++) { + def.setInput(b, offset + i * DEFLATER_BLOCK_SIZE, + DEFLATER_BLOCK_SIZE); + deflateUntilInputIsNeeded(); + } + final int done = fullblocks * DEFLATER_BLOCK_SIZE; + if (done < length) { + def.setInput(b, offset + done, length - done); + deflateUntilInputIsNeeded(); + } + } + } + } + + private void deflateUntilInputIsNeeded() throws IOException { + while (!def.needsInput()) { + deflate(); + } + } + + void deflate() throws IOException { + final int len = def.deflate(outputBuffer, 0, outputBuffer.length); + if (len > 0) { + writeCounted(outputBuffer, 0, len); + } + } + + public void writeCounted(final byte[] data) 
throws IOException { + writeCounted(data, 0, data.length); + } + + public void writeCounted(final byte[] data, final int offset, final int length) throws IOException { + writeOut(data, offset, length); + writtenToOutputStreamForLastEntry += length; + totalWrittenToOutputStream += length; + } + + protected abstract void writeOut(byte[] data, int offset, int length) throws IOException; + + private static final class ScatterGatherBackingStoreCompressor extends StreamCompressor { + private final ScatterGatherBackingStore bs; + + public ScatterGatherBackingStoreCompressor(final Deflater deflater, final ScatterGatherBackingStore bs) { + super(deflater); + this.bs = bs; + } + + @Override + protected final void writeOut(final byte[] data, final int offset, final int length) + throws IOException { + bs.writeOut(data, offset, length); + } + } + + private static final class OutputStreamCompressor extends StreamCompressor { + private final OutputStream os; + + public OutputStreamCompressor(final Deflater deflater, final OutputStream os) { + super(deflater); + this.os = os; + } + + @Override + protected final void writeOut(final byte[] data, final int offset, final int length) + throws IOException { + os.write(data, offset, length); + } + } + + private static final class DataOutputCompressor extends StreamCompressor { + private final DataOutput raf; + + public DataOutputCompressor(final Deflater deflater, final DataOutput raf) { + super(deflater); + this.raf = raf; + } + + @Override + protected final void writeOut(final byte[] data, final int offset, final int length) + throws IOException { + raf.write(data, offset, length); + } + } + + private static final class SeekableByteChannelCompressor extends StreamCompressor { + private final SeekableByteChannel channel; + + public SeekableByteChannelCompressor(final Deflater deflater, + final SeekableByteChannel channel) { + super(deflater); + this.channel = channel; + } + + @Override + protected final void writeOut(final byte[] data, final int offset, final int length) + throws IOException { + channel.write(ByteBuffer.wrap(data, offset, length)); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/UnicodeCommentExtraField.java b/src/main/java/org/apache/commons/compress/archivers/zip/UnicodeCommentExtraField.java new file mode 100644 index 000000000..0b0e84e26 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/UnicodeCommentExtraField.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress.archivers.zip; + +/** + * Info-ZIP Unicode Comment Extra Field (0x6375): + * + * <p>Stores the UTF-8 version of the file comment as stored in the + * central directory header.</p> + * + * @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">PKWARE + * APPNOTE.TXT, section 4.6.8</a> + * + * @NotThreadSafe super-class is not thread-safe + */ +public class UnicodeCommentExtraField extends AbstractUnicodeExtraField { + + public static final ZipShort UCOM_ID = new ZipShort(0x6375); + + public UnicodeCommentExtraField () { + } + + /** + * Assemble as unicode comment extension from the name given as + * text as well as the encoded bytes actually written to the archive. + * + * @param text The file name + * @param bytes the bytes actually written to the archive + * @param off The offset of the encoded comment in <code>bytes</code>. + * @param len The length of the encoded comment or comment in + * <code>bytes</code>. + */ + public UnicodeCommentExtraField(final String text, final byte[] bytes, final int off, + final int len) { + super(text, bytes, off, len); + } + + /** + * Assemble as unicode comment extension from the comment given as + * text as well as the bytes actually written to the archive. + * + * @param comment The file comment + * @param bytes the bytes actually written to the archive + */ + public UnicodeCommentExtraField(final String comment, final byte[] bytes) { + super(comment, bytes); + } + + @Override + public ZipShort getHeaderId() { + return UCOM_ID; + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/UnicodePathExtraField.java b/src/main/java/org/apache/commons/compress/archivers/zip/UnicodePathExtraField.java new file mode 100644 index 000000000..510c5adeb --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/UnicodePathExtraField.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +/** + * Info-ZIP Unicode Path Extra Field (0x7075): + * + * <p>Stores the UTF-8 version of the file name field as stored in the + * local header and central directory header.</p> + * + * @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">PKWARE + * APPNOTE.TXT, section 4.6.9</a> + * + * @NotThreadSafe super-class is not thread-safe + */ +public class UnicodePathExtraField extends AbstractUnicodeExtraField { + + public static final ZipShort UPATH_ID = new ZipShort(0x7075); + + public UnicodePathExtraField () { + } + + /** + * Assemble as unicode path extension from the name given as + * text as well as the encoded bytes actually written to the archive. 
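// Hypothetical sketch: attach the Info-ZIP unicode extra fields described above to an entry
// whose name and comment had to be written in a non-UTF-8 archive encoding. The comment
// text and the charset parameter are invented for illustration.
//
// import java.nio.charset.Charset;
// import org.apache.commons.compress.archivers.zip.*;
static void addUnicodeFields(final ZipArchiveEntry entry, final Charset archiveCharset) {
    final String name = entry.getName();
    final String comment = "größe";                          // made-up comment
    entry.setComment(comment);
    entry.addExtraField(new UnicodePathExtraField(name, name.getBytes(archiveCharset)));
    entry.addExtraField(new UnicodeCommentExtraField(comment, comment.getBytes(archiveCharset)));
}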
+ * + * @param text The file name + * @param bytes the bytes actually written to the archive + * @param off The offset of the encoded filename in <code>bytes</code>. + * @param len The length of the encoded filename or comment in + * <code>bytes</code>. + */ + public UnicodePathExtraField(final String text, final byte[] bytes, final int off, final int len) { + super(text, bytes, off, len); + } + + /** + * Assemble as unicode path extension from the name given as + * text as well as the encoded bytes actually written to the archive. + * + * @param name The file name + * @param bytes the bytes actually written to the archive + */ + public UnicodePathExtraField(final String name, final byte[] bytes) { + super(name, bytes); + } + + @Override + public ZipShort getHeaderId() { + return UPATH_ID; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/UnixStat.java b/src/main/java/org/apache/commons/compress/archivers/zip/UnixStat.java new file mode 100644 index 000000000..a1b20be39 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/UnixStat.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +/** + * Constants from stat.h on Unix systems. + */ +// CheckStyle:InterfaceIsTypeCheck OFF - backward compatible +public interface UnixStat { + + /** + * Bits used for permissions (and sticky bit) + */ + int PERM_MASK = 07777; + /** + * Bits used to indicate the filesystem object type. + * @since 1.14 + */ + int FILE_TYPE_FLAG = 0170000; + /** + * Indicates symbolic links. + */ + int LINK_FLAG = 0120000; + /** + * Indicates plain files. + */ + int FILE_FLAG = 0100000; + /** + * Indicates directories. + */ + int DIR_FLAG = 040000; + + // ---------------------------------------------------------- + // somewhat arbitrary choices that are quite common for shared + // installations + // ----------------------------------------------------------- + + /** + * Default permissions for symbolic links. + */ + int DEFAULT_LINK_PERM = 0777; + + /** + * Default permissions for directories. + */ + int DEFAULT_DIR_PERM = 0755; + + /** + * Default permissions for plain files. + */ + int DEFAULT_FILE_PERM = 0644; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/UnparseableExtraFieldData.java b/src/main/java/org/apache/commons/compress/archivers/zip/UnparseableExtraFieldData.java new file mode 100644 index 000000000..d7d24dfed --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/UnparseableExtraFieldData.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
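// Hypothetical sketch using the UnixStat constants above: mark one entry as a regular file
// with the default 0644 permissions and another as a symbolic link. The entries are assumed
// to exist already; the link target still has to be written as the entry's data by the caller.
//
// import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
static void applyUnixModes(final ZipArchiveEntry file, final ZipArchiveEntry link) {
    file.setUnixMode(UnixStat.FILE_FLAG | UnixStat.DEFAULT_FILE_PERM);  // 0100644
    link.setUnixMode(UnixStat.LINK_FLAG | UnixStat.DEFAULT_LINK_PERM);  // 0120777
}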
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +/** + * Wrapper for extra field data that doesn't conform to the recommended format of header-tag + size + data. + * + * <p>The header-id is artificial (and not listed as a known ID in <a + * href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">APPNOTE.TXT</a>). Since it isn't used anywhere + * except to satisfy the ZipExtraField contract it shouldn't matter anyway.</p> + * + * @since 1.1 + * @NotThreadSafe + */ +public final class UnparseableExtraFieldData implements ZipExtraField { + private static final ZipShort HEADER_ID = new ZipShort(0xACC1); + + private byte[] localFileData; + private byte[] centralDirectoryData; + + /** + * The Header-ID. + * + * @return a completely arbitrary value that should be ignored. + */ + @Override + public ZipShort getHeaderId() { + return HEADER_ID; + } + + /** + * Length of the complete extra field in the local file data. + * + * @return The LocalFileDataLength value + */ + @Override + public ZipShort getLocalFileDataLength() { + return new ZipShort(localFileData == null ? 0 : localFileData.length); + } + + /** + * Length of the complete extra field in the central directory. + * + * @return The CentralDirectoryLength value + */ + @Override + public ZipShort getCentralDirectoryLength() { + return centralDirectoryData == null + ? getLocalFileDataLength() + : new ZipShort(centralDirectoryData.length); + } + + /** + * The actual data to put into local file data. + * + * @return The LocalFileDataData value + */ + @Override + public byte[] getLocalFileDataData() { + return ZipUtil.copy(localFileData); + } + + /** + * The actual data to put into central directory. + * + * @return The CentralDirectoryData value + */ + @Override + public byte[] getCentralDirectoryData() { + return centralDirectoryData == null + ? getLocalFileDataData() : ZipUtil.copy(centralDirectoryData); + } + + /** + * Populate data from this array as if it was in local file data. + * + * @param buffer the buffer to read data from + * @param offset offset into buffer to read data + * @param length the length of data + */ + @Override + public void parseFromLocalFileData(final byte[] buffer, final int offset, final int length) { + localFileData = new byte[length]; + System.arraycopy(buffer, offset, localFileData, 0, length); + } + + /** + * Populate data from this array as if it was in central directory data. 
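// Hypothetical read-side sketch: when extra-field parsing is configured to keep rather than
// reject malformed data, the raw bytes end up in the class above. getUnparseableExtraFieldData()
// is assumed to be the accessor on ZipArchiveEntry; treat this as a sketch of how the field
// would be consumed, not a guarantee of when it is populated.
static int unparseableExtraFieldLength(final ZipArchiveEntry entry) {
    final UnparseableExtraFieldData leftover = entry.getUnparseableExtraFieldData();
    return leftover == null ? 0 : leftover.getLocalFileDataData().length;
}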
+ * + * @param buffer the buffer to read data from + * @param offset offset into buffer to read data + * @param length the length of data + */ + @Override + public void parseFromCentralDirectoryData(final byte[] buffer, final int offset, + final int length) { + centralDirectoryData = new byte[length]; + System.arraycopy(buffer, offset, centralDirectoryData, 0, length); + if (localFileData == null) { + parseFromLocalFileData(buffer, offset, length); + } + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/UnrecognizedExtraField.java b/src/main/java/org/apache/commons/compress/archivers/zip/UnrecognizedExtraField.java new file mode 100644 index 000000000..f8ea8b973 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/UnrecognizedExtraField.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +/** + * Simple placeholder for all those extra fields we don't want to deal + * with. + * + * <p>Assumes local file data and central directory entries are + * identical - unless told the opposite.</p> + * @NotThreadSafe + */ +public class UnrecognizedExtraField implements ZipExtraField { + + /** + * The Header-ID. + */ + private ZipShort headerId; + + /** + * Set the header id. + * @param headerId the header id to use + */ + public void setHeaderId(final ZipShort headerId) { + this.headerId = headerId; + } + + /** + * Get the header id. + * @return the header id + */ + @Override + public ZipShort getHeaderId() { + return headerId; + } + + /** + * Extra field data in local file data - without + * Header-ID or length specifier. + */ + private byte[] localData; + + /** + * Set the extra field data in the local file data - + * without Header-ID or length specifier. + * @param data the field data to use + */ + public void setLocalFileDataData(final byte[] data) { + localData = ZipUtil.copy(data); + } + + /** + * Get the length of the local data. + * @return the length of the local data + */ + @Override + public ZipShort getLocalFileDataLength() { + return new ZipShort(localData != null ? localData.length : 0); + } + + /** + * Get the local data. + * @return the local data + */ + @Override + public byte[] getLocalFileDataData() { + return ZipUtil.copy(localData); + } + + /** + * Extra field data in central directory - without + * Header-ID or length specifier. + */ + private byte[] centralData; + + /** + * Set the extra field data in central directory. + * @param data the data to use + */ + public void setCentralDirectoryData(final byte[] data) { + centralData = ZipUtil.copy(data); + } + + /** + * Get the central data length. + * If there is no central data, get the local file data length. 
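// Hypothetical sketch for the placeholder field above: carry an application-specific extra
// field through unchanged. The 0x6c78 header id and the payload are invented for the example
// and carry no meaning in APPNOTE.TXT.
static UnrecognizedExtraField passThroughField(final byte[] rawPayload) {
    final UnrecognizedExtraField field = new UnrecognizedExtraField();
    field.setHeaderId(new ZipShort(0x6c78));        // invented, unregistered id
    field.setLocalFileDataData(rawPayload);         // central data falls back to local data
    return field;
}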
+ * @return the central data length + */ + @Override + public ZipShort getCentralDirectoryLength() { + if (centralData != null) { + return new ZipShort(centralData.length); + } + return getLocalFileDataLength(); + } + + /** + * Get the central data. + * @return the central data if present, else return the local file data + */ + @Override + public byte[] getCentralDirectoryData() { + if (centralData != null) { + return ZipUtil.copy(centralData); + } + return getLocalFileDataData(); + } + + /** + * @param data the array of bytes. + * @param offset the source location in the data array. + * @param length the number of bytes to use in the data array. + * @see ZipExtraField#parseFromLocalFileData(byte[], int, int) + */ + @Override + public void parseFromLocalFileData(final byte[] data, final int offset, final int length) { + final byte[] tmp = new byte[length]; + System.arraycopy(data, offset, tmp, 0, length); + setLocalFileDataData(tmp); + } + + /** + * @param data the array of bytes. + * @param offset the source location in the data array. + * @param length the number of bytes to use in the data array. + * @see ZipExtraField#parseFromCentralDirectoryData(byte[], int, int) + */ + @Override + public void parseFromCentralDirectoryData(final byte[] data, final int offset, + final int length) { + final byte[] tmp = new byte[length]; + System.arraycopy(data, offset, tmp, 0, length); + setCentralDirectoryData(tmp); + if (localData == null) { + setLocalFileDataData(tmp); + } + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java new file mode 100644 index 000000000..11c904cad --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteOrder; + +import org.apache.commons.compress.compressors.lzw.LZWInputStream; + +/** + * Input stream that decompresses ZIP method 1 (unshrinking). A variation of the LZW algorithm, with some twists. + * @NotThreadSafe + * @since 1.7 + */ +class UnshrinkingInputStream extends LZWInputStream { + private static final int MAX_CODE_SIZE = 13; + private static final int MAX_TABLE_SIZE = 1 << MAX_CODE_SIZE; + private final boolean[] isUsed; + + /** + * IOException is not actually thrown! + * + * @param inputStream + * @throws IOException IOException is not actually thrown! 
+ */ + public UnshrinkingInputStream(final InputStream inputStream) throws IOException { + super(inputStream, ByteOrder.LITTLE_ENDIAN); + setClearCode(DEFAULT_CODE_SIZE); + initializeTables(MAX_CODE_SIZE); + isUsed = new boolean[getPrefixesLength()]; + for (int i = 0; i < (1 << 8); i++) { + isUsed[i] = true; + } + setTableSize(getClearCode() + 1); + } + + @Override + protected int addEntry(final int previousCode, final byte character) throws IOException { + int tableSize = getTableSize(); + while ((tableSize < MAX_TABLE_SIZE) && isUsed[tableSize]) { + tableSize++; + } + setTableSize(tableSize); + final int idx = addEntry(previousCode, character, MAX_TABLE_SIZE); + if (idx >= 0) { + isUsed[idx] = true; + } + return idx; + } + + private void partialClear() { + final boolean[] isParent = new boolean[MAX_TABLE_SIZE]; + for (int i = 0; i < isUsed.length; i++) { + if (isUsed[i] && getPrefix(i) != UNUSED_PREFIX) { + isParent[getPrefix(i)] = true; + } + } + for (int i = getClearCode() + 1; i < isParent.length; i++) { + if (!isParent[i]) { + isUsed[i] = false; + setPrefix(i, UNUSED_PREFIX); + } + } + } + + @Override + protected int decompressNextSymbol() throws IOException { + // + // table entry table entry + // _____________ _____ + // table entry / \ / \ + // ____________/ \ \ + // / / \ / \ \ + // +---+---+---+---+---+---+---+---+---+---+ + // | . | . | . | . | . | . | . | . | . | . | + // +---+---+---+---+---+---+---+---+---+---+ + // |<--------->|<------------->|<----->|<->| + // symbol symbol symbol symbol + // + final int code = readNextCode(); + if (code < 0) { + return -1; + } else if (code == getClearCode()) { + final int subCode = readNextCode(); + if (subCode < 0) { + throw new IOException("Unexpected EOF;"); + } else if (subCode == 1) { + if (getCodeSize() < MAX_CODE_SIZE) { + incrementCodeSize(); + } else { + throw new IOException("Attempt to increase code size beyond maximum"); + } + } else if (subCode == 2) { + partialClear(); + setTableSize(getClearCode() + 1); + } else { + throw new IOException("Invalid clear code subcode " + subCode); + } + return 0; + } else { + boolean addedUnfinishedEntry = false; + int effectiveCode = code; + if (!isUsed[code]) { + effectiveCode = addRepeatOfPreviousCode(); + addedUnfinishedEntry = true; + } + return expandCodeToOutputStack(effectiveCode, addedUnfinishedEntry); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/UnsupportedZipFeatureException.java b/src/main/java/org/apache/commons/compress/archivers/zip/UnsupportedZipFeatureException.java new file mode 100644 index 000000000..a92f9c71a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/UnsupportedZipFeatureException.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress.archivers.zip; + +import java.util.zip.ZipException; + +/** + * Exception thrown when attempting to read or write data for a zip + * entry that uses ZIP features not supported by this library. + * @since 1.1 + */ +public class UnsupportedZipFeatureException extends ZipException { + + private final Feature reason; + private transient final ZipArchiveEntry entry; + private static final long serialVersionUID = 20161219L; + + /** + * Creates an exception. + * @param reason the feature that is not supported + * @param entry the entry using the feature + */ + public UnsupportedZipFeatureException(final Feature reason, + final ZipArchiveEntry entry) { + super("unsupported feature " + reason + " used in entry " + + entry.getName()); + this.reason = reason; + this.entry = entry; + } + + /** + * Creates an exception for archives that use an unsupported + * compression algorithm. + * @param method the method that is not supported + * @param entry the entry using the feature + * @since 1.5 + */ + public UnsupportedZipFeatureException(final ZipMethod method, + final ZipArchiveEntry entry) { + super("unsupported feature method '" + method.name() + + "' used in entry " + entry.getName()); + this.reason = Feature.METHOD; + this.entry = entry; + } + + /** + * Creates an exception when the whole archive uses an unsupported + * feature. + * + * @param reason the feature that is not supported + * @since 1.5 + */ + public UnsupportedZipFeatureException(final Feature reason) { + super("unsupported feature " + reason + " used in archive."); + this.reason = reason; + this.entry = null; + } + + /** + * The unsupported feature that has been used. + * @return The unsupported feature that has been used. + */ + public Feature getFeature() { + return reason; + } + + /** + * The entry using the unsupported feature. + * @return The entry using the unsupported feature. + */ + public ZipArchiveEntry getEntry() { + return entry; + } + + /** + * ZIP Features that may or may not be supported. + * @since 1.1 + */ + public static class Feature implements java.io.Serializable { + + private static final long serialVersionUID = 4112582948775420359L; + /** + * The entry is encrypted. + */ + public static final Feature ENCRYPTION = new Feature("encryption"); + /** + * The entry used an unsupported compression method. + */ + public static final Feature METHOD = new Feature("compression method"); + /** + * The entry uses a data descriptor. + */ + public static final Feature DATA_DESCRIPTOR = new Feature("data descriptor"); + /** + * The archive uses splitting or spanning. + * @since 1.5 + */ + public static final Feature SPLITTING = new Feature("splitting"); + /** + * The archive contains entries with unknown compressed size + * for a compression method that doesn't support detection of + * the end of the compressed stream. 
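// Hypothetical sketch: reacting to the exception above while probing entries of a ZipFile.
// The logging is a placeholder; which features actually surface depends entirely on the
// archive being read.
//
// import java.io.IOException;
// import org.apache.commons.compress.archivers.zip.ZipFile;
static void probeEntry(final ZipFile zip, final ZipArchiveEntry entry) throws IOException {
    try {
        zip.getInputStream(entry).close();   // only checks whether the entry is readable
    } catch (final UnsupportedZipFeatureException e) {
        final String where = e.getEntry() != null ? e.getEntry().getName() : "<archive>";
        System.err.println("Skipping " + where + ": unsupported " + e.getFeature());
    }
}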
+ * @since 1.16 + */ + public static final Feature UNKNOWN_COMPRESSED_SIZE = new Feature("unknown compressed size"); + + private final String name; + + private Feature(final String name) { + this.name = name; + } + + @Override + public String toString() { + return name; + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/X000A_NTFS.java b/src/main/java/org/apache/commons/compress/archivers/zip/X000A_NTFS.java new file mode 100644 index 000000000..2dd5c33e6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/X000A_NTFS.java @@ -0,0 +1,394 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import java.util.Date; +import java.util.zip.ZipException; + +/** + * NTFS extra field that was thought to store various attributes but + * in reality only stores timestamps. + * + * <pre> + * 4.5.5 -NTFS Extra Field (0x000a): + * + * The following is the layout of the NTFS attributes + * "extra" block. (Note: At this time the Mtime, Atime + * and Ctime values MAY be used on any WIN32 system.) + * + * Note: all fields stored in Intel low-byte/high-byte order. + * + * Value Size Description + * ----- ---- ----------- + * (NTFS) 0x000a 2 bytes Tag for this "extra" block type + * TSize 2 bytes Size of the total "extra" block + * Reserved 4 bytes Reserved for future use + * Tag1 2 bytes NTFS attribute tag value #1 + * Size1 2 bytes Size of attribute #1, in bytes + * (var) Size1 Attribute #1 data + * . + * . + * . + * TagN 2 bytes NTFS attribute tag value #N + * SizeN 2 bytes Size of attribute #N, in bytes + * (var) SizeN Attribute #N data + * + * For NTFS, values for Tag1 through TagN are as follows: + * (currently only one set of attributes is defined for NTFS) + * + * Tag Size Description + * ----- ---- ----------- + * 0x0001 2 bytes Tag for attribute #1 + * Size1 2 bytes Size of attribute #1, in bytes + * Mtime 8 bytes File last modification time + * Atime 8 bytes File last access time + * Ctime 8 bytes File creation time + * </pre> + * + * @since 1.11 + * @NotThreadSafe + */ +public class X000A_NTFS implements ZipExtraField { + private static final ZipShort HEADER_ID = new ZipShort(0x000a); + private static final ZipShort TIME_ATTR_TAG = new ZipShort(0x0001); + private static final ZipShort TIME_ATTR_SIZE = new ZipShort(3 * 8); + + private ZipEightByteInteger modifyTime = ZipEightByteInteger.ZERO; + private ZipEightByteInteger accessTime = ZipEightByteInteger.ZERO; + private ZipEightByteInteger createTime = ZipEightByteInteger.ZERO; + + /** + * The Header-ID. 
+ * + * @return the value for the header id for this extrafield + */ + @Override + public ZipShort getHeaderId() { + return HEADER_ID; + } + + /** + * Length of the extra field in the local file data - without + * Header-ID or length specifier. + * + * @return a <code>ZipShort</code> for the length of the data of this extra field + */ + @Override + public ZipShort getLocalFileDataLength() { + return new ZipShort(4 /* reserved */ + + 2 /* Tag#1 */ + + 2 /* Size#1 */ + + 3 * 8 /* time values */); + } + + /** + * Length of the extra field in the local file data - without + * Header-ID or length specifier. + * + * <p>For X5455 the central length is often smaller than the + * local length, because central cannot contain access or create + * timestamps.</p> + * + * @return a <code>ZipShort</code> for the length of the data of this extra field + */ + @Override + public ZipShort getCentralDirectoryLength() { + return getLocalFileDataLength(); + } + + /** + * The actual data to put into local file data - without Header-ID + * or length specifier. + * + * @return get the data + */ + @Override + public byte[] getLocalFileDataData() { + final byte[] data = new byte[getLocalFileDataLength().getValue()]; + int pos = 4; + System.arraycopy(TIME_ATTR_TAG.getBytes(), 0, data, pos, 2); + pos += 2; + System.arraycopy(TIME_ATTR_SIZE.getBytes(), 0, data, pos, 2); + pos += 2; + System.arraycopy(modifyTime.getBytes(), 0, data, pos, 8); + pos += 8; + System.arraycopy(accessTime.getBytes(), 0, data, pos, 8); + pos += 8; + System.arraycopy(createTime.getBytes(), 0, data, pos, 8); + return data; + } + + /** + * The actual data to put into central directory data - without Header-ID + * or length specifier. + * + * @return the central directory data + */ + @Override + public byte[] getCentralDirectoryData() { + return getLocalFileDataData(); + } + + /** + * Populate data from this array as if it was in local file data. + * + * @param data an array of bytes + * @param offset the start offset + * @param length the number of bytes in the array from offset + * @throws java.util.zip.ZipException on error + */ + @Override + public void parseFromLocalFileData( + final byte[] data, int offset, final int length + ) throws ZipException { + final int len = offset + length; + + // skip reserved + offset += 4; + + while (offset + 4 <= len) { + final ZipShort tag = new ZipShort(data, offset); + offset += 2; + if (tag.equals(TIME_ATTR_TAG)) { + readTimeAttr(data, offset, len - offset); + break; + } + final ZipShort size = new ZipShort(data, offset); + offset += 2 + size.getValue(); + } + } + + /** + * Doesn't do anything special since this class always uses the + * same parsing logic for both central directory and local file data. + */ + @Override + public void parseFromCentralDirectoryData( + final byte[] buffer, final int offset, final int length + ) throws ZipException { + reset(); + parseFromLocalFileData(buffer, offset, length); + } + + /** + * Returns the "File last modification time" of this zip entry as + * a ZipEightByteInteger object, or {@link + * ZipEightByteInteger#ZERO} if no such timestamp exists in the + * zip entry. + * + * @return File last modification time + */ + public ZipEightByteInteger getModifyTime() { return modifyTime; } + + /** + * Returns the "File last access time" of this zip entry as a + * ZipEightByteInteger object, or {@link ZipEightByteInteger#ZERO} + * if no such timestamp exists in the zip entry. 
+ * + * @return File last access time + */ + public ZipEightByteInteger getAccessTime() { return accessTime; } + + /** + * Returns the "File creation time" of this zip entry as a + * ZipEightByteInteger object, or {@link ZipEightByteInteger#ZERO} + * if no such timestamp exists in the zip entry. + * + * @return File creation time + */ + public ZipEightByteInteger getCreateTime() { return createTime; } + + /** + * Returns the modify time as a java.util.Date + * of this zip entry, or null if no such timestamp exists in the zip entry. + * + * @return modify time as java.util.Date or null. + */ + public Date getModifyJavaTime() { + return zipToDate(modifyTime); + } + + /** + * Returns the access time as a java.util.Date + * of this zip entry, or null if no such timestamp exists in the zip entry. + * + * @return access time as java.util.Date or null. + */ + public Date getAccessJavaTime() { + return zipToDate(accessTime); + } + + /** + * Returns the create time as a a java.util.Date of this zip + * entry, or null if no such timestamp exists in the zip entry. + * + * @return create time as java.util.Date or null. + */ + public Date getCreateJavaTime() { + return zipToDate(createTime); + } + + /** + * Sets the File last modification time of this zip entry using a + * ZipEightByteInteger object. + * + * @param t ZipEightByteInteger of the modify time + */ + public void setModifyTime(final ZipEightByteInteger t) { + modifyTime = t == null ? ZipEightByteInteger.ZERO : t; + } + + /** + * Sets the File last access time of this zip entry using a + * ZipEightByteInteger object. + * + * @param t ZipEightByteInteger of the access time + */ + public void setAccessTime(final ZipEightByteInteger t) { + accessTime = t == null ? ZipEightByteInteger.ZERO : t; + } + + /** + * Sets the File creation time of this zip entry using a + * ZipEightByteInteger object. + * + * @param t ZipEightByteInteger of the create time + */ + public void setCreateTime(final ZipEightByteInteger t) { + createTime = t == null ? ZipEightByteInteger.ZERO : t; + } + + /** + * Sets the modify time as a java.util.Date of this zip entry. + * + * @param d modify time as java.util.Date + */ + public void setModifyJavaTime(final Date d) { setModifyTime(dateToZip(d)); } + + /** + * Sets the access time as a java.util.Date + * of this zip entry. + * + * @param d access time as java.util.Date + */ + public void setAccessJavaTime(final Date d) { setAccessTime(dateToZip(d)); } + + /** + * <p> + * Sets the create time as a java.util.Date + * of this zip entry. Supplied value is truncated to per-second + * precision (milliseconds zeroed-out). + * </p><p> + * Note: the setters for flags and timestamps are decoupled. + * Even if the timestamp is not-null, it will only be written + * out if the corresponding bit in the flags is also set. + * </p> + * + * @param d create time as java.util.Date + */ + public void setCreateJavaTime(final Date d) { setCreateTime(dateToZip(d)); } + + /** + * Returns a String representation of this class useful for + * debugging purposes. + * + * @return A String representation of this class useful for + * debugging purposes. 
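// Hypothetical sketch for the NTFS timestamp field above: store a modification time and read
// it back as a java.util.Date. The entry and date are assumed to be supplied by the caller;
// internally the value is kept as a Windows FILETIME (100 ns units since 1601-01-01), so a
// millisecond-precision Date survives the round trip unchanged.
//
// import java.util.Date;
// import org.apache.commons.compress.archivers.zip.*;
static void stampNtfsTime(final ZipArchiveEntry entry, final Date lastModified) {
    final X000A_NTFS ntfs = new X000A_NTFS();
    ntfs.setModifyJavaTime(lastModified);     // converted to 100 ns FILETIME units
    entry.addExtraField(ntfs);
    final Date roundTripped = ntfs.getModifyJavaTime();
    assert roundTripped != null && roundTripped.getTime() == lastModified.getTime();
}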
+ */ + @Override + public String toString() { + final StringBuilder buf = new StringBuilder(); + buf.append("0x000A Zip Extra Field:") + .append(" Modify:[").append(getModifyJavaTime()).append("] ") + .append(" Access:[").append(getAccessJavaTime()).append("] ") + .append(" Create:[").append(getCreateJavaTime()).append("] "); + return buf.toString(); + } + + @Override + public boolean equals(final Object o) { + if (o instanceof X000A_NTFS) { + final X000A_NTFS xf = (X000A_NTFS) o; + + return (modifyTime == xf.modifyTime || (modifyTime != null && modifyTime.equals(xf.modifyTime))) && + (accessTime == xf.accessTime || (accessTime != null && accessTime.equals(xf.accessTime))) && + (createTime == xf.createTime || (createTime != null && createTime.equals(xf.createTime))); + } + return false; + } + + @Override + public int hashCode() { + int hc = -123; + if (modifyTime != null) { + hc ^= modifyTime.hashCode(); + } + if (accessTime != null) { + // Since accessTime is often same as modifyTime, + // this prevents them from XOR negating each other. + hc ^= Integer.rotateLeft(accessTime.hashCode(), 11); + } + if (createTime != null) { + hc ^= Integer.rotateLeft(createTime.hashCode(), 22); + } + return hc; + } + + /** + * Reset state back to newly constructed state. Helps us make sure + * parse() calls always generate clean results. + */ + private void reset() { + this.modifyTime = ZipEightByteInteger.ZERO; + this.accessTime = ZipEightByteInteger.ZERO; + this.createTime = ZipEightByteInteger.ZERO; + } + + private void readTimeAttr(final byte[] data, int offset, final int length) { + if (length >= 2 + 3 * 8) { + final ZipShort tagValueLength = new ZipShort(data, offset); + if (TIME_ATTR_SIZE.equals(tagValueLength)) { + offset += 2; + modifyTime = new ZipEightByteInteger(data, offset); + offset += 8; + accessTime = new ZipEightByteInteger(data, offset); + offset += 8; + createTime = new ZipEightByteInteger(data, offset); + } + } + } + + // https://msdn.microsoft.com/en-us/library/windows/desktop/ms724290%28v=vs.85%29.aspx + // A file time is a 64-bit value that represents the number of + // 100-nanosecond intervals that have elapsed since 12:00 + // A.M. January 1, 1601 Coordinated Universal Time (UTC). + // this is the offset of Windows time 0 to Unix epoch in 100-nanosecond intervals + private static final long EPOCH_OFFSET = -116444736000000000L; + + private static ZipEightByteInteger dateToZip(final Date d) { + if (d == null) { return null; } + return new ZipEightByteInteger((d.getTime() * 10000L) - EPOCH_OFFSET); + } + + private static Date zipToDate(final ZipEightByteInteger z) { + if (z == null || ZipEightByteInteger.ZERO.equals(z)) { return null; } + final long l = (z.getLongValue() + EPOCH_OFFSET) / 10000L; + return new Date(l); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/X0014_X509Certificates.java b/src/main/java/org/apache/commons/compress/archivers/zip/X0014_X509Certificates.java new file mode 100644 index 000000000..3be78638e --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/X0014_X509Certificates.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +/** + * PKCS#7 Store for X.509 Certificates (0x0014). + * + * <p>This field MUST contain information about each of the certificates files may + * be signed with. When the Central Directory Encryption feature is enabled for + * a ZIP file, this record will appear in the Archive Extra Data Record, + * otherwise it will appear in the first central directory record and will be + * ignored in any other record.</p> + * + * <p>Note: all fields stored in Intel low-byte/high-byte order.</p> + * + * <pre> + * Value Size Description + * ----- ---- ----------- + * (Store) 0x0014 2 bytes Tag for this "extra" block type + * TSize 2 bytes Size of the store data + * TData TSize Data about the store + * </pre> + * + * @NotThreadSafe + * @since 1.11 + */ +public class X0014_X509Certificates extends PKWareExtraHeader { + + public X0014_X509Certificates() { + super(new ZipShort(0x0014)); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/X0015_CertificateIdForFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/X0015_CertificateIdForFile.java new file mode 100644 index 000000000..89b327b94 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/X0015_CertificateIdForFile.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +/** + * X.509 Certificate ID and Signature for individual file (0x0015). + * + * <p>This field contains the information about which certificate in the PKCS#7 + * store was used to sign a particular file. It also contains the signature + * data. This field can appear multiple times, but can only appear once per + * certificate.</p> + * + * <p>Note: all fields stored in Intel low-byte/high-byte order.</p> + * + * <pre> + * Value Size Description + * ----- ---- ----------- + * (CID) 0x0015 2 bytes Tag for this "extra" block type + * TSize 2 bytes Size of data that follows + * RCount 4 bytes Number of recipients. (inferred) + * HashAlg 2 bytes Hash algorithm identifier. 
(inferred) + * TData TSize Signature Data + * </pre> + * + * @NotThreadSafe + * @since 1.11 + */ +public class X0015_CertificateIdForFile extends PKWareExtraHeader { + + public X0015_CertificateIdForFile() { + super(new ZipShort(0x0015)); + } + + private int rcount; + private HashAlgorithm hashAlg; + + /** + * Get record count. + * @return the record count + */ + public int getRecordCount() { + return rcount; + } + + /** + * Get hash algorithm. + * @return the hash algorithm + */ + public HashAlgorithm getHashAlgorithm() { + return hashAlg; + } + + @Override + public void parseFromCentralDirectoryData(final byte[] data, final int offset, final int length) { + super.parseFromCentralDirectoryData(data, offset, length); + this.rcount = ZipShort.getValue(data, offset); + this.hashAlg = HashAlgorithm.getAlgorithmByCode(ZipShort.getValue(data, offset + 2)); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/X0016_CertificateIdForCentralDirectory.java b/src/main/java/org/apache/commons/compress/archivers/zip/X0016_CertificateIdForCentralDirectory.java new file mode 100644 index 000000000..bab1e6164 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/X0016_CertificateIdForCentralDirectory.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +/** + * X.509 Certificate ID and Signature for central directory (0x0016). + * + * <p>This field contains the information about which certificate in the PKCS#7 + * store was used to sign the central directory structure. When the Central + * Directory Encryption feature is enabled for a ZIP file, this record will + * appear in the Archive Extra Data Record, otherwise it will appear in the + * first central directory record.</p> + * + * <p>Note: all fields stored in Intel low-byte/high-byte order.</p> + * + * <pre> + * Value Size Description + * ----- ---- ----------- + * (CDID) 0x0016 2 bytes Tag for this "extra" block type + * TSize 2 bytes Size of data that follows + * RCount 4 bytes Number of recipients. (inferred) + * HashAlg 2 bytes Hash algorithm identifier. (inferred) + * TData TSize Data + * </pre> + * + * @NotThreadSafe + * @since 1.11 + */ +public class X0016_CertificateIdForCentralDirectory extends PKWareExtraHeader { + + public X0016_CertificateIdForCentralDirectory() { + super(new ZipShort(0x0016)); + } + + private int rcount; + private HashAlgorithm hashAlg; + + /** + * Get record count. + * @return the record count + */ + public int getRecordCount() { + return rcount; + } + + /** + * Get hash algorithm. 
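Both certificate-id fields here are decoded with plain little-endian reads via ZipShort.getValue. A self-contained sketch of that two-byte, low-byte-first read (the helper name is made up for illustration):

    // Reads an unsigned 16-bit value stored low byte first (Intel order),
    // equivalent to ZipShort.getValue(data, offset) as used in the parsers above.
    static int readLittleEndianUInt16(final byte[] data, final int offset) {
        return (data[offset] & 0xFF) | ((data[offset + 1] & 0xFF) << 8);
    }
    // Example: the bytes { 0x04, (byte) 0x80 } decode to 0x8004, the SHA-1
    // hash algorithm identifier mentioned in these headers.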
+ * @return the hash algorithm + */ + public HashAlgorithm getHashAlgorithm() { + return hashAlg; + } + + @Override + public void parseFromCentralDirectoryData(final byte[] data, final int offset, final int length) { + this.rcount = ZipShort.getValue(data, offset); + this.hashAlg = HashAlgorithm.getAlgorithmByCode(ZipShort.getValue(data, offset + 2)); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/X0017_StrongEncryptionHeader.java b/src/main/java/org/apache/commons/compress/archivers/zip/X0017_StrongEncryptionHeader.java new file mode 100644 index 000000000..acc3b2234 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/X0017_StrongEncryptionHeader.java @@ -0,0 +1,382 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +/** + * Strong Encryption Header (0x0017). + * + * <p>Certificate-based encryption:</p> + * + * <pre> + * Value Size Description + * ----- ---- ----------- + * 0x0017 2 bytes Tag for this "extra" block type + * TSize 2 bytes Size of data that follows + * Format 2 bytes Format definition for this record + * AlgID 2 bytes Encryption algorithm identifier + * Bitlen 2 bytes Bit length of encryption key (32-448 bits) + * Flags 2 bytes Processing flags + * RCount 4 bytes Number of recipients. + * HashAlg 2 bytes Hash algorithm identifier + * HSize 2 bytes Hash size + * SRList (var) Simple list of recipients hashed public keys + * + * Flags - This defines the processing flags. + * </pre> + * + * <ul> + * <li>0x0007 - reserved for future use + * <li>0x000F - reserved for future use + * <li>0x0100 - Indicates non-OAEP key wrapping was used. If this + * this field is set, the version needed to extract must + * be at least 61. This means OAEP key wrapping is not + * used when generating a Master Session Key using + * ErdData. + * <li>0x4000 - ErdData must be decrypted using 3DES-168, otherwise use the + * same algorithm used for encrypting the file contents. + * <li>0x8000 - reserved for future use + * </ul> + * + * <pre> + * RCount - This defines the number intended recipients whose + * public keys were used for encryption. This identifies + * the number of elements in the SRList. + * + * see also: reserved1 + * + * HashAlg - This defines the hash algorithm used to calculate + * the public key hash of each public key used + * for encryption. This field currently supports + * only the following value for SHA-1 + * + * 0x8004 - SHA1 + * + * HSize - This defines the size of a hashed public key. + * + * SRList - This is a variable length list of the hashed + * public keys for each intended recipient. Each + * element in this list is HSize. The total size of + * SRList is determined using RCount * HSize. 
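As a quick worked example of the SRList sizing rule above (all values illustrative): with the SHA-1 identifier the hashed public keys are 20 bytes each, so three recipients yield an SRList of 3 * 20 = 60 bytes.

    // Illustrative only: the SRList length is RCount * HSize.
    static int srListSize(final int rcount, final int hsize) {
        return rcount * hsize; // e.g. 3 recipients * 20-byte SHA-1 hashes = 60 bytes
    }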
+ * </pre> + * + * <p>Password-based Extra Field 0x0017 in central header only.</p> + * + * <pre> + * Value Size Description + * ----- ---- ----------- + * 0x0017 2 bytes Tag for this "extra" block type + * TSize 2 bytes Size of data that follows + * Format 2 bytes Format definition for this record + * AlgID 2 bytes Encryption algorithm identifier + * Bitlen 2 bytes Bit length of encryption key (32-448 bits) + * Flags 2 bytes Processing flags + * (more?) + * </pre> + * + * <p><b>Format</b> - the data format identifier for this record. The only value + * allowed at this time is the integer value 2.</p> + * + * <p>Password-based Extra Field 0x0017 preceding compressed file data.</p> + * + * <pre> + * Value Size Description + * ----- ---- ----------- + * 0x0017 2 bytes Tag for this "extra" block type + * IVSize 2 bytes Size of initialization vector (IV) + * IVData IVSize Initialization vector for this file + * Size 4 bytes Size of remaining decryption header data + * Format 2 bytes Format definition for this record + * AlgID 2 bytes Encryption algorithm identifier + * Bitlen 2 bytes Bit length of encryption key (32-448 bits) + * Flags 2 bytes Processing flags + * ErdSize 2 bytes Size of Encrypted Random Data + * ErdData ErdSize Encrypted Random Data + * Reserved1 4 bytes Reserved certificate processing data + * Reserved2 (var) Reserved for certificate processing data + * VSize 2 bytes Size of password validation data + * VData VSize-4 Password validation data + * VCRC32 4 bytes Standard ZIP CRC32 of password validation data + * + * IVData - The size of the IV should match the algorithm block size. + * The IVData can be completely random data. If the size of + * the randomly generated data does not match the block size + * it should be complemented with zero's or truncated as + * necessary. If IVSize is 0,then IV = CRC32 + Uncompressed + * File Size (as a 64 bit little-endian, unsigned integer value). + * + * Format - the data format identifier for this record. The only + * value allowed at this time is the integer value 2. + * + * ErdData - Encrypted random data is used to store random data that + * is used to generate a file session key for encrypting + * each file. SHA1 is used to calculate hash data used to + * derive keys. File session keys are derived from a master + * session key generated from the user-supplied password. + * If the Flags field in the decryption header contains + * the value 0x4000, then the ErdData field must be + * decrypted using 3DES. If the value 0x4000 is not set, + * then the ErdData field must be decrypted using AlgId. + * + * Reserved1 - Reserved for certificate processing, if value is + * zero, then Reserved2 data is absent. See the explanation + * under the Certificate Processing Method for details on + * this data structure. + * + * Reserved2 - If present, the size of the Reserved2 data structure + * is located by skipping the first 4 bytes of this field + * and using the next 2 bytes as the remaining size. See + * the explanation under the Certificate Processing Method + * for details on this data structure. + * + * VSize - This size value will always include the 4 bytes of the + * VCRC32 data and will be greater than 4 bytes. + * + * VData - Random data for password validation. This data is VSize + * in length and VSize must be a multiple of the encryption + * block size. VCRC32 is a checksum value of VData. + * VData and VCRC32 are stored encrypted and start the + * stream of encrypted data for a file. 
+ * </pre> + * + * <p>Reserved1 - Certificate Decryption Header Reserved1 Data:</p> + * + * <pre> + * Value Size Description + * ----- ---- ----------- + * RCount 4 bytes Number of recipients. + * </pre> + * + * <p>RCount - This defines the number intended recipients whose public keys were + * used for encryption. This defines the number of elements in the REList field + * defined below.</p> + * + * <p>Reserved2 - Certificate Decryption Header Reserved2 Data Structures:</p> + * + * <pre> + * Value Size Description + * ----- ---- ----------- + * HashAlg 2 bytes Hash algorithm identifier + * HSize 2 bytes Hash size + * REList (var) List of recipient data elements + * + * HashAlg - This defines the hash algorithm used to calculate + * the public key hash of each public key used + * for encryption. This field currently supports + * only the following value for SHA-1 + * + * 0x8004 - SHA1 + * + * HSize - This defines the size of a hashed public key + * defined in REHData. + * + * REList - This is a variable length of list of recipient data. + * Each element in this list consists of a Recipient + * Element data structure as follows: + * </pre> + * + * <p>Recipient Element (REList) Data Structure:</p> + * + * <pre> + * Value Size Description + * ----- ---- ----------- + * RESize 2 bytes Size of REHData + REKData + * REHData HSize Hash of recipients public key + * REKData (var) Simple key blob + * + * + * RESize - This defines the size of an individual REList + * element. This value is the combined size of the + * REHData field + REKData field. REHData is defined by + * HSize. REKData is variable and can be calculated + * for each REList element using RESize and HSize. + * + * REHData - Hashed public key for this recipient. + * + * REKData - Simple Key Blob. The format of this data structure + * is identical to that defined in the Microsoft + * CryptoAPI and generated using the CryptExportKey() + * function. The version of the Simple Key Blob + * supported at this time is 0x02 as defined by + * Microsoft. + * + * For more details see https://msdn.microsoft.com/en-us/library/aa920051.aspx + * </pre> + * + * <p><b>Flags</b> - Processing flags needed for decryption</p> + * + * <ul> + * <li>0x0001 - Password is required to decrypt</li> + * <li>0x0002 - Certificates only</li> + * <li>0x0003 - Password or certificate required to decrypt</li> + * <li>0x0007 - reserved for future use + * <li>0x000F - reserved for future use + * <li>0x0100 - indicates non-OAEP key wrapping was used. If this field is set + * the version needed to extract must be at least 61. This means OAEP key + * wrapping is not used when generating a Master Session Key using ErdData. + * <li>0x4000 - ErdData must be decrypted using 3DES-168, otherwise use the same + * algorithm used for encrypting the file contents. + * <li>0x8000 - reserved for future use. + * </ul> + * + * <p><b>See the section describing the Strong Encryption Specification for + * details. 
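A small sketch of how the processing flags listed above might be tested; the masks mirror that list, and the helper names are illustrative only.

    // Illustrative flag checks based on the list above.
    static boolean passwordRequired(final int flags) {
        return (flags & 0x0001) != 0;
    }

    static boolean nonOaepKeyWrapping(final int flags) {
        return (flags & 0x0100) != 0; // requires "version needed to extract" >= 61
    }

    static boolean erdDataUses3Des168(final int flags) {
        return (flags & 0x4000) != 0; // otherwise ErdData uses the file's AlgID
    }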
Refer to the section in this document entitled + * "Incorporating PKWARE Proprietary Technology into Your Product" for more + * information.</b></p> + * + * @NotThreadSafe + * @since 1.11 + */ +public class X0017_StrongEncryptionHeader extends PKWareExtraHeader { + + public X0017_StrongEncryptionHeader() { + super(new ZipShort(0x0017)); + } + + private int format; // TODO written but not read + private EncryptionAlgorithm algId; + private int bitlen; // TODO written but not read + private int flags; // TODO written but not read + private long rcount; + private HashAlgorithm hashAlg; + private int hashSize; + + // encryption data + private byte ivData[]; + private byte erdData[]; + + // encryption key + private byte recipientKeyHash[]; + private byte keyBlob[]; + + // password verification data + private byte vData[]; + private byte vCRC32[]; + + /** + * Get record count. + * @return the record count + */ + public long getRecordCount() { + return rcount; + } + + /** + * Get hash algorithm. + * @return the hash algorithm + */ + public HashAlgorithm getHashAlgorithm() { + return hashAlg; + } + + /** + * Get encryption algorithm. + * @return the encryption algorithm + */ + public EncryptionAlgorithm getEncryptionAlgorithm() { + return algId; + } + + /** + * Parse central directory format. + * + * @param data the buffer to read data from + * @param offset offset into buffer to read data + * @param length the length of data + */ + public void parseCentralDirectoryFormat(final byte[] data, final int offset, final int length) { + this.format = ZipShort.getValue(data, offset); + this.algId = EncryptionAlgorithm.getAlgorithmByCode(ZipShort.getValue(data, offset + 2)); + this.bitlen = ZipShort.getValue(data, offset + 4); + this.flags = ZipShort.getValue(data, offset + 6); + this.rcount = ZipLong.getValue(data, offset + 8); + + if (rcount > 0) { + this.hashAlg = HashAlgorithm.getAlgorithmByCode(ZipShort.getValue(data, offset + 12)); + this.hashSize = ZipShort.getValue(data, offset + 14); + // srlist... hashed public keys + for (long i = 0; i < this.rcount; i++) { + for (int j = 0; j < this.hashSize; j++) { + // ZipUtil.signedByteToUnsignedInt(data[offset + 16 + (i * this.hashSize) + j])); + } + } + } + } + + /** + * Parse file header format. 
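For orientation, the fixed prefix read by parseCentralDirectoryFormat above can be pictured as sixteen little-endian bytes. The sketch below builds such a prefix; every value is made up for illustration (0x660E is used as an example AlgID and 0x8004 as the SHA-1 HashAlg).

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    // Sketch: the 16-byte fixed prefix read by parseCentralDirectoryFormat.
    static byte[] sampleCentralDirectoryPrefix() {
        final ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
        buf.putShort((short) 2);       // Format  - only the value 2 is defined
        buf.putShort((short) 0x660E);  // AlgID   - example identifier
        buf.putShort((short) 128);     // Bitlen
        buf.putShort((short) 0x0001);  // Flags   - password required
        buf.putInt(1);                 // RCount  - one recipient
        buf.putShort((short) 0x8004);  // HashAlg - SHA-1
        buf.putShort((short) 20);      // HSize   - SHA-1 digest length
        return buf.array();
    }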
+ * + * <p>(Password only?)</p> + * + * @param data the buffer to read data from + * @param offset offset into buffer to read data + * @param length the length of data + */ + public void parseFileFormat(final byte[] data, final int offset, final int length) { + final int ivSize = ZipShort.getValue(data, offset); + this.ivData = new byte[ivSize]; + System.arraycopy(data, offset + 4, this.ivData, 0, ivSize); + + this.format = ZipShort.getValue(data, offset + ivSize + 6); + this.algId = EncryptionAlgorithm.getAlgorithmByCode(ZipShort.getValue(data, offset + ivSize + 8)); + this.bitlen = ZipShort.getValue(data, offset + ivSize + 10); + this.flags = ZipShort.getValue(data, offset + ivSize + 12); + + final int erdSize = ZipShort.getValue(data, offset + ivSize + 14); + this.erdData = new byte[erdSize]; + System.arraycopy(data, offset + ivSize + 16, this.erdData, 0, erdSize); + + this.rcount = ZipLong.getValue(data, offset + ivSize + 16 + erdSize); + System.out.println("rcount: " + rcount); + if (rcount == 0) { + final int vSize = ZipShort.getValue(data, offset + ivSize + 20 + erdSize); + this.vData = new byte[vSize - 4]; + this.vCRC32 = new byte[4]; + System.arraycopy(data, offset + ivSize + 22 + erdSize , this.vData, 0, vSize - 4); + System.arraycopy(data, offset + ivSize + 22 + erdSize + vSize - 4, vCRC32, 0, 4); + } else { + this.hashAlg = HashAlgorithm.getAlgorithmByCode(ZipShort.getValue(data, offset + ivSize + 20 + erdSize)); + this.hashSize = ZipShort.getValue(data, offset + ivSize + 22 + erdSize); + final int resize = ZipShort.getValue(data, offset + ivSize + 24 + erdSize); + this.recipientKeyHash = new byte[this.hashSize]; + this.keyBlob = new byte[resize - this.hashSize]; + System.arraycopy(data, offset + ivSize + 24 + erdSize, this.recipientKeyHash, 0, this.hashSize); + System.arraycopy(data, offset + ivSize + 24 + erdSize + this.hashSize, this.keyBlob, 0, resize - this.hashSize); + + final int vSize = ZipShort.getValue(data, offset + ivSize + 26 + erdSize + resize); + this.vData = new byte[vSize - 4]; + this.vCRC32 = new byte[4]; + System.arraycopy(data, offset + ivSize + 22 + erdSize + resize, this.vData, 0, vSize - 4); + System.arraycopy(data, offset + ivSize + 22 + erdSize + resize + vSize - 4, vCRC32, 0, 4); + } + + // validate values? + } + + @Override + public void parseFromLocalFileData(final byte[] data, final int offset, final int length) { + super.parseFromLocalFileData(data, offset, length); + parseFileFormat(data, offset, length); + } + + @Override + public void parseFromCentralDirectoryData(final byte[] data, final int offset, final int length) { + super.parseFromCentralDirectoryData(data, offset, length); + parseCentralDirectoryFormat(data, offset, length); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/X0019_EncryptionRecipientCertificateList.java b/src/main/java/org/apache/commons/compress/archivers/zip/X0019_EncryptionRecipientCertificateList.java new file mode 100644 index 000000000..825507944 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/X0019_EncryptionRecipientCertificateList.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +/** + * PKCS#7 Encryption Recipient Certificate List (0x0019). + * + * <p>This field MAY contain information about each of the certificates used in + * encryption processing and it can be used to identify who is allowed to + * decrypt encrypted files. This field should only appear in the archive extra + * data record. This field is not required and serves only to aid archive + * modifications by preserving public encryption key data. Individual security + * requirements may dictate that this data be omitted to deter information + * exposure.</p> + * + * <p>Note: all fields stored in Intel low-byte/high-byte order.</p> + * + * <pre> + * Value Size Description + * ----- ---- ----------- + * (CStore) 0x0019 2 bytes Tag for this "extra" block type + * TSize 2 bytes Size of the store data + * Version 2 bytes Format version number - must 0x0001 at this time + * CStore (var) PKCS#7 data blob + * </pre> + * + * <p><b>See the section describing the Strong Encryption Specification for + * details. Refer to the section in this document entitled + * "Incorporating PKWARE Proprietary Technology into Your Product" for more + * information.</b></p> + * + * @NotThreadSafe + * @since 1.11 + */ +public class X0019_EncryptionRecipientCertificateList extends PKWareExtraHeader { + + public X0019_EncryptionRecipientCertificateList() { + super(new ZipShort(0x0019)); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/X5455_ExtendedTimestamp.java b/src/main/java/org/apache/commons/compress/archivers/zip/X5455_ExtendedTimestamp.java new file mode 100644 index 000000000..28590c2b2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/X5455_ExtendedTimestamp.java @@ -0,0 +1,599 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +import java.io.Serializable; +import java.util.Date; +import java.util.zip.ZipException; + +/** + * <p>An extra field that stores additional file and directory timestamp data + * for zip entries. Each zip entry can include up to three timestamps + * (modify, access, create*). The timestamps are stored as 32 bit signed + * integers representing seconds since UNIX epoch (Jan 1st, 1970, UTC). 
+ * This field improves on zip's default timestamp granularity, since it + * allows one to store additional timestamps, and, in addition, the timestamps + * are stored using per-second granularity (zip's default behaviour can only store + * timestamps to the nearest <em>even</em> second). + * </p><p> + * Unfortunately, 32 (signed) bits can only store dates up to the year 2037, + * and so this extra field will eventually be obsolete. Enjoy it while it lasts! + * </p> + * <ul> + * <li><b>modifyTime:</b> + * most recent time of file/directory modification + * (or file/dir creation if the entry has not been + * modified since it was created). + * </li> + * <li><b>accessTime:</b> + * most recent time file/directory was opened + * (e.g., read from disk). Many people disable + * their operating systems from updating this value + * using the NOATIME mount option to optimize disk behaviour, + * and thus it's not always reliable. In those cases + * it's always equal to modifyTime. + * </li> + * <li><b>*createTime:</b> + * modern linux file systems (e.g., ext2 and newer) + * do not appear to store a value like this, and so + * it's usually omitted altogether in the zip extra + * field. Perhaps other unix systems track this. + * </li></ul> + * <p> + * We're using the field definition given in Info-Zip's source archive: + * zip-3.0.tar.gz/proginfo/extrafld.txt + * </p> + * <pre> + * Value Size Description + * ----- ---- ----------- + * 0x5455 Short tag for this extra block type ("UT") + * TSize Short total data size for this block + * Flags Byte info bits + * (ModTime) Long time of last modification (UTC/GMT) + * (AcTime) Long time of last access (UTC/GMT) + * (CrTime) Long time of original creation (UTC/GMT) + * + * Central-header version: + * + * Value Size Description + * ----- ---- ----------- + * 0x5455 Short tag for this extra block type ("UT") + * TSize Short total data size for this block + * Flags Byte info bits (refers to local header!) + * (ModTime) Long time of last modification (UTC/GMT) + * </pre> + * @since 1.5 + */ +public class X5455_ExtendedTimestamp implements ZipExtraField, Cloneable, Serializable { + private static final ZipShort HEADER_ID = new ZipShort(0x5455); + private static final long serialVersionUID = 1L; + + /** + * The bit set inside the flags by when the last modification time + * is present in this extra field. + */ + public static final byte MODIFY_TIME_BIT = 1; + /** + * The bit set inside the flags by when the lasr access time is + * present in this extra field. + */ + public static final byte ACCESS_TIME_BIT = 2; + /** + * The bit set inside the flags by when the original creation time + * is present in this extra field. + */ + public static final byte CREATE_TIME_BIT = 4; + + // The 3 boolean fields (below) come from this flags byte. The remaining 5 bits + // are ignored according to the current version of the spec (December 2012). + private byte flags; + + // Note: even if bit1 and bit2 are set, the Central data will still not contain + // access/create fields: only local data ever holds those! This causes + // some of our implementation to look a little odd, with seemingly spurious + // != null and length checks. + private boolean bit0_modifyTimePresent; + private boolean bit1_accessTimePresent; + private boolean bit2_createTimePresent; + + private ZipLong modifyTime; + private ZipLong accessTime; + private ZipLong createTime; + + /** + * Constructor for X5455_ExtendedTimestamp. + */ + public X5455_ExtendedTimestamp() {} + + /** + * The Header-ID. 
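A short usage sketch, relying only on the setters and serialization methods defined in this class: populate the timestamps from java.util.Date values, then note that the central-directory copy keeps the modify time only.

    import java.util.Date;

    // Usage sketch for X5455_ExtendedTimestamp.
    static void x5455Sketch() {
        final X5455_ExtendedTimestamp ts = new X5455_ExtendedTimestamp();
        ts.setModifyJavaTime(new Date());
        ts.setAccessJavaTime(new Date());
        final byte[] local = ts.getLocalFileDataData();      // 1 flags byte + 4 + 4 bytes
        final byte[] central = ts.getCentralDirectoryData(); // 1 flags byte + 4 bytes (modify only)
    }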
+ * + * @return the value for the header id for this extrafield + */ + @Override + public ZipShort getHeaderId() { + return HEADER_ID; + } + + /** + * Length of the extra field in the local file data - without + * Header-ID or length specifier. + * + * @return a <code>ZipShort</code> for the length of the data of this extra field + */ + @Override + public ZipShort getLocalFileDataLength() { + return new ZipShort(1 + + (bit0_modifyTimePresent ? 4 : 0) + + (bit1_accessTimePresent && accessTime != null ? 4 : 0) + + (bit2_createTimePresent && createTime != null ? 4 : 0) + ); + } + + /** + * Length of the extra field in the local file data - without + * Header-ID or length specifier. + * + * <p>For X5455 the central length is often smaller than the + * local length, because central cannot contain access or create + * timestamps.</p> + * + * @return a <code>ZipShort</code> for the length of the data of this extra field + */ + @Override + public ZipShort getCentralDirectoryLength() { + return new ZipShort(1 + + (bit0_modifyTimePresent ? 4 : 0) + ); + } + + /** + * The actual data to put into local file data - without Header-ID + * or length specifier. + * + * @return get the data + */ + @Override + public byte[] getLocalFileDataData() { + final byte[] data = new byte[getLocalFileDataLength().getValue()]; + int pos = 0; + data[pos++] = 0; + if (bit0_modifyTimePresent) { + data[0] |= MODIFY_TIME_BIT; + System.arraycopy(modifyTime.getBytes(), 0, data, pos, 4); + pos += 4; + } + if (bit1_accessTimePresent && accessTime != null) { + data[0] |= ACCESS_TIME_BIT; + System.arraycopy(accessTime.getBytes(), 0, data, pos, 4); + pos += 4; + } + if (bit2_createTimePresent && createTime != null) { + data[0] |= CREATE_TIME_BIT; + System.arraycopy(createTime.getBytes(), 0, data, pos, 4); + pos += 4; // NOSONAR - assignment as documentation + } + return data; + } + + /** + * The actual data to put into central directory data - without Header-ID + * or length specifier. + * + * @return the central directory data + */ + @Override + public byte[] getCentralDirectoryData() { + final byte[] centralData = new byte[getCentralDirectoryLength().getValue()]; + final byte[] localData = getLocalFileDataData(); + + // Truncate out create & access time (last 8 bytes) from + // the copy of the local data we obtained: + System.arraycopy(localData, 0, centralData, 0, centralData.length); + return centralData; + } + + /** + * Populate data from this array as if it was in local file data. + * + * @param data an array of bytes + * @param offset the start offset + * @param length the number of bytes in the array from offset + * @throws java.util.zip.ZipException on error + */ + @Override + public void parseFromLocalFileData( + final byte[] data, int offset, final int length + ) throws ZipException { + reset(); + final int len = offset + length; + setFlags(data[offset++]); + if (bit0_modifyTimePresent) { + modifyTime = new ZipLong(data, offset); + offset += 4; + } + + // Notice the extra length check in case we are parsing the shorter + // central data field (for both access and create timestamps). + if (bit1_accessTimePresent && offset + 4 <= len) { + accessTime = new ZipLong(data, offset); + offset += 4; + } + if (bit2_createTimePresent && offset + 4 <= len) { + createTime = new ZipLong(data, offset); + offset += 4; // NOSONAR - assignment as documentation + } + } + + /** + * Doesn't do anything special since this class always uses the + * same parsing logic for both central directory and local file data. 
+ */ + @Override + public void parseFromCentralDirectoryData( + final byte[] buffer, final int offset, final int length + ) throws ZipException { + reset(); + parseFromLocalFileData(buffer, offset, length); + } + + /** + * Reset state back to newly constructed state. Helps us make sure + * parse() calls always generate clean results. + */ + private void reset() { + setFlags((byte) 0); + this.modifyTime = null; + this.accessTime = null; + this.createTime = null; + } + + /** + * Sets flags byte. The flags byte tells us which of the + * three datestamp fields are present in the data: + * <pre> + * bit0 - modify time + * bit1 - access time + * bit2 - create time + * </pre> + * Only first 3 bits of flags are used according to the + * latest version of the spec (December 2012). + * + * @param flags flags byte indicating which of the + * three datestamp fields are present. + */ + public void setFlags(final byte flags) { + this.flags = flags; + this.bit0_modifyTimePresent = (flags & MODIFY_TIME_BIT) == MODIFY_TIME_BIT; + this.bit1_accessTimePresent = (flags & ACCESS_TIME_BIT) == ACCESS_TIME_BIT; + this.bit2_createTimePresent = (flags & CREATE_TIME_BIT) == CREATE_TIME_BIT; + } + + /** + * Gets flags byte. The flags byte tells us which of the + * three datestamp fields are present in the data: + * <pre> + * bit0 - modify time + * bit1 - access time + * bit2 - create time + * </pre> + * Only first 3 bits of flags are used according to the + * latest version of the spec (December 2012). + * + * @return flags byte indicating which of the + * three datestamp fields are present. + */ + public byte getFlags() { return flags; } + + /** + * Returns whether bit0 of the flags byte is set or not, + * which should correspond to the presence or absence of + * a modify timestamp in this particular zip entry. + * + * @return true if bit0 of the flags byte is set. + */ + public boolean isBit0_modifyTimePresent() { return bit0_modifyTimePresent; } + + /** + * Returns whether bit1 of the flags byte is set or not, + * which should correspond to the presence or absence of + * a "last access" timestamp in this particular zip entry. + * + * @return true if bit1 of the flags byte is set. + */ + public boolean isBit1_accessTimePresent() { return bit1_accessTimePresent; } + + /** + * Returns whether bit2 of the flags byte is set or not, + * which should correspond to the presence or absence of + * a create timestamp in this particular zip entry. + * + * @return true if bit2 of the flags byte is set. + */ + public boolean isBit2_createTimePresent() { return bit2_createTimePresent; } + + /** + * Returns the modify time (seconds since epoch) of this zip entry + * as a ZipLong object, or null if no such timestamp exists in the + * zip entry. + * + * @return modify time (seconds since epoch) or null. + */ + public ZipLong getModifyTime() { return modifyTime; } + + /** + * Returns the access time (seconds since epoch) of this zip entry + * as a ZipLong object, or null if no such timestamp exists in the + * zip entry. + * + * @return access time (seconds since epoch) or null. + */ + public ZipLong getAccessTime() { return accessTime; } + + /** + * <p> + * Returns the create time (seconds since epoch) of this zip entry + * as a ZipLong object, or null if no such timestamp exists in the + * zip entry. + * </p><p> + * Note: modern linux file systems (e.g., ext2) + * do not appear to store a "create time" value, and so + * it's usually omitted altogether in the zip extra + * field. Perhaps other unix systems track this. 
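To illustrate the shorter central-directory form described above: even when the access bit is set in the flags byte, parsing a five-byte central copy leaves the access time null because the extra length check stops the read. The bytes below are illustrative.

    import java.util.zip.ZipException;

    // Sketch: flags 0x03 (modify + access) followed by a single 4-byte modify time.
    static void centralParseSketch() throws ZipException {
        final byte[] central = {0x03, 0, 0, 0, 0};
        final X5455_ExtendedTimestamp ts = new X5455_ExtendedTimestamp();
        ts.parseFromCentralDirectoryData(central, 0, central.length);
        // ts.getModifyTime() is non-null; ts.getAccessTime() remains null.
    }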
+ * + * @return create time (seconds since epoch) or null. + */ + public ZipLong getCreateTime() { return createTime; } + + /** + * Returns the modify time as a java.util.Date + * of this zip entry, or null if no such timestamp exists in the zip entry. + * The milliseconds are always zeroed out, since the underlying data + * offers only per-second precision. + * + * @return modify time as java.util.Date or null. + */ + public Date getModifyJavaTime() { + return zipLongToDate(modifyTime); + } + + /** + * Returns the access time as a java.util.Date + * of this zip entry, or null if no such timestamp exists in the zip entry. + * The milliseconds are always zeroed out, since the underlying data + * offers only per-second precision. + * + * @return access time as java.util.Date or null. + */ + public Date getAccessJavaTime() { + return zipLongToDate(accessTime); + } + + /** + * <p> + * Returns the create time as a a java.util.Date + * of this zip entry, or null if no such timestamp exists in the zip entry. + * The milliseconds are always zeroed out, since the underlying data + * offers only per-second precision. + * </p><p> + * Note: modern linux file systems (e.g., ext2) + * do not appear to store a "create time" value, and so + * it's usually omitted altogether in the zip extra + * field. Perhaps other unix systems track this. + * + * @return create time as java.util.Date or null. + */ + public Date getCreateJavaTime() { + return zipLongToDate(createTime); + } + + /** + * <p> + * Sets the modify time (seconds since epoch) of this zip entry + * using a ZipLong object. + * </p><p> + * Note: the setters for flags and timestamps are decoupled. + * Even if the timestamp is not-null, it will only be written + * out if the corresponding bit in the flags is also set. + * </p> + * + * @param l ZipLong of the modify time (seconds per epoch) + */ + public void setModifyTime(final ZipLong l) { + bit0_modifyTimePresent = l != null; + flags = (byte) (l != null ? (flags | MODIFY_TIME_BIT) + : (flags & ~MODIFY_TIME_BIT)); + this.modifyTime = l; + } + + /** + * <p> + * Sets the access time (seconds since epoch) of this zip entry + * using a ZipLong object + * </p><p> + * Note: the setters for flags and timestamps are decoupled. + * Even if the timestamp is not-null, it will only be written + * out if the corresponding bit in the flags is also set. + * </p> + * + * @param l ZipLong of the access time (seconds per epoch) + */ + public void setAccessTime(final ZipLong l) { + bit1_accessTimePresent = l != null; + flags = (byte) (l != null ? (flags | ACCESS_TIME_BIT) + : (flags & ~ACCESS_TIME_BIT)); + this.accessTime = l; + } + + /** + * <p> + * Sets the create time (seconds since epoch) of this zip entry + * using a ZipLong object + * </p><p> + * Note: the setters for flags and timestamps are decoupled. + * Even if the timestamp is not-null, it will only be written + * out if the corresponding bit in the flags is also set. + * </p> + * + * @param l ZipLong of the create time (seconds per epoch) + */ + public void setCreateTime(final ZipLong l) { + bit2_createTimePresent = l != null; + flags = (byte) (l != null ? (flags | CREATE_TIME_BIT) + : (flags & ~CREATE_TIME_BIT)); + this.createTime = l; + } + + /** + * <p> + * Sets the modify time as a java.util.Date + * of this zip entry. Supplied value is truncated to per-second + * precision (milliseconds zeroed-out). + * </p><p> + * Note: the setters for flags and timestamps are decoupled. 
+ * Even if the timestamp is not-null, it will only be written + * out if the corresponding bit in the flags is also set. + * </p> + * + * @param d modify time as java.util.Date + */ + public void setModifyJavaTime(final Date d) { setModifyTime(dateToZipLong(d)); } + + /** + * <p> + * Sets the access time as a java.util.Date + * of this zip entry. Supplied value is truncated to per-second + * precision (milliseconds zeroed-out). + * </p><p> + * Note: the setters for flags and timestamps are decoupled. + * Even if the timestamp is not-null, it will only be written + * out if the corresponding bit in the flags is also set. + * </p> + * + * @param d access time as java.util.Date + */ + public void setAccessJavaTime(final Date d) { setAccessTime(dateToZipLong(d)); } + + /** + * <p> + * Sets the create time as a java.util.Date + * of this zip entry. Supplied value is truncated to per-second + * precision (milliseconds zeroed-out). + * </p><p> + * Note: the setters for flags and timestamps are decoupled. + * Even if the timestamp is not-null, it will only be written + * out if the corresponding bit in the flags is also set. + * </p> + * + * @param d create time as java.util.Date + */ + public void setCreateJavaTime(final Date d) { setCreateTime(dateToZipLong(d)); } + + /** + * Utility method converts java.util.Date (milliseconds since epoch) + * into a ZipLong (seconds since epoch). + * <p/> + * Also makes sure the converted ZipLong is not too big to fit + * in 32 unsigned bits. + * + * @param d java.util.Date to convert to ZipLong + * @return ZipLong + */ + private static ZipLong dateToZipLong(final Date d) { + if (d == null) { return null; } + + return unixTimeToZipLong(d.getTime() / 1000); + } + + /** + * Returns a String representation of this class useful for + * debugging purposes. + * + * @return A String representation of this class useful for + * debugging purposes. + */ + @Override + public String toString() { + final StringBuilder buf = new StringBuilder(); + buf.append("0x5455 Zip Extra Field: Flags="); + buf.append(Integer.toBinaryString(ZipUtil.unsignedIntToSignedByte(flags))).append(" "); + if (bit0_modifyTimePresent && modifyTime != null) { + final Date m = getModifyJavaTime(); + buf.append(" Modify:[").append(m).append("] "); + } + if (bit1_accessTimePresent && accessTime != null) { + final Date a = getAccessJavaTime(); + buf.append(" Access:[").append(a).append("] "); + } + if (bit2_createTimePresent && createTime != null) { + final Date c = getCreateJavaTime(); + buf.append(" Create:[").append(c).append("] "); + } + return buf.toString(); + } + + @Override + public Object clone() throws CloneNotSupportedException { + return super.clone(); + } + + @Override + public boolean equals(final Object o) { + if (o instanceof X5455_ExtendedTimestamp) { + final X5455_ExtendedTimestamp xf = (X5455_ExtendedTimestamp) o; + + // The ZipLong==ZipLong clauses handle the cases where both are null. + // and only last 3 bits of flags matter. 
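A tiny round-trip sketch of the per-second truncation mentioned in these setters (the timestamp value is illustrative):

    import java.util.Date;

    // Sketch: milliseconds are dropped on the way in and come back zeroed.
    static void precisionSketch() {
        final X5455_ExtendedTimestamp ts = new X5455_ExtendedTimestamp();
        ts.setModifyJavaTime(new Date(1_234_567_890_123L)); // the 123 ms are truncated
        final Date back = ts.getModifyJavaTime();
        // back.getTime() == 1_234_567_890_000L
    }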
+ return ((flags & 0x07) == (xf.flags & 0x07)) && + (modifyTime == xf.modifyTime || (modifyTime != null && modifyTime.equals(xf.modifyTime))) && + (accessTime == xf.accessTime || (accessTime != null && accessTime.equals(xf.accessTime))) && + (createTime == xf.createTime || (createTime != null && createTime.equals(xf.createTime))); + } + return false; + } + + @Override + public int hashCode() { + int hc = (-123 * (flags & 0x07)); // only last 3 bits of flags matter + if (modifyTime != null) { + hc ^= modifyTime.hashCode(); + } + if (accessTime != null) { + // Since accessTime is often same as modifyTime, + // this prevents them from XOR negating each other. + hc ^= Integer.rotateLeft(accessTime.hashCode(), 11); + } + if (createTime != null) { + hc ^= Integer.rotateLeft(createTime.hashCode(), 22); + } + return hc; + } + + private static Date zipLongToDate(ZipLong unixTime) { + return unixTime != null ? new Date(unixTime.getIntValue() * 1000L) : null; + } + + private static ZipLong unixTimeToZipLong(long l) { + if (l < Integer.MIN_VALUE || l > Integer.MAX_VALUE) { + throw new IllegalArgumentException("X5455 timestamps must fit in a signed 32 bit integer: " + l); + } + return new ZipLong(l); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/X7875_NewUnix.java b/src/main/java/org/apache/commons/compress/archivers/zip/X7875_NewUnix.java new file mode 100644 index 000000000..a540dba8a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/X7875_NewUnix.java @@ -0,0 +1,365 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +import java.io.Serializable; +import java.math.BigInteger; +import java.util.zip.ZipException; + +import static org.apache.commons.compress.archivers.zip.ZipUtil.reverse; +import static org.apache.commons.compress.archivers.zip.ZipUtil.signedByteToUnsignedInt; +import static org.apache.commons.compress.archivers.zip.ZipUtil.unsignedIntToSignedByte; + +/** + * An extra field that stores UNIX UID/GID data (owner & group ownership) for a given + * zip entry. 
We're using the field definition given in Info-Zip's source archive: + * zip-3.0.tar.gz/proginfo/extrafld.txt + * + * <pre> + * Local-header version: + * + * Value Size Description + * ----- ---- ----------- + * 0x7875 Short tag for this extra block type ("ux") + * TSize Short total data size for this block + * Version 1 byte version of this extra field, currently 1 + * UIDSize 1 byte Size of UID field + * UID Variable UID for this entry (little endian) + * GIDSize 1 byte Size of GID field + * GID Variable GID for this entry (little endian) + * + * Central-header version: + * + * Value Size Description + * ----- ---- ----------- + * 0x7855 Short tag for this extra block type ("Ux") + * TSize Short total data size for this block (0) + * </pre> + * @since 1.5 + */ +public class X7875_NewUnix implements ZipExtraField, Cloneable, Serializable { + private static final ZipShort HEADER_ID = new ZipShort(0x7875); + private static final ZipShort ZERO = new ZipShort(0); + private static final BigInteger ONE_THOUSAND = BigInteger.valueOf(1000); + private static final long serialVersionUID = 1L; + + private int version = 1; // always '1' according to current info-zip spec. + + // BigInteger helps us with little-endian / big-endian conversions. + // (thanks to BigInteger.toByteArray() and a reverse() method we created). + // Also, the spec theoretically allows UID/GID up to 255 bytes long! + // + // NOTE: equals() and hashCode() currently assume these can never be null. + private BigInteger uid; + private BigInteger gid; + + /** + * Constructor for X7875_NewUnix. + */ + public X7875_NewUnix() { + reset(); + } + + /** + * The Header-ID. + * + * @return the value for the header id for this extrafield + */ + @Override + public ZipShort getHeaderId() { + return HEADER_ID; + } + + /** + * Gets the UID as a long. UID is typically a 32 bit unsigned + * value on most UNIX systems, so we return a long to avoid + * integer overflow into the negatives in case values above + * and including 2^31 are being used. + * + * @return the UID value. + */ + public long getUID() { return ZipUtil.bigToLong(uid); } + + /** + * Gets the GID as a long. GID is typically a 32 bit unsigned + * value on most UNIX systems, so we return a long to avoid + * integer overflow into the negatives in case values above + * and including 2^31 are being used. + * + * @return the GID value. + */ + public long getGID() { return ZipUtil.bigToLong(gid); } + + /** + * Sets the UID. + * + * @param l UID value to set on this extra field. + */ + public void setUID(final long l) { + this.uid = ZipUtil.longToBig(l); + } + + /** + * Sets the GID. + * + * @param l GID value to set on this extra field. + */ + public void setGID(final long l) { + this.gid = ZipUtil.longToBig(l); + } + + /** + * Length of the extra field in the local file data - without + * Header-ID or length specifier. + * + * @return a <code>ZipShort</code> for the length of the data of this extra field + */ + @Override + public ZipShort getLocalFileDataLength() { + byte[] b = trimLeadingZeroesForceMinLength(uid.toByteArray()); + final int uidSize = b == null ? 0 : b.length; + b = trimLeadingZeroesForceMinLength(gid.toByteArray()); + final int gidSize = b == null ? 0 : b.length; + + // The 3 comes from: version=1 + uidsize=1 + gidsize=1 + return new ZipShort(3 + uidSize + gidSize); + } + + /** + * Length of the extra field in the central directory data - without + * Header-ID or length specifier. 
+ * + * @return a <code>ZipShort</code> for the length of the data of this extra field + */ + @Override + public ZipShort getCentralDirectoryLength() { + return ZERO; + } + + /** + * The actual data to put into local file data - without Header-ID + * or length specifier. + * + * @return get the data + */ + @Override + public byte[] getLocalFileDataData() { + byte[] uidBytes = uid.toByteArray(); + byte[] gidBytes = gid.toByteArray(); + + // BigInteger might prepend a leading-zero to force a positive representation + // (e.g., so that the sign-bit is set to zero). We need to remove that + // before sending the number over the wire. + uidBytes = trimLeadingZeroesForceMinLength(uidBytes); + int uidBytesLen = uidBytes != null ? uidBytes.length : 0; + gidBytes = trimLeadingZeroesForceMinLength(gidBytes); + int gidBytesLen = gidBytes != null ? gidBytes.length : 0; + + // Couldn't bring myself to just call getLocalFileDataLength() when we've + // already got the arrays right here. Yeah, yeah, I know, premature + // optimization is the root of all... + // + // The 3 comes from: version=1 + uidsize=1 + gidsize=1 + final byte[] data = new byte[3 + uidBytesLen + gidBytesLen]; + + // reverse() switches byte array from big-endian to little-endian. + if (uidBytes != null) { + reverse(uidBytes); + } + if (gidBytes != null) { + reverse(gidBytes); + } + + int pos = 0; + data[pos++] = unsignedIntToSignedByte(version); + data[pos++] = unsignedIntToSignedByte(uidBytesLen); + if (uidBytes != null) { + System.arraycopy(uidBytes, 0, data, pos, uidBytesLen); + } + pos += uidBytesLen; + data[pos++] = unsignedIntToSignedByte(gidBytesLen); + if (gidBytes != null) { + System.arraycopy(gidBytes, 0, data, pos, gidBytesLen); + } + return data; + } + + /** + * The actual data to put into central directory data - without Header-ID + * or length specifier. + * + * @return get the data + */ + @Override + public byte[] getCentralDirectoryData() { + return new byte[0]; + } + + /** + * Populate data from this array as if it was in local file data. + * + * @param data an array of bytes + * @param offset the start offset + * @param length the number of bytes in the array from offset + * @throws java.util.zip.ZipException on error + */ + @Override + public void parseFromLocalFileData( + final byte[] data, int offset, final int length + ) throws ZipException { + reset(); + this.version = signedByteToUnsignedInt(data[offset++]); + final int uidSize = signedByteToUnsignedInt(data[offset++]); + final byte[] uidBytes = new byte[uidSize]; + System.arraycopy(data, offset, uidBytes, 0, uidSize); + offset += uidSize; + this.uid = new BigInteger(1, reverse(uidBytes)); // sign-bit forced positive + + final int gidSize = signedByteToUnsignedInt(data[offset++]); + final byte[] gidBytes = new byte[gidSize]; + System.arraycopy(data, offset, gidBytes, 0, gidSize); + this.gid = new BigInteger(1, reverse(gidBytes)); // sign-bit forced positive + } + + /** + * Doesn't do anything since this class doesn't store anything + * inside the central directory. + */ + @Override + public void parseFromCentralDirectoryData( + final byte[] buffer, final int offset, final int length + ) throws ZipException { + } + + /** + * Reset state back to newly constructed state. Helps us make sure + * parse() calls always generate clean results. + */ + private void reset() { + // Typical UID/GID of the first non-root user created on a unix system. 
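As a worked example of the variable-length encoding produced above (values chosen for illustration): a UID and GID of 1000 each need two bytes, so the whole local block is seven bytes.

    // Sketch: uid = gid = 1000 (0x03E8). BigInteger.toByteArray() yields the
    // big-endian bytes {0x03, 0xE8}; after trimming and reversing they are
    // written little-endian, so getLocalFileDataData() returns:
    //   { 1,                        // Version
    //     2, (byte) 0xE8, 0x03,     // UIDSize, UID
    //     2, (byte) 0xE8, 0x03 }    // GIDSize, GID
    static void x7875Sketch() {
        final X7875_NewUnix field = new X7875_NewUnix();
        field.setUID(1000);
        field.setGID(1000);
        final byte[] data = field.getLocalFileDataData(); // 7 bytes as above
    }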
+ uid = ONE_THOUSAND; + gid = ONE_THOUSAND; + } + + /** + * Returns a String representation of this class useful for + * debugging purposes. + * + * @return A String representation of this class useful for + * debugging purposes. + */ + @Override + public String toString() { + return "0x7875 Zip Extra Field: UID=" + uid + " GID=" + gid; + } + + @Override + public Object clone() throws CloneNotSupportedException { + return super.clone(); + } + + @Override + public boolean equals(final Object o) { + if (o instanceof X7875_NewUnix) { + final X7875_NewUnix xf = (X7875_NewUnix) o; + // We assume uid and gid can never be null. + return version == xf.version && uid.equals(xf.uid) && gid.equals(xf.gid); + } + return false; + } + + @Override + public int hashCode() { + int hc = -1234567 * version; + // Since most UID's and GID's are below 65,536, this is (hopefully!) + // a nice way to make sure typical UID and GID values impact the hash + // as much as possible. + hc ^= Integer.rotateLeft(uid.hashCode(), 16); + hc ^= gid.hashCode(); + return hc; + } + + /** + * Not really for external usage, but marked "package" visibility + * to help us JUnit it. Trims a byte array of leading zeroes while + * also enforcing a minimum length, and thus it really trims AND pads + * at the same time. + * + * @param array byte[] array to trim & pad. + * @return trimmed & padded byte[] array. + */ + static byte[] trimLeadingZeroesForceMinLength(final byte[] array) { + if (array == null) { + return array; + } + + int pos = 0; + for (final byte b : array) { + if (b == 0) { + pos++; + } else { + break; + } + } + + /* + + I agonized over my choice of MIN_LENGTH=1. Here's the situation: + InfoZip (the tool I am using to test interop) always sets these + to length=4. And so a UID of 0 (typically root) for example is + encoded as {4,0,0,0,0} (len=4, 32 bits of zero), when it could just + as easily be encoded as {1,0} (len=1, 8 bits of zero) according to + the spec. + + In the end I decided on MIN_LENGTH=1 for four reasons: + + 1.) We are adhering to the spec as far as I can tell, and so + a consumer that cannot parse this is broken. + + 2.) Fundamentally, zip files are about shrinking things, so + let's save a few bytes per entry while we can. + + 3.) Of all the people creating zip files using commons- + compress, how many care about UNIX UID/GID attributes + of the files they store? (e.g., I am probably thinking + way too hard about this and no one cares!) + + 4.) InfoZip's tool, even though it carefully stores every UID/GID + for every file zipped on a unix machine (by default) currently + appears unable to ever restore UID/GID. + unzip -X has no effect on my machine, even when run as root!!!! + + And thus it is decided: MIN_LENGTH=1. + + If anyone runs into interop problems from this, feel free to set + it to MIN_LENGTH=4 at some future time, and then we will behave + exactly like InfoZip (requires changes to unit tests, though). + + And I am sorry that the time you spent reading this comment is now + gone and you can never have it back. 
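Two concrete cases of the trimming behaviour discussed in this comment, assuming MIN_LENGTH stays at 1 (the method is package-visible, so this sketch assumes it runs in the same package):

    // Worked examples for trimLeadingZeroesForceMinLength:
    static void trimSketch() {
        // InfoZip-style zero-padded UID 1000: {0, 0, 0x03, 0xE8} -> {0x03, 0xE8}
        final byte[] padded = {0, 0, 0x03, (byte) 0xE8};
        final byte[] trimmed = X7875_NewUnix.trimLeadingZeroesForceMinLength(padded); // length 2

        // all zeros collapse to a single zero byte, never an empty array
        final byte[] zeros = {0, 0, 0, 0};
        final byte[] one = X7875_NewUnix.trimLeadingZeroesForceMinLength(zeros); // {0}
    }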
+ + */ + final int MIN_LENGTH = 1; + + final byte[] trimmedArray = new byte[Math.max(MIN_LENGTH, array.length - pos)]; + final int startPos = trimmedArray.length - (array.length - pos); + System.arraycopy(array, pos, trimmedArray, startPos, trimmedArray.length - startPos); + return trimmedArray; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/Zip64ExtendedInformationExtraField.java b/src/main/java/org/apache/commons/compress/archivers/zip/Zip64ExtendedInformationExtraField.java new file mode 100644 index 000000000..0578e5847 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/Zip64ExtendedInformationExtraField.java @@ -0,0 +1,340 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +import java.util.zip.ZipException; + +import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; +import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; + +/** + * Holds size and other extended information for entries that use Zip64 + * features. + * + * <p>Currently Commons Compress doesn't support encrypting the + * central directory so the note in APPNOTE.TXT about masking doesn't + * apply.</p> + * + * <p>The implementation relies on data being read from the local file + * header and assumes that both size values are always present.</p> + * + * @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">PKWARE + * APPNOTE.TXT, section 4.5.3</a> + * + * @since 1.2 + * @NotThreadSafe + */ +public class Zip64ExtendedInformationExtraField implements ZipExtraField { + + static final ZipShort HEADER_ID = new ZipShort(0x0001); + + private static final String LFH_MUST_HAVE_BOTH_SIZES_MSG = + "Zip64 extended information must contain" + + " both size values in the local file header."; + private static final byte[] EMPTY = new byte[0]; + + private ZipEightByteInteger size, compressedSize, relativeHeaderOffset; + private ZipLong diskStart; + + /** + * Stored in {@link #parseFromCentralDirectoryData + * parseFromCentralDirectoryData} so it can be reused when ZipFile + * calls {@link #reparseCentralDirectoryData + * reparseCentralDirectoryData}. + * + * <p>Not used for anything else</p> + * + * @since 1.3 + */ + private byte[] rawCentralDirectoryData; + + /** + * This constructor should only be used by the code that reads + * archives inside of Commons Compress. + */ + public Zip64ExtendedInformationExtraField() { } + + /** + * Creates an extra field based on the original and compressed size. 
+ * + * @param size the entry's original size + * @param compressedSize the entry's compressed size + * + * @throws IllegalArgumentException if size or compressedSize is null + */ + public Zip64ExtendedInformationExtraField(final ZipEightByteInteger size, + final ZipEightByteInteger compressedSize) { + this(size, compressedSize, null, null); + } + + /** + * Creates an extra field based on all four possible values. + * + * @param size the entry's original size + * @param compressedSize the entry's compressed size + * @param relativeHeaderOffset the entry's offset + * @param diskStart the disk start + * + * @throws IllegalArgumentException if size or compressedSize is null + */ + public Zip64ExtendedInformationExtraField(final ZipEightByteInteger size, + final ZipEightByteInteger compressedSize, + final ZipEightByteInteger relativeHeaderOffset, + final ZipLong diskStart) { + this.size = size; + this.compressedSize = compressedSize; + this.relativeHeaderOffset = relativeHeaderOffset; + this.diskStart = diskStart; + } + + @Override + public ZipShort getHeaderId() { + return HEADER_ID; + } + + @Override + public ZipShort getLocalFileDataLength() { + return new ZipShort(size != null ? 2 * DWORD : 0); + } + + @Override + public ZipShort getCentralDirectoryLength() { + return new ZipShort((size != null ? DWORD : 0) + + (compressedSize != null ? DWORD : 0) + + (relativeHeaderOffset != null ? DWORD : 0) + + (diskStart != null ? WORD : 0)); + } + + @Override + public byte[] getLocalFileDataData() { + if (size != null || compressedSize != null) { + if (size == null || compressedSize == null) { + throw new IllegalArgumentException(LFH_MUST_HAVE_BOTH_SIZES_MSG); + } + final byte[] data = new byte[2 * DWORD]; + addSizes(data); + return data; + } + return EMPTY; + } + + @Override + public byte[] getCentralDirectoryData() { + final byte[] data = new byte[getCentralDirectoryLength().getValue()]; + int off = addSizes(data); + if (relativeHeaderOffset != null) { + System.arraycopy(relativeHeaderOffset.getBytes(), 0, data, off, DWORD); + off += DWORD; + } + if (diskStart != null) { + System.arraycopy(diskStart.getBytes(), 0, data, off, WORD); + off += WORD; // NOSONAR - assignment as documentation + } + return data; + } + + @Override + public void parseFromLocalFileData(final byte[] buffer, int offset, final int length) + throws ZipException { + if (length == 0) { + // no local file data at all, may happen if an archive + // only holds a ZIP64 extended information extra field + // inside the central directory but not inside the local + // file header + return; + } + if (length < 2 * DWORD) { + throw new ZipException(LFH_MUST_HAVE_BOTH_SIZES_MSG); + } + size = new ZipEightByteInteger(buffer, offset); + offset += DWORD; + compressedSize = new ZipEightByteInteger(buffer, offset); + offset += DWORD; + int remaining = length - 2 * DWORD; + if (remaining >= DWORD) { + relativeHeaderOffset = new ZipEightByteInteger(buffer, offset); + offset += DWORD; + remaining -= DWORD; + } + if (remaining >= WORD) { + diskStart = new ZipLong(buffer, offset); + offset += WORD; // NOSONAR - assignment as documentation + remaining -= WORD; // NOSONAR - assignment as documentation + } + } + + @Override + public void parseFromCentralDirectoryData(final byte[] buffer, int offset, + final int length) + throws ZipException { + // store for processing in reparseCentralDirectoryData + rawCentralDirectoryData = new byte[length]; + System.arraycopy(buffer, offset, rawCentralDirectoryData, 0, length); + + // if there is no size information in 
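A brief usage sketch using only the constructor and accessors defined in this class: the local-file-header form must carry both eight-byte sizes, so the serialized local data is exactly sixteen bytes (the sizes below are made up).

    // Sketch: both sizes are mandatory in the local file header form.
    static void zip64Sketch() {
        final Zip64ExtendedInformationExtraField f =
            new Zip64ExtendedInformationExtraField(
                new ZipEightByteInteger(5_000_000_000L),   // original size
                new ZipEightByteInteger(4_200_000_000L));  // compressed size
        final byte[] local = f.getLocalFileDataData();      // 16 bytes: size + compressedSize
        final byte[] central = f.getCentralDirectoryData(); // also 16 bytes: no offset/disk set
    }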
here, we are screwed and + // can only hope things will get resolved by LFH data later + // But there are some cases that can be detected + // * all data is there + // * length == 24 -> both sizes and offset + // * length % 8 == 4 -> at least we can identify the diskStart field + if (length >= 3 * DWORD + WORD) { + parseFromLocalFileData(buffer, offset, length); + } else if (length == 3 * DWORD) { + size = new ZipEightByteInteger(buffer, offset); + offset += DWORD; + compressedSize = new ZipEightByteInteger(buffer, offset); + offset += DWORD; + relativeHeaderOffset = new ZipEightByteInteger(buffer, offset); + } else if (length % DWORD == WORD) { + diskStart = new ZipLong(buffer, offset + length - WORD); + } + } + + /** + * Parses the raw bytes read from the central directory extra + * field with knowledge which fields are expected to be there. + * + * <p>All four fields inside the zip64 extended information extra + * field are optional and must only be present if their corresponding + * entry inside the central directory contains the correct magic + * value.</p> + * + * @param hasUncompressedSize flag to read from central directory + * @param hasCompressedSize flag to read from central directory + * @param hasRelativeHeaderOffset flag to read from central directory + * @param hasDiskStart flag to read from central directory + * @throws ZipException on error + */ + public void reparseCentralDirectoryData(final boolean hasUncompressedSize, + final boolean hasCompressedSize, + final boolean hasRelativeHeaderOffset, + final boolean hasDiskStart) + throws ZipException { + if (rawCentralDirectoryData != null) { + final int expectedLength = (hasUncompressedSize ? DWORD : 0) + + (hasCompressedSize ? DWORD : 0) + + (hasRelativeHeaderOffset ? DWORD : 0) + + (hasDiskStart ? WORD : 0); + if (rawCentralDirectoryData.length < expectedLength) { + throw new ZipException("central directory zip64 extended" + + " information extra field's length" + + " doesn't match central directory" + + " data. Expected length " + + expectedLength + " but is " + + rawCentralDirectoryData.length); + } + int offset = 0; + if (hasUncompressedSize) { + size = new ZipEightByteInteger(rawCentralDirectoryData, offset); + offset += DWORD; + } + if (hasCompressedSize) { + compressedSize = new ZipEightByteInteger(rawCentralDirectoryData, + offset); + offset += DWORD; + } + if (hasRelativeHeaderOffset) { + relativeHeaderOffset = + new ZipEightByteInteger(rawCentralDirectoryData, offset); + offset += DWORD; + } + if (hasDiskStart) { + diskStart = new ZipLong(rawCentralDirectoryData, offset); + offset += WORD; // NOSONAR - assignment as documentation + } + } + } + + /** + * The uncompressed size stored in this extra field. + * @return The uncompressed size stored in this extra field. + */ + public ZipEightByteInteger getSize() { + return size; + } + + /** + * The uncompressed size stored in this extra field. + * @param size The uncompressed size stored in this extra field. + */ + public void setSize(final ZipEightByteInteger size) { + this.size = size; + } + + /** + * The compressed size stored in this extra field. + * @return The compressed size stored in this extra field. + */ + public ZipEightByteInteger getCompressedSize() { + return compressedSize; + } + + /** + * The compressed size stored in this extra field. + * @param compressedSize The compressed size stored in this extra field.
+ */ + public void setCompressedSize(final ZipEightByteInteger compressedSize) { + this.compressedSize = compressedSize; + } + + /** + * The relative header offset stored in this extra field. + * @return The relative header offset stored in this extra field. + */ + public ZipEightByteInteger getRelativeHeaderOffset() { + return relativeHeaderOffset; + } + + /** + * The relative header offset stored in this extra field. + * @param rho The relative header offset stored in this extra field. + */ + public void setRelativeHeaderOffset(final ZipEightByteInteger rho) { + relativeHeaderOffset = rho; + } + + /** + * The disk start number stored in this extra field. + * @return The disk start number stored in this extra field. + */ + public ZipLong getDiskStartNumber() { + return diskStart; + } + + /** + * The disk start number stored in this extra field. + * @param ds The disk start number stored in this extra field. + */ + public void setDiskStartNumber(final ZipLong ds) { + diskStart = ds; + } + + private int addSizes(final byte[] data) { + int off = 0; + if (size != null) { + System.arraycopy(size.getBytes(), 0, data, 0, DWORD); + off += DWORD; + } + if (compressedSize != null) { + System.arraycopy(compressedSize.getBytes(), 0, data, off, DWORD); + off += DWORD; + } + return off; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/Zip64Mode.java b/src/main/java/org/apache/commons/compress/archivers/zip/Zip64Mode.java new file mode 100644 index 000000000..d051e8982 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/Zip64Mode.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +/** + * The different modes {@link ZipArchiveOutputStream} can operate in. + * + * @see ZipArchiveOutputStream#setUseZip64 + * + * @since 1.3 + */ +public enum Zip64Mode { + /** + * Use Zip64 extensions for all entries, even if it is clear it is + * not required. + */ + Always, + /** + * Don't use Zip64 extensions for any entries. + * + * <p>This will cause a {@link Zip64RequiredException} to be + * thrown if {@link ZipArchiveOutputStream} detects it needs Zip64 + * support.</p> + */ + Never, + /** + * Use Zip64 extensions for all entries where they are required, + * don't use them for entries that clearly don't require them. 
+ */ + AsNeeded +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/Zip64RequiredException.java b/src/main/java/org/apache/commons/compress/archivers/zip/Zip64RequiredException.java new file mode 100644 index 000000000..1ec7e70b8 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/Zip64RequiredException.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import java.util.zip.ZipException; + +/** + * Exception thrown when attempting to write data that requires Zip64 + * support to an archive and {@link ZipArchiveOutputStream#setUseZip64 + * UseZip64} has been set to {@link Zip64Mode#Never Never}. + * @since 1.3 + */ +public class Zip64RequiredException extends ZipException { + + private static final long serialVersionUID = 20110809L; + + /** + * Helper to format "entry too big" messages. + */ + static String getEntryTooBigMessage(final ZipArchiveEntry ze) { + return ze.getName() + "'s size exceeds the limit of 4GByte."; + } + + static final String ARCHIVE_TOO_BIG_MESSAGE = + "archive's size exceeds the limit of 4GByte."; + + static final String TOO_MANY_ENTRIES_MESSAGE = + "archive contains more than 65535 entries."; + + public Zip64RequiredException(final String reason) { + super(reason); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java new file mode 100644 index 000000000..4a09eac4f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java @@ -0,0 +1,1016 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
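The Zip64Mode values and the Zip64RequiredException above work together through ZipArchiveOutputStream#setUseZip64. A minimal usage sketch, with an illustrative file name and payload:

    import java.io.File;
    import java.io.IOException;

    import org.apache.commons.compress.archivers.zip.Zip64Mode;
    import org.apache.commons.compress.archivers.zip.Zip64RequiredException;
    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
    import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;

    public class Zip64ModeSketch {
        public static void main(String[] args) throws IOException {
            try (ZipArchiveOutputStream out = new ZipArchiveOutputStream(new File("example.zip"))) {
                out.setUseZip64(Zip64Mode.Never);   // refuse Zip64 records instead of writing them
                ZipArchiveEntry entry = new ZipArchiveEntry("data.bin");
                out.putArchiveEntry(entry);
                out.write(new byte[] {1, 2, 3});    // tiny payload, no Zip64 needed here
                out.closeArchiveEntry();
            } catch (Zip64RequiredException e) {
                // raised as soon as the stream detects that an entry or the archive needs Zip64
                System.err.println(e.getMessage());
            }
        }
    }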
+ * + */ +package org.apache.commons.compress.archivers.zip; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.EntryStreamOffsets; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.List; +import java.util.zip.ZipException; + +/** + * Extension that adds better handling of extra fields and provides + * access to the internal and external file attributes. + * + * <p>The extra data is expected to follow the recommendation of + * <a href="http://www.pkware.com/documents/casestudies/APPNOTE.TXT">APPNOTE.TXT</a>:</p> + * <ul> + * <li>the extra byte array consists of a sequence of extra fields</li> + * <li>each extra fields starts by a two byte header id followed by + * a two byte sequence holding the length of the remainder of + * data.</li> + * </ul> + * + * <p>Any extra data that cannot be parsed by the rules above will be + * consumed as "unparseable" extra data and treated differently by the + * methods of this class. Versions prior to Apache Commons Compress + * 1.1 would have thrown an exception if any attempt was made to read + * or write extra data not conforming to the recommendation.</p> + * + * @NotThreadSafe + */ +public class ZipArchiveEntry extends java.util.zip.ZipEntry + implements ArchiveEntry, EntryStreamOffsets +{ + + public static final int PLATFORM_UNIX = 3; + public static final int PLATFORM_FAT = 0; + public static final int CRC_UNKNOWN = -1; + private static final int SHORT_MASK = 0xFFFF; + private static final int SHORT_SHIFT = 16; + private static final byte[] EMPTY = new byte[0]; + + /** + * Indicates how the name of this entry has been determined. + * @since 1.16 + */ + public enum NameSource { + /** + * The name has been read from the archive using the encoding + * of the archive specified when creating the {@link + * ZipArchiveInputStream} or {@link ZipFile} (defaults to the + * platform's default encoding). + */ + NAME, + /** + * The name has been read from the archive and the archive + * specified the EFS flag which indicates the name has been + * encoded as UTF-8. + */ + NAME_WITH_EFS_FLAG, + /** + * The name has been read from an {@link UnicodePathExtraField + * Unicode Extra Field}. + */ + UNICODE_EXTRA_FIELD + } + + /** + * Indicates how the comment of this entry has been determined. + * @since 1.16 + */ + public enum CommentSource { + /** + * The comment has been read from the archive using the encoding + * of the archive specified when creating the {@link + * ZipArchiveInputStream} or {@link ZipFile} (defaults to the + * platform's default encoding). + */ + COMMENT, + /** + * The comment has been read from an {@link UnicodeCommentExtraField + * Unicode Extra Field}. + */ + UNICODE_EXTRA_FIELD + } + + /** + * The {@link java.util.zip.ZipEntry} base class only supports + * the compression methods STORED and DEFLATED. We override the + * field so that any compression methods can be used. + * <p> + * The default value -1 means that the method has not been specified. + * + * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-93" + * >COMPRESS-93</a> + */ + private int method = ZipMethod.UNKNOWN_CODE; + + /** + * The {@link java.util.zip.ZipEntry#setSize} method in the base + * class throws an IllegalArgumentException if the size is bigger + * than 2GB for Java versions < 7 and even in Java 7+ if the + * implementation in java.util.zip doesn't support Zip64 itself + * (it is an optional feature). 
+ * + * <p>We need to keep our own size information for Zip64 support.</p> + */ + private long size = SIZE_UNKNOWN; + + private int internalAttributes = 0; + private int versionRequired; + private int versionMadeBy; + private int platform = PLATFORM_FAT; + private int rawFlag; + private long externalAttributes = 0; + private int alignment = 0; + private ZipExtraField[] extraFields; + private UnparseableExtraFieldData unparseableExtra = null; + private String name = null; + private byte[] rawName = null; + private GeneralPurposeBit gpb = new GeneralPurposeBit(); + private static final ZipExtraField[] noExtraFields = new ZipExtraField[0]; + private long localHeaderOffset = OFFSET_UNKNOWN; + private long dataOffset = OFFSET_UNKNOWN; + private boolean isStreamContiguous = false; + private NameSource nameSource = NameSource.NAME; + private CommentSource commentSource = CommentSource.COMMENT; + + + /** + * Creates a new zip entry with the specified name. + * + * <p>Assumes the entry represents a directory if and only if the + * name ends with a forward slash "/".</p> + * + * @param name the name of the entry + */ + public ZipArchiveEntry(final String name) { + super(name); + setName(name); + } + + /** + * Creates a new zip entry with fields taken from the specified zip entry. + * + * <p>Assumes the entry represents a directory if and only if the + * name ends with a forward slash "/".</p> + * + * @param entry the entry to get fields from + * @throws ZipException on error + */ + public ZipArchiveEntry(final java.util.zip.ZipEntry entry) throws ZipException { + super(entry); + setName(entry.getName()); + final byte[] extra = entry.getExtra(); + if (extra != null) { + setExtraFields(ExtraFieldUtils.parse(extra, true, + ExtraFieldUtils + .UnparseableExtraField.READ)); + } else { + // initializes extra data to an empty byte array + setExtra(); + } + setMethod(entry.getMethod()); + this.size = entry.getSize(); + } + + /** + * Creates a new zip entry with fields taken from the specified zip entry. + * + * <p>Assumes the entry represents a directory if and only if the + * name ends with a forward slash "/".</p> + * + * @param entry the entry to get fields from + * @throws ZipException on error + */ + public ZipArchiveEntry(final ZipArchiveEntry entry) throws ZipException { + this((java.util.zip.ZipEntry) entry); + setInternalAttributes(entry.getInternalAttributes()); + setExternalAttributes(entry.getExternalAttributes()); + setExtraFields(getAllExtraFieldsNoCopy()); + setPlatform(entry.getPlatform()); + final GeneralPurposeBit other = entry.getGeneralPurposeBit(); + setGeneralPurposeBit(other == null ? null : + (GeneralPurposeBit) other.clone()); + } + + /** + */ + protected ZipArchiveEntry() { + this(""); + } + + /** + * Creates a new zip entry taking some information from the given + * file and using the provided name. + * + * <p>The name will be adjusted to end with a forward slash "/" if + * the file is a directory. If the file is not a directory a + * potential trailing forward slash will be stripped from the + * entry name.</p> + * @param inputFile file to create the entry from + * @param entryName name of the entry + */ + public ZipArchiveEntry(final File inputFile, final String entryName) { + this(inputFile.isDirectory() && !entryName.endsWith("/") ? + entryName + "/" : entryName); + if (inputFile.isFile()){ + setSize(inputFile.length()); + } + setTime(inputFile.lastModified()); + // TODO are there any other fields we can set here? + } + + /** + * Overwrite clone. 
+ * @return a cloned copy of this ZipArchiveEntry + */ + @Override + public Object clone() { + final ZipArchiveEntry e = (ZipArchiveEntry) super.clone(); + + e.setInternalAttributes(getInternalAttributes()); + e.setExternalAttributes(getExternalAttributes()); + e.setExtraFields(getAllExtraFieldsNoCopy()); + return e; + } + + /** + * Returns the compression method of this entry, or -1 if the + * compression method has not been specified. + * + * @return compression method + * + * @since 1.1 + */ + @Override + public int getMethod() { + return method; + } + + /** + * Sets the compression method of this entry. + * + * @param method compression method + * + * @since 1.1 + */ + @Override + public void setMethod(final int method) { + if (method < 0) { + throw new IllegalArgumentException( + "ZIP compression method can not be negative: " + method); + } + this.method = method; + } + + /** + * Retrieves the internal file attributes. + * + * <p><b>Note</b>: {@link ZipArchiveInputStream} is unable to fill + * this field, you must use {@link ZipFile} if you want to read + * entries using this attribute.</p> + * + * @return the internal file attributes + */ + public int getInternalAttributes() { + return internalAttributes; + } + + /** + * Sets the internal file attributes. + * @param value an <code>int</code> value + */ + public void setInternalAttributes(final int value) { + internalAttributes = value; + } + + /** + * Retrieves the external file attributes. + * + * <p><b>Note</b>: {@link ZipArchiveInputStream} is unable to fill + * this field, you must use {@link ZipFile} if you want to read + * entries using this attribute.</p> + * + * @return the external file attributes + */ + public long getExternalAttributes() { + return externalAttributes; + } + + /** + * Sets the external file attributes. + * @param value an <code>long</code> value + */ + public void setExternalAttributes(final long value) { + externalAttributes = value; + } + + /** + * Sets Unix permissions in a way that is understood by Info-Zip's + * unzip command. + * @param mode an <code>int</code> value + */ + public void setUnixMode(final int mode) { + // CheckStyle:MagicNumberCheck OFF - no point + setExternalAttributes((mode << SHORT_SHIFT) + // MS-DOS read-only attribute + | ((mode & 0200) == 0 ? 1 : 0) + // MS-DOS directory flag + | (isDirectory() ? 0x10 : 0)); + // CheckStyle:MagicNumberCheck ON + platform = PLATFORM_UNIX; + } + + /** + * Unix permission. + * @return the unix permissions + */ + public int getUnixMode() { + return platform != PLATFORM_UNIX ? 0 : + (int) ((getExternalAttributes() >> SHORT_SHIFT) & SHORT_MASK); + } + + /** + * Returns true if this entry represents a unix symlink, + * in which case the entry's content contains the target path + * for the symlink. + * + * @since 1.5 + * @return true if the entry represents a unix symlink, false otherwise. + */ + public boolean isUnixSymlink() { + return (getUnixMode() & UnixStat.FILE_TYPE_FLAG) == UnixStat.LINK_FLAG; + } + + /** + * Platform specification to put into the "version made + * by" part of the central file header. + * + * @return PLATFORM_FAT unless {@link #setUnixMode setUnixMode} + * has been called, in which case PLATFORM_UNIX will be returned. + */ + public int getPlatform() { + return platform; + } + + /** + * Set the platform (UNIX or FAT). + * @param platform an <code>int</code> value - 0 is FAT, 3 is UNIX + */ + protected void setPlatform(final int platform) { + this.platform = platform; + } + + /** + * Gets currently configured alignment. 
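A minimal sketch of the Unix-mode round trip provided by setUnixMode/getUnixMode above; the entry name and permissions are illustrative:

    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;

    public class UnixModeSketch {
        public static void main(String[] args) {
            ZipArchiveEntry script = new ZipArchiveEntry("bin/run.sh");
            script.setUnixMode(0755);                   // also switches the platform to PLATFORM_UNIX
            System.out.println(script.getPlatform());   // 3
            System.out.println(Integer.toOctalString(script.getUnixMode())); // 755
            System.out.println(script.isUnixSymlink()); // false, no file-type bits were set
        }
    }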
+ * + * @return + * alignment for this entry. + * @since 1.14 + */ + protected int getAlignment() { + return this.alignment; + } + + /** + * Sets alignment for this entry. + * + * @param alignment + * requested alignment, 0 for default. + * @since 1.14 + */ + public void setAlignment(int alignment) { + if ((alignment & (alignment - 1)) != 0 || alignment > 0xffff) { + throw new IllegalArgumentException("Invalid value for alignment, must be power of two and no bigger than " + + 0xffff + " but is " + alignment); + } + this.alignment = alignment; + } + + /** + * Replaces all currently attached extra fields with the new array. + * @param fields an array of extra fields + */ + public void setExtraFields(final ZipExtraField[] fields) { + final List<ZipExtraField> newFields = new ArrayList<>(); + for (final ZipExtraField field : fields) { + if (field instanceof UnparseableExtraFieldData) { + unparseableExtra = (UnparseableExtraFieldData) field; + } else { + newFields.add( field); + } + } + extraFields = newFields.toArray(new ZipExtraField[newFields.size()]); + setExtra(); + } + + /** + * Retrieves all extra fields that have been parsed successfully. + * + * <p><b>Note</b>: The set of extra fields may be incomplete when + * {@link ZipArchiveInputStream} has been used as some extra + * fields use the central directory to store additional + * information.</p> + * + * @return an array of the extra fields + */ + public ZipExtraField[] getExtraFields() { + return getParseableExtraFields(); + } + + /** + * Retrieves extra fields. + * @param includeUnparseable whether to also return unparseable + * extra fields as {@link UnparseableExtraFieldData} if such data + * exists. + * @return an array of the extra fields + * + * @since 1.1 + */ + public ZipExtraField[] getExtraFields(final boolean includeUnparseable) { + return includeUnparseable ? + getAllExtraFields() : + getParseableExtraFields(); + } + + private ZipExtraField[] getParseableExtraFieldsNoCopy() { + if (extraFields == null) { + return noExtraFields; + } + return extraFields; + } + + private ZipExtraField[] getParseableExtraFields() { + final ZipExtraField[] parseableExtraFields = getParseableExtraFieldsNoCopy(); + return (parseableExtraFields == extraFields) ? copyOf(parseableExtraFields) : parseableExtraFields; + } + + /** + * Get all extra fields, including unparseable ones. + * @return An array of all extra fields. Not necessarily a copy of internal data structures, hence private method + */ + private ZipExtraField[] getAllExtraFieldsNoCopy() { + if (extraFields == null) { + return getUnparseableOnly(); + } + return unparseableExtra != null ? getMergedFields() : extraFields; + } + + private ZipExtraField[] copyOf(final ZipExtraField[] src){ + return copyOf(src, src.length); + } + + private ZipExtraField[] copyOf(final ZipExtraField[] src, final int length) { + final ZipExtraField[] cpy = new ZipExtraField[length]; + System.arraycopy(src, 0, cpy, 0, Math.min(src.length, length)); + return cpy; + } + + private ZipExtraField[] getMergedFields() { + final ZipExtraField[] zipExtraFields = copyOf(extraFields, extraFields.length + 1); + zipExtraFields[extraFields.length] = unparseableExtra; + return zipExtraFields; + } + + private ZipExtraField[] getUnparseableOnly() { + return unparseableExtra == null ? noExtraFields : new ZipExtraField[] { unparseableExtra }; + } + + private ZipExtraField[] getAllExtraFields() { + final ZipExtraField[] allExtraFieldsNoCopy = getAllExtraFieldsNoCopy(); + return (allExtraFieldsNoCopy == extraFields) ? 
copyOf( allExtraFieldsNoCopy) : allExtraFieldsNoCopy; + } + /** + * Adds an extra field - replacing an already present extra field + * of the same type. + * + * <p>If no extra field of the same type exists, the field will be + * added as last field.</p> + * @param ze an extra field + */ + public void addExtraField(final ZipExtraField ze) { + if (ze instanceof UnparseableExtraFieldData) { + unparseableExtra = (UnparseableExtraFieldData) ze; + } else { + if (extraFields == null) { + extraFields = new ZipExtraField[]{ ze}; + } else { + if (getExtraField(ze.getHeaderId())!= null){ + removeExtraField(ze.getHeaderId()); + } + final ZipExtraField[] zipExtraFields = copyOf(extraFields, extraFields.length + 1); + zipExtraFields[zipExtraFields.length -1] = ze; + extraFields = zipExtraFields; + } + } + setExtra(); + } + + /** + * Adds an extra field - replacing an already present extra field + * of the same type. + * + * <p>The new extra field will be the first one.</p> + * @param ze an extra field + */ + public void addAsFirstExtraField(final ZipExtraField ze) { + if (ze instanceof UnparseableExtraFieldData) { + unparseableExtra = (UnparseableExtraFieldData) ze; + } else { + if (getExtraField(ze.getHeaderId()) != null){ + removeExtraField(ze.getHeaderId()); + } + final ZipExtraField[] copy = extraFields; + final int newLen = extraFields != null ? extraFields.length + 1: 1; + extraFields = new ZipExtraField[newLen]; + extraFields[0] = ze; + if (copy != null){ + System.arraycopy(copy, 0, extraFields, 1, extraFields.length - 1); + } + } + setExtra(); + } + + /** + * Remove an extra field. + * @param type the type of extra field to remove + */ + public void removeExtraField(final ZipShort type) { + if (extraFields == null) { + throw new java.util.NoSuchElementException(); + } + + final List<ZipExtraField> newResult = new ArrayList<>(); + for (final ZipExtraField extraField : extraFields) { + if (!type.equals(extraField.getHeaderId())){ + newResult.add( extraField); + } + } + if (extraFields.length == newResult.size()) { + throw new java.util.NoSuchElementException(); + } + extraFields = newResult.toArray(new ZipExtraField[newResult.size()]); + setExtra(); + } + + /** + * Removes unparseable extra field data. + * + * @since 1.1 + */ + public void removeUnparseableExtraFieldData() { + if (unparseableExtra == null) { + throw new java.util.NoSuchElementException(); + } + unparseableExtra = null; + setExtra(); + } + + /** + * Looks up an extra field by its header id. + * + * @param type the header id + * @return null if no such field exists. + */ + public ZipExtraField getExtraField(final ZipShort type) { + if (extraFields != null) { + for (final ZipExtraField extraField : extraFields) { + if (type.equals(extraField.getHeaderId())) { + return extraField; + } + } + } + return null; + } + + /** + * Looks up extra field data that couldn't be parsed correctly. + * + * @return null if no such field exists. + * + * @since 1.1 + */ + public UnparseableExtraFieldData getUnparseableExtraFieldData() { + return unparseableExtra; + } + + /** + * Parses the given bytes as extra field data and consumes any + * unparseable data as an {@link UnparseableExtraFieldData} + * instance. 
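A minimal sketch of the add/lookup/remove cycle for extra fields described above, using AsiExtraField (another ZipExtraField implementation from the same package) with illustrative values:

    import org.apache.commons.compress.archivers.zip.AsiExtraField;
    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
    import org.apache.commons.compress.archivers.zip.ZipExtraField;

    public class ExtraFieldSketch {
        public static void main(String[] args) {
            ZipArchiveEntry entry = new ZipArchiveEntry("readme.txt");

            AsiExtraField asi = new AsiExtraField();
            asi.setMode(0644);
            entry.addExtraField(asi);                   // replaces any field with the same header id

            ZipExtraField found = entry.getExtraField(asi.getHeaderId());
            System.out.println(found != null);          // true

            entry.removeExtraField(asi.getHeaderId());  // NoSuchElementException if it were absent
            System.out.println(entry.getExtraFields().length); // 0
        }
    }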
+ * @param extra an array of bytes to be parsed into extra fields + * @throws RuntimeException if the bytes cannot be parsed + * @throws RuntimeException on error + */ + @Override + public void setExtra(final byte[] extra) throws RuntimeException { + try { + final ZipExtraField[] local = + ExtraFieldUtils.parse(extra, true, + ExtraFieldUtils.UnparseableExtraField.READ); + mergeExtraFields(local, true); + } catch (final ZipException e) { + // actually this is not possible as of Commons Compress 1.1 + throw new RuntimeException("Error parsing extra fields for entry: " //NOSONAR + + getName() + " - " + e.getMessage(), e); + } + } + + /** + * Unfortunately {@link java.util.zip.ZipOutputStream + * java.util.zip.ZipOutputStream} seems to access the extra data + * directly, so overriding getExtra doesn't help - we need to + * modify super's data directly. + */ + protected void setExtra() { + super.setExtra(ExtraFieldUtils.mergeLocalFileDataData(getAllExtraFieldsNoCopy())); + } + + /** + * Sets the central directory part of extra fields. + * @param b an array of bytes to be parsed into extra fields + */ + public void setCentralDirectoryExtra(final byte[] b) { + try { + final ZipExtraField[] central = + ExtraFieldUtils.parse(b, false, + ExtraFieldUtils.UnparseableExtraField.READ); + mergeExtraFields(central, false); + } catch (final ZipException e) { + throw new RuntimeException(e.getMessage(), e); //NOSONAR + } + } + + /** + * Retrieves the extra data for the local file data. + * @return the extra data for local file + */ + public byte[] getLocalFileDataExtra() { + final byte[] extra = getExtra(); + return extra != null ? extra : EMPTY; + } + + /** + * Retrieves the extra data for the central directory. + * @return the central directory extra data + */ + public byte[] getCentralDirectoryExtra() { + return ExtraFieldUtils.mergeCentralDirectoryData(getAllExtraFieldsNoCopy()); + } + + /** + * Get the name of the entry. + * + * <p>This method returns the raw name as it is stored inside of the archive.</p> + * + * @return the entry name + */ + @Override + public String getName() { + return name == null ? super.getName() : name; + } + + /** + * Is this entry a directory? + * @return true if the entry is a directory + */ + @Override + public boolean isDirectory() { + return getName().endsWith("/"); + } + + /** + * Set the name of the entry. + * @param name the name to use + */ + protected void setName(String name) { + if (name != null && getPlatform() == PLATFORM_FAT + && !name.contains("/")) { + name = name.replace('\\', '/'); + } + this.name = name; + } + + /** + * Gets the uncompressed size of the entry data. + * + * <p><b>Note</b>: {@link ZipArchiveInputStream} may create + * entries that return {@link #SIZE_UNKNOWN SIZE_UNKNOWN} as long + * as the entry hasn't been read completely.</p> + * + * @return the entry size + */ + @Override + public long getSize() { + return size; + } + + /** + * Sets the uncompressed size of the entry data. + * @param size the uncompressed size in bytes + * @throws IllegalArgumentException if the specified size is less + * than 0 + */ + @Override + public void setSize(final long size) { + if (size < 0) { + throw new IllegalArgumentException("invalid entry size"); + } + this.size = size; + } + + /** + * Sets the name using the raw bytes and the string created from + * it by guessing or using the configured encoding. 
+ * @param name the name to use created from the raw bytes using + * the guessed or configured encoding + * @param rawName the bytes originally read as name from the + * archive + * @since 1.2 + */ + protected void setName(final String name, final byte[] rawName) { + setName(name); + this.rawName = rawName; + } + + /** + * Returns the raw bytes that made up the name before it has been + * converted using the configured or guessed encoding. + * + * <p>This method will return null if this instance has not been + * read from an archive.</p> + * + * @return the raw name bytes + * @since 1.2 + */ + public byte[] getRawName() { + if (rawName != null) { + final byte[] b = new byte[rawName.length]; + System.arraycopy(rawName, 0, b, 0, rawName.length); + return b; + } + return null; + } + + protected long getLocalHeaderOffset() { + return this.localHeaderOffset; + } + + protected void setLocalHeaderOffset(long localHeaderOffset) { + this.localHeaderOffset = localHeaderOffset; + } + + @Override + public long getDataOffset() { + return dataOffset; + } + + /** + * Sets the data offset. + * + * @param dataOffset + * new value of data offset. + */ + protected void setDataOffset(long dataOffset) { + this.dataOffset = dataOffset; + } + + @Override + public boolean isStreamContiguous() { + return isStreamContiguous; + } + + protected void setStreamContiguous(boolean isStreamContiguous) { + this.isStreamContiguous = isStreamContiguous; + } + + /** + * Get the hashCode of the entry. + * This uses the name as the hashcode. + * @return a hashcode. + */ + @Override + public int hashCode() { + // this method has severe consequences on performance. We cannot rely + // on the super.hashCode() method since super.getName() always return + // the empty string in the current implemention (there's no setter) + // so it is basically draining the performance of a hashmap lookup + return getName().hashCode(); + } + + /** + * The "general purpose bit" field. + * @return the general purpose bit + * @since 1.1 + */ + public GeneralPurposeBit getGeneralPurposeBit() { + return gpb; + } + + /** + * The "general purpose bit" field. + * @param b the general purpose bit + * @since 1.1 + */ + public void setGeneralPurposeBit(final GeneralPurposeBit b) { + gpb = b; + } + + /** + * If there are no extra fields, use the given fields as new extra + * data - otherwise merge the fields assuming the existing fields + * and the new fields stem from different locations inside the + * archive. + * @param f the extra fields to merge + * @param local whether the new fields originate from local data + */ + private void mergeExtraFields(final ZipExtraField[] f, final boolean local) + throws ZipException { + if (extraFields == null) { + setExtraFields(f); + } else { + for (final ZipExtraField element : f) { + ZipExtraField existing; + if (element instanceof UnparseableExtraFieldData) { + existing = unparseableExtra; + } else { + existing = getExtraField(element.getHeaderId()); + } + if (existing == null) { + addExtraField(element); + } else { + if (local) { + final byte[] b = element.getLocalFileDataData(); + existing.parseFromLocalFileData(b, 0, b.length); + } else { + final byte[] b = element.getCentralDirectoryData(); + existing.parseFromCentralDirectoryData(b, 0, b.length); + } + } + } + setExtra(); + } + } + + /** + * Wraps {@link java.util.zip.ZipEntry#getTime} with a {@link Date} as the + * entry's last modified date. 
+ * + * <p>Changes to the implementation of {@link java.util.zip.ZipEntry#getTime} + * leak through and the returned value may depend on your local + * time zone as well as your version of Java.</p> + */ + @Override + public Date getLastModifiedDate() { + return new Date(getTime()); + } + + /* (non-Javadoc) + * @see java.lang.Object#equals(java.lang.Object) + */ + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + final ZipArchiveEntry other = (ZipArchiveEntry) obj; + final String myName = getName(); + final String otherName = other.getName(); + if (myName == null) { + if (otherName != null) { + return false; + } + } else if (!myName.equals(otherName)) { + return false; + } + String myComment = getComment(); + String otherComment = other.getComment(); + if (myComment == null) { + myComment = ""; + } + if (otherComment == null) { + otherComment = ""; + } + return getTime() == other.getTime() + && myComment.equals(otherComment) + && getInternalAttributes() == other.getInternalAttributes() + && getPlatform() == other.getPlatform() + && getExternalAttributes() == other.getExternalAttributes() + && getMethod() == other.getMethod() + && getSize() == other.getSize() + && getCrc() == other.getCrc() + && getCompressedSize() == other.getCompressedSize() + && Arrays.equals(getCentralDirectoryExtra(), + other.getCentralDirectoryExtra()) + && Arrays.equals(getLocalFileDataExtra(), + other.getLocalFileDataExtra()) + && localHeaderOffset == other.localHeaderOffset + && dataOffset == other.dataOffset + && gpb.equals(other.gpb); + } + + /** + * Sets the "version made by" field. + * @param versionMadeBy "version made by" field + * @since 1.11 + */ + public void setVersionMadeBy(final int versionMadeBy) { + this.versionMadeBy = versionMadeBy; + } + + /** + * Sets the "version required to expand" field. + * @param versionRequired "version required to expand" field + * @since 1.11 + */ + public void setVersionRequired(final int versionRequired) { + this.versionRequired = versionRequired; + } + + /** + * The "version required to expand" field. + * @return "version required to expand" field + * @since 1.11 + */ + public int getVersionRequired() { + return versionRequired; + } + + /** + * The "version made by" field. + * @return "version made by" field + * @since 1.11 + */ + public int getVersionMadeBy() { + return versionMadeBy; + } + + /** + * The content of the flags field. + * @return content of the flags field + * @since 1.11 + */ + public int getRawFlag() { + return rawFlag; + } + + /** + * Sets the content of the flags field. + * @param rawFlag content of the flags field + * @since 1.11 + */ + public void setRawFlag(final int rawFlag) { + this.rawFlag = rawFlag; + } + + /** + * The source of the name field value. + * @return source of the name field value + * @since 1.16 + */ + public NameSource getNameSource() { + return nameSource; + } + + /** + * Sets the source of the name field value. + * @param nameSource source of the name field value + * @since 1.16 + */ + public void setNameSource(NameSource nameSource) { + this.nameSource = nameSource; + } + + /** + * The source of the comment field value. + * @return source of the comment field value + * @since 1.16 + */ + public CommentSource getCommentSource() { + return commentSource; + } + + /** + * Sets the source of the comment field value. 
+ * @param commentSource source of the comment field value + * @since 1.16 + */ + public void setCommentSource(CommentSource commentSource) { + this.commentSource = commentSource; + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryPredicate.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryPredicate.java new file mode 100644 index 000000000..e7122b9a6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryPredicate.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +/** + * A predicate to test if a #ZipArchiveEntry matches a criteria. + * Some day this can extend java.util.function.Predicate + * + * @since 1.10 + */ +public interface ZipArchiveEntryPredicate { + /** + * Indicate if the given entry should be included in the operation + * @param zipArchiveEntry the entry to test + * @return true if the entry should be included + */ + boolean test(ZipArchiveEntry zipArchiveEntry); +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryRequest.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryRequest.java new file mode 100644 index 000000000..9c13f7e27 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryRequest.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import org.apache.commons.compress.parallel.InputStreamSupplier; + +import java.io.InputStream; + +/** + * A Thread-safe representation of a ZipArchiveEntry that is used to add entries to parallel archives. + * + * @since 1.10 + */ +public class ZipArchiveEntryRequest { + /* + The zipArchiveEntry is not thread safe, and cannot be safely accessed by the getters of this class. 
It is safely accessible during the construction part of this class and also after the + thread pools have been shut down. + */ + private final ZipArchiveEntry zipArchiveEntry; + private final InputStreamSupplier payloadSupplier; + private final int method; + + + private ZipArchiveEntryRequest(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier payloadSupplier) { + // this constructor has "safe" access to all member variables on zipArchiveEntry + this.zipArchiveEntry = zipArchiveEntry; + this.payloadSupplier = payloadSupplier; + this.method = zipArchiveEntry.getMethod(); + } + + /** + * Create a ZipArchiveEntryRequest + * @param zipArchiveEntry The entry to use + * @param payloadSupplier The payload that will be added to the zip entry. + * @return The newly created request + */ + public static ZipArchiveEntryRequest createZipArchiveEntryRequest(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier payloadSupplier) { + return new ZipArchiveEntryRequest(zipArchiveEntry, payloadSupplier); + } + + /** + * The payload that will be added to this zip entry + * @return The input stream. + */ + public InputStream getPayloadStream() { + return payloadSupplier.get(); + } + + /** + * The compression method to use + * @return The compression method to use + */ + public int getMethod(){ + return method; + } + + + /** + * Gets the underlying entry. Do not use this method from threads that did not create the instance. + * @return the zipArchiveEntry that is the basis for this request + */ + ZipArchiveEntry getZipArchiveEntry() { + return zipArchiveEntry; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryRequestSupplier.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryRequestSupplier.java new file mode 100644 index 000000000..c6ac957af --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryRequestSupplier.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +/** + * Supplies {@link ZipArchiveEntryRequest}. + * + * Implementations are required to support thread-handover. While an instance will + * not be accessed concurrently by multiple threads, it will be called by + * a different thread than it was created on. + * + * @since 1.13 + */ +public interface ZipArchiveEntryRequestSupplier { + + /** + * Supply a {@link ZipArchiveEntryRequest} to be added to a parallel archive. + * @return The {@link ZipArchiveEntryRequest} instance. Should never be null.
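A minimal sketch of how a ZipArchiveEntryRequest is typically built; the entry name and payload are illustrative, and the compression method is set up front because the constructor above captures it:

    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;
    import java.util.zip.ZipEntry;

    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
    import org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest;
    import org.apache.commons.compress.parallel.InputStreamSupplier;

    public class EntryRequestSketch {
        public static void main(String[] args) {
            ZipArchiveEntry entry = new ZipArchiveEntry("notes/hello.txt");
            entry.setMethod(ZipEntry.DEFLATED);         // captured when the request is created

            InputStreamSupplier payload =
                () -> new ByteArrayInputStream("hello".getBytes(StandardCharsets.UTF_8));

            ZipArchiveEntryRequest request =
                ZipArchiveEntryRequest.createZipArchiveEntryRequest(entry, payload);
            System.out.println(request.getMethod());    // 8, i.e. DEFLATED
        }
    }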
+ */ + ZipArchiveEntryRequest get(); +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java new file mode 100644 index 000000000..196e40239 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java @@ -0,0 +1,1278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.zip.CRC32; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; +import java.util.zip.ZipEntry; +import java.util.zip.ZipException; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; +import org.apache.commons.compress.utils.ArchiveUtils; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; +import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; +import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; +import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; + +/** + * Implements an input stream that can read Zip archives. + * + * <p>As of Apache Commons Compress it transparently supports Zip64 + * extensions and thus individual entries and archives larger than 4 + * GB or with more than 65536 entries.</p> + * + * <p>The {@link ZipFile} class is preferred when reading from files + * as {@link ZipArchiveInputStream} is limited by not being able to + * read the central directory header before returning entries. 
In + * particular {@link ZipArchiveInputStream}</p> + * + * <ul> + * + * <li>may return entries that are not part of the central directory + * at all and shouldn't be considered part of the archive.</li> + * + * <li>may return several entries with the same name.</li> + * + * <li>will not return internal or external attributes.</li> + * + * <li>may return incomplete extra field data.</li> + * + * <li>may return unknown sizes and CRC values for entries until the + * next entry has been reached if the archive uses the data + * descriptor feature.</li> + * + * </ul> + * + * @see ZipFile + * @NotThreadSafe + */ +public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics { + + /** The zip encoding to use for filenames and the file comment. */ + private final ZipEncoding zipEncoding; + + // the provided encoding (for unit tests) + final String encoding; + + /** Whether to look for and use Unicode extra fields. */ + private final boolean useUnicodeExtraFields; + + /** Wrapped stream, will always be a PushbackInputStream. */ + private final InputStream in; + + /** Inflater used for all deflated entries. */ + private final Inflater inf = new Inflater(true); + + /** Buffer used to read from the wrapped stream. */ + private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE); + + /** The entry that is currently being read. */ + private CurrentEntry current = null; + + /** Whether the stream has been closed. */ + private boolean closed = false; + + /** Whether the stream has reached the central directory - and thus found all entries. */ + private boolean hitCentralDirectory = false; + + /** + * When reading a stored entry that uses the data descriptor this + * stream has to read the full entry and caches it. This is the + * cache. + */ + private ByteArrayInputStream lastStoredEntry = null; + + /** Whether the stream will try to read STORED entries that use a data descriptor. 
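Given the limitations listed in the class comment above, a typical streaming read loop looks roughly like the following sketch; the archive path is illustrative, and ZipFile remains preferable whenever the input is a seekable file:

    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
    import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;

    public class ReadLoopSketch {
        public static void main(String[] args) throws IOException {
            try (InputStream fileIn = Files.newInputStream(Paths.get("archive.zip"));
                 ZipArchiveInputStream zin = new ZipArchiveInputStream(fileIn)) {
                ZipArchiveEntry entry;
                byte[] buffer = new byte[8192];
                while ((entry = zin.getNextZipEntry()) != null) {
                    if (!zin.canReadEntryData(entry)) {
                        continue;                       // encrypted or unsupported method, skip it
                    }
                    long total = 0;
                    int n;
                    while ((n = zin.read(buffer)) != -1) {
                        total += n;                     // consume the entry's data
                    }
                    System.out.println(entry.getName() + ": " + total + " bytes");
                }
            }
        }
    }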
*/ + private boolean allowStoredEntriesWithDataDescriptor = false; + + /** Count decompressed bytes for current entry */ + private long uncompressedCount = 0; + + private static final int LFH_LEN = 30; + /* + local file header signature WORD + version needed to extract SHORT + general purpose bit flag SHORT + compression method SHORT + last mod file time SHORT + last mod file date SHORT + crc-32 WORD + compressed size WORD + uncompressed size WORD + file name length SHORT + extra field length SHORT + */ + + private static final int CFH_LEN = 46; + /* + central file header signature WORD + version made by SHORT + version needed to extract SHORT + general purpose bit flag SHORT + compression method SHORT + last mod file time SHORT + last mod file date SHORT + crc-32 WORD + compressed size WORD + uncompressed size WORD + file name length SHORT + extra field length SHORT + file comment length SHORT + disk number start SHORT + internal file attributes SHORT + external file attributes WORD + relative offset of local header WORD + */ + + private static final long TWO_EXP_32 = ZIP64_MAGIC + 1; + + // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) + private final byte[] lfhBuf = new byte[LFH_LEN]; + private final byte[] skipBuf = new byte[1024]; + private final byte[] shortBuf = new byte[SHORT]; + private final byte[] wordBuf = new byte[WORD]; + private final byte[] twoDwordBuf = new byte[2 * DWORD]; + + private int entriesRead = 0; + + /** + * Create an instance using UTF-8 encoding + * @param inputStream the stream to wrap + */ + public ZipArchiveInputStream(final InputStream inputStream) { + this(inputStream, ZipEncodingHelper.UTF8); + } + + /** + * Create an instance using the specified encoding + * @param inputStream the stream to wrap + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @since 1.5 + */ + public ZipArchiveInputStream(final InputStream inputStream, final String encoding) { + this(inputStream, encoding, true); + } + + /** + * Create an instance using the specified encoding + * @param inputStream the stream to wrap + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @param useUnicodeExtraFields whether to use InfoZIP Unicode + * Extra Fields (if present) to set the file names. + */ + public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) { + this(inputStream, encoding, useUnicodeExtraFields, false); + } + + /** + * Create an instance using the specified encoding + * @param inputStream the stream to wrap + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @param useUnicodeExtraFields whether to use InfoZIP Unicode + * Extra Fields (if present) to set the file names. 
+ * @param allowStoredEntriesWithDataDescriptor whether the stream + * will try to read STORED entries that use a data descriptor + * @since 1.1 + */ + public ZipArchiveInputStream(final InputStream inputStream, + final String encoding, + final boolean useUnicodeExtraFields, + final boolean allowStoredEntriesWithDataDescriptor) { + this.encoding = encoding; + zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); + this.useUnicodeExtraFields = useUnicodeExtraFields; + in = new PushbackInputStream(inputStream, buf.capacity()); + this.allowStoredEntriesWithDataDescriptor = + allowStoredEntriesWithDataDescriptor; + // haven't read anything so far + buf.limit(0); + } + + public ZipArchiveEntry getNextZipEntry() throws IOException { + uncompressedCount = 0; + + boolean firstEntry = true; + if (closed || hitCentralDirectory) { + return null; + } + if (current != null) { + closeEntry(); + firstEntry = false; + } + + long currentHeaderOffset = getBytesRead(); + try { + if (firstEntry) { + // split archives have a special signature before the + // first local file header - look for it and fail with + // the appropriate error message if this is a split + // archive. + readFirstLocalFileHeader(lfhBuf); + } else { + readFully(lfhBuf); + } + } catch (final EOFException e) { //NOSONAR + return null; + } + + final ZipLong sig = new ZipLong(lfhBuf); + if (!sig.equals(ZipLong.LFH_SIG)) { + if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG) || isApkSigningBlock(lfhBuf)) { + hitCentralDirectory = true; + skipRemainderOfArchive(); + return null; + } + throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue())); + } + + int off = WORD; + current = new CurrentEntry(); + + final int versionMadeBy = ZipShort.getValue(lfhBuf, off); + off += SHORT; + current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK); + + final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off); + final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); + final ZipEncoding entryEncoding = hasUTF8Flag ? 
ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; + current.hasDataDescriptor = gpFlag.usesDataDescriptor(); + current.entry.setGeneralPurposeBit(gpFlag); + + off += SHORT; + + current.entry.setMethod(ZipShort.getValue(lfhBuf, off)); + off += SHORT; + + final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off)); + current.entry.setTime(time); + off += WORD; + + ZipLong size = null, cSize = null; + if (!current.hasDataDescriptor) { + current.entry.setCrc(ZipLong.getValue(lfhBuf, off)); + off += WORD; + + cSize = new ZipLong(lfhBuf, off); + off += WORD; + + size = new ZipLong(lfhBuf, off); + off += WORD; + } else { + off += 3 * WORD; + } + + final int fileNameLen = ZipShort.getValue(lfhBuf, off); + + off += SHORT; + + final int extraLen = ZipShort.getValue(lfhBuf, off); + off += SHORT; // NOSONAR - assignment as documentation + + final byte[] fileName = new byte[fileNameLen]; + readFully(fileName); + current.entry.setName(entryEncoding.decode(fileName), fileName); + if (hasUTF8Flag) { + current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG); + } + + final byte[] extraData = new byte[extraLen]; + readFully(extraData); + current.entry.setExtra(extraData); + + if (!hasUTF8Flag && useUnicodeExtraFields) { + ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null); + } + + processZip64Extra(size, cSize); + + current.entry.setLocalHeaderOffset(currentHeaderOffset); + current.entry.setDataOffset(getBytesRead()); + current.entry.setStreamContiguous(true); + + ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod()); + if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) { + if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) { + InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize()); + switch (m) { + case UNSHRINKING: + current.in = new UnshrinkingInputStream(bis); + break; + case IMPLODING: + current.in = new ExplodingInputStream( + current.entry.getGeneralPurposeBit().getSlidingDictionarySize(), + current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), + bis); + break; + case BZIP2: + current.in = new BZip2CompressorInputStream(bis); + break; + case ENHANCED_DEFLATED: + current.in = new Deflate64CompressorInputStream(bis); + break; + default: + // we should never get here as all supported methods have been covered + // will cause an error when read is invoked, don't throw an exception here so people can + // skip unsupported entries + break; + } + } + } else if (m == ZipMethod.ENHANCED_DEFLATED) { + current.in = new Deflate64CompressorInputStream(in); + } + + entriesRead++; + return current.entry; + } + + /** + * Fills the given array with the first local file header and + * deals with splitting/spanning markers that may prefix the first + * LFH. + */ + private void readFirstLocalFileHeader(final byte[] lfh) throws IOException { + readFully(lfh); + final ZipLong sig = new ZipLong(lfh); + if (sig.equals(ZipLong.DD_SIG)) { + throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING); + } + + if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) { + // The archive is not really split as only one segment was + // needed in the end. Just skip over the marker. 
+ final byte[] missedLfhBytes = new byte[4]; + readFully(missedLfhBytes); + System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4); + System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4); + } + } + + /** + * Records whether a Zip64 extra is present and sets the size + * information from it if sizes are 0xFFFFFFFF and the entry + * doesn't use a data descriptor. + */ + private void processZip64Extra(final ZipLong size, final ZipLong cSize) { + final Zip64ExtendedInformationExtraField z64 = + (Zip64ExtendedInformationExtraField) + current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); + current.usesZip64 = z64 != null; + if (!current.hasDataDescriptor) { + if (z64 != null // same as current.usesZip64 but avoids NPE warning + && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) { + current.entry.setCompressedSize(z64.getCompressedSize().getLongValue()); + current.entry.setSize(z64.getSize().getLongValue()); + } else { + current.entry.setCompressedSize(cSize.getValue()); + current.entry.setSize(size.getValue()); + } + } + } + + @Override + public ArchiveEntry getNextEntry() throws IOException { + return getNextZipEntry(); + } + + /** + * Whether this class is able to read the given entry. + * + * <p>May return false if it is set up to use encryption or a + * compression method that hasn't been implemented yet.</p> + * @since 1.1 + */ + @Override + public boolean canReadEntryData(final ArchiveEntry ae) { + if (ae instanceof ZipArchiveEntry) { + final ZipArchiveEntry ze = (ZipArchiveEntry) ae; + return ZipUtil.canHandleEntryData(ze) + && supportsDataDescriptorFor(ze) + && supportsCompressedSizeFor(ze); + } + return false; + } + + @Override + public int read(final byte[] buffer, final int offset, final int length) throws IOException { + if (closed) { + throw new IOException("The stream is closed"); + } + + if (current == null) { + return -1; + } + + // avoid int overflow, check null buffer + if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) { + throw new ArrayIndexOutOfBoundsException(); + } + + ZipUtil.checkRequestedFeatures(current.entry); + if (!supportsDataDescriptorFor(current.entry)) { + throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR, + current.entry); + } + if (!supportsCompressedSizeFor(current.entry)) { + throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE, + current.entry); + } + + int read; + if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { + read = readStored(buffer, offset, length); + } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) { + read = readDeflated(buffer, offset, length); + } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode() + || current.entry.getMethod() == ZipMethod.IMPLODING.getCode() + || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() + || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { + read = current.in.read(buffer, offset, length); + } else { + throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), + current.entry); + } + + if (read >= 0) { + current.crc.update(buffer, offset, read); + uncompressedCount += read; + } + + return read; + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { + return current.bytesRead; + } else if (current.entry.getMethod() == 
ZipArchiveOutputStream.DEFLATED) { + return getBytesInflated(); + } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) { + return ((UnshrinkingInputStream) current.in).getCompressedCount(); + } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) { + return ((ExplodingInputStream) current.in).getCompressedCount(); + } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) { + return ((Deflate64CompressorInputStream) current.in).getCompressedCount(); + } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { + return ((BZip2CompressorInputStream) current.in).getCompressedCount(); + } else { + return -1; + } + } + + /** + * @since 1.17 + */ + @Override + public long getUncompressedCount() { + return uncompressedCount; + } + + /** + * Implementation of read for STORED entries. + */ + private int readStored(final byte[] buffer, final int offset, final int length) throws IOException { + + if (current.hasDataDescriptor) { + if (lastStoredEntry == null) { + readStoredEntry(); + } + return lastStoredEntry.read(buffer, offset, length); + } + + final long csize = current.entry.getSize(); + if (current.bytesRead >= csize) { + return -1; + } + + if (buf.position() >= buf.limit()) { + buf.position(0); + final int l = in.read(buf.array()); + if (l == -1) { + buf.limit(0); + throw new IOException("Truncated ZIP file"); + } + buf.limit(l); + + count(l); + current.bytesReadFromStream += l; + } + + int toRead = Math.min(buf.remaining(), length); + if ((csize - current.bytesRead) < toRead) { + // if it is smaller than toRead then it fits into an int + toRead = (int) (csize - current.bytesRead); + } + buf.get(buffer, offset, toRead); + current.bytesRead += toRead; + return toRead; + } + + /** + * Implementation of read for DEFLATED entries. + */ + private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException { + final int read = readFromInflater(buffer, offset, length); + if (read <= 0) { + if (inf.finished()) { + return -1; + } else if (inf.needsDictionary()) { + throw new ZipException("This archive needs a preset dictionary" + + " which is not supported by Commons" + + " Compress."); + } else if (read == -1) { + throw new IOException("Truncated ZIP file"); + } + } + return read; + } + + /** + * Potentially reads more bytes to fill the inflater's buffer and + * reads from it. + */ + private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException { + int read = 0; + do { + if (inf.needsInput()) { + final int l = fill(); + if (l > 0) { + current.bytesReadFromStream += buf.limit(); + } else if (l == -1) { + return -1; + } else { + break; + } + } + try { + read = inf.inflate(buffer, offset, length); + } catch (final DataFormatException e) { + throw (IOException) new ZipException(e.getMessage()).initCause(e); + } + } while (read == 0 && inf.needsInput()); + return read; + } + + @Override + public void close() throws IOException { + if (!closed) { + closed = true; + try { + in.close(); + } finally { + inf.end(); + } + } + } + + /** + * Skips over and discards value bytes of data from this input + * stream. + * + * <p>This implementation may end up skipping over some smaller + * number of bytes, possibly 0, if and only if it reaches the end + * of the underlying stream.</p> + * + * <p>The actual number of bytes skipped is returned.</p> + * + * @param value the number of bytes to be skipped. + * @return the actual number of bytes skipped. 
+ * @throws IOException - if an I/O error occurs. + * @throws IllegalArgumentException - if value is negative. + */ + @Override + public long skip(final long value) throws IOException { + if (value >= 0) { + long skipped = 0; + while (skipped < value) { + final long rem = value - skipped; + final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length)); + if (x == -1) { + return skipped; + } + skipped += x; + } + return skipped; + } + throw new IllegalArgumentException(); + } + + /** + * Checks if the signature matches what is expected for a zip file. + * Does not currently handle self-extracting zips which may have arbitrary + * leading content. + * + * @param signature the bytes to check + * @param length the number of bytes to check + * @return true, if this stream is a zip archive stream, false otherwise + */ + public static boolean matches(final byte[] signature, final int length) { + if (length < ZipArchiveOutputStream.LFH_SIG.length) { + return false; + } + + return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file + || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip + || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip + || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes()); + } + + private static boolean checksig(final byte[] signature, final byte[] expected) { + for (int i = 0; i < expected.length; i++) { + if (signature[i] != expected[i]) { + return false; + } + } + return true; + } + + /** + * Closes the current ZIP archive entry and positions the underlying + * stream to the beginning of the next entry. All per-entry variables + * and data structures are cleared. + * <p> + * If the compressed size of this entry is included in the entry header, + * then any outstanding bytes are simply skipped from the underlying + * stream without uncompressing them. This allows an entry to be safely + * closed even if the compression method is unsupported. + * <p> + * In case we don't know the compressed size of this entry or have + * already buffered too much data from the underlying stream to support + * uncompression, then the uncompression process is completed and the + * end position of the stream is adjusted based on the result of that + * process. + * + * @throws IOException if an error occurs + */ + private void closeEntry() throws IOException { + if (closed) { + throw new IOException("The stream is closed"); + } + if (current == null) { + return; + } + + // Ensure all entry bytes are read + if (currentEntryHasOutstandingBytes()) { + drainCurrentEntryData(); + } else { + // this is guaranteed to exhaust the stream + skip(Long.MAX_VALUE); //NOSONAR + + final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED + ? 
getBytesInflated() : current.bytesRead; + + // this is at most a single read() operation and can't + // exceed the range of int + final int diff = (int) (current.bytesReadFromStream - inB); + + // Pushback any required bytes + if (diff > 0) { + pushback(buf.array(), buf.limit() - diff, diff); + current.bytesReadFromStream -= diff; + } + + // Drain remainder of entry if not all data bytes were required + if (currentEntryHasOutstandingBytes()) { + drainCurrentEntryData(); + } + } + + if (lastStoredEntry == null && current.hasDataDescriptor) { + readDataDescriptor(); + } + + inf.reset(); + buf.clear().flip(); + current = null; + lastStoredEntry = null; + } + + /** + * If the compressed size of the current entry is included in the entry header + * and there are any outstanding bytes in the underlying stream, then + * this returns true. + * + * @return true, if current entry is determined to have outstanding bytes, false otherwise + */ + private boolean currentEntryHasOutstandingBytes() { + return current.bytesReadFromStream <= current.entry.getCompressedSize() + && !current.hasDataDescriptor; + } + + /** + * Read all data of the current entry from the underlying stream + * that hasn't been read, yet. + */ + private void drainCurrentEntryData() throws IOException { + long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream; + while (remaining > 0) { + final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining)); + if (n < 0) { + throw new EOFException("Truncated ZIP entry: " + + ArchiveUtils.sanitize(current.entry.getName())); + } + count(n); + remaining -= n; + } + } + + /** + * Get the number of bytes Inflater has actually processed. + * + * <p>for Java < Java7 the getBytes* methods in + * Inflater/Deflater seem to return unsigned ints rather than + * longs that start over with 0 at 2^32.</p> + * + * <p>The stream knows how many bytes it has read, but not how + * many the Inflater actually consumed - it should be between the + * total number of bytes read for the entry and the total number + * minus the last read operation. 
Here we just try to make the + * value close enough to the bytes we've read by assuming the + * number of bytes consumed must be smaller than (or equal to) the + * number of bytes read but not smaller by more than 2^32.</p> + */ + private long getBytesInflated() { + long inB = inf.getBytesRead(); + if (current.bytesReadFromStream >= TWO_EXP_32) { + while (inB + TWO_EXP_32 <= current.bytesReadFromStream) { + inB += TWO_EXP_32; + } + } + return inB; + } + + private int fill() throws IOException { + if (closed) { + throw new IOException("The stream is closed"); + } + final int length = in.read(buf.array()); + if (length > 0) { + buf.limit(length); + count(buf.limit()); + inf.setInput(buf.array(), 0, buf.limit()); + } + return length; + } + + private void readFully(final byte[] b) throws IOException { + readFully(b, 0); + } + + private void readFully(final byte[] b, final int off) throws IOException { + final int len = b.length - off; + final int count = IOUtils.readFully(in, b, off, len); + count(count); + if (count < len) { + throw new EOFException(); + } + } + + private void readDataDescriptor() throws IOException { + readFully(wordBuf); + ZipLong val = new ZipLong(wordBuf); + if (ZipLong.DD_SIG.equals(val)) { + // data descriptor with signature, skip sig + readFully(wordBuf); + val = new ZipLong(wordBuf); + } + current.entry.setCrc(val.getValue()); + + // if there is a ZIP64 extra field, sizes are eight bytes + // each, otherwise four bytes each. Unfortunately some + // implementations - namely Java7 - use eight bytes without + // using a ZIP64 extra field - + // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588 + + // just read 16 bytes and check whether bytes nine to twelve + // look like one of the signatures of what could follow a data + // descriptor (ignoring archive decryption headers for now). + // If so, push back eight bytes and assume sizes are four + // bytes, otherwise sizes are eight bytes each. + readFully(twoDwordBuf); + final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD); + if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) { + pushback(twoDwordBuf, DWORD, DWORD); + current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf)); + current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD)); + } else { + current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf)); + current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD)); + } + } + + /** + * Whether this entry requires a data descriptor this library can work with. + * + * @return true if allowStoredEntriesWithDataDescriptor is true, + * the entry doesn't require any data descriptor or the method is + * DEFLATED or ENHANCED_DEFLATED. + */ + private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { + return !entry.getGeneralPurposeBit().usesDataDescriptor() + + || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED) + || entry.getMethod() == ZipEntry.DEFLATED + || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode(); + } + + /** + * Whether the compressed size for the entry is either known or + * not required by the compression method being used. 
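+ *
+ * <p>A minimal usage sketch showing the four-argument constructor
+ * and the per-entry read loop (the archive name and charset are
+ * illustrative only): callers that enable
+ * allowStoredEntriesWithDataDescriptor typically guard each entry
+ * with {@link #canReadEntryData} before reading it.</p>
+ *
+ * <pre>
+ * try (ZipArchiveInputStream zin = new ZipArchiveInputStream(
+ *          Files.newInputStream(Paths.get("archive.zip")), "UTF-8", true, true)) {
+ *     ZipArchiveEntry entry;
+ *     while ((entry = zin.getNextZipEntry()) != null) {
+ *         if (!zin.canReadEntryData(entry)) {
+ *             continue; // skip unsupported method or encryption
+ *         }
+ *         final byte[] buffer = new byte[8192];
+ *         int n;
+ *         while ((n = zin.read(buffer, 0, buffer.length)) != -1) {
+ *             // process buffer[0..n)
+ *         }
+ *     }
+ * }
+ * </pre>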
+ */ + private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) { + return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN + || entry.getMethod() == ZipEntry.DEFLATED + || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() + || (entry.getGeneralPurposeBit().usesDataDescriptor() + && allowStoredEntriesWithDataDescriptor + && entry.getMethod() == ZipEntry.STORED); + } + + /** + * Caches a stored entry that uses the data descriptor. + * + * <ul> + * <li>Reads a stored entry until the signature of a local file + * header, central directory header or data descriptor has been + * found.</li> + * <li>Stores all entry data in lastStoredEntry.</p> + * <li>Rewinds the stream to position at the data + * descriptor.</li> + * <li>reads the data descriptor</li> + * </ul> + * + * <p>After calling this method the entry should know its size, + * the entry's data is cached and the stream is positioned at the + * next local file or central directory header.</p> + */ + private void readStoredEntry() throws IOException { + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int off = 0; + boolean done = false; + + // length of DD without signature + final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD; + + while (!done) { + final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); + if (r <= 0) { + // read the whole archive without ever finding a + // central directory + throw new IOException("Truncated ZIP file"); + } + if (r + off < 4) { + // buffer too small to check for a signature, loop + off += r; + continue; + } + + done = bufferContainsSignature(bos, off, r, ddLen); + if (!done) { + off = cacheBytesRead(bos, off, r, ddLen); + } + } + + final byte[] b = bos.toByteArray(); + lastStoredEntry = new ByteArrayInputStream(b); + } + + private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); + private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); + private static final byte[] DD = ZipLong.DD_SIG.getBytes(); + + /** + * Checks whether the current buffer contains the signature of a + * "data descriptor", "local file header" or + * "central directory entry". 
+ *
+ * <p>If it contains such a signature, reads the data descriptor
+ * and positions the stream right after the data descriptor.</p>
+ */
+ private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
+ throws IOException {
+
+ boolean done = false;
+ int readTooMuch = 0;
+ for (int i = 0; !done && i < offset + lastRead - 4; i++) {
+ if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
+ if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
+ || (buf.array()[i + 2] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
+ // found a LFH or CFH:
+ readTooMuch = offset + lastRead - i - expectedDDLen;
+ done = true;
+ }
+ else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
+ // found DD:
+ readTooMuch = offset + lastRead - i;
+ done = true;
+ }
+ if (done) {
+ // * push back bytes read in excess as well as the data
+ // descriptor
+ // * copy the remaining bytes to cache
+ // * read data descriptor
+ pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
+ bos.write(buf.array(), 0, i);
+ readDataDescriptor();
+ }
+ }
+ }
+ return done;
+ }
+
+ /**
+ * If the last read bytes could hold a data descriptor and an
+ * incomplete signature then save the last bytes to the front of
+ * the buffer and cache everything in front of the potential data
+ * descriptor into the given ByteArrayOutputStream.
+ *
+ * <p>Data descriptor plus incomplete signature (3 bytes in the
+ * worst case) can be 20 bytes max.</p>
+ */
+ private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
+ final int cacheable = offset + lastRead - expecteDDLen - 3;
+ if (cacheable > 0) {
+ bos.write(buf.array(), 0, cacheable);
+ System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
+ offset = expecteDDLen + 3;
+ } else {
+ offset += lastRead;
+ }
+ return offset;
+ }
+
+ private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
+ ((PushbackInputStream) in).unread(buf, offset, length);
+ pushedBackBytes(length);
+ }
+
+ // End of Central Directory Record
+ // end of central dir signature WORD
+ // number of this disk SHORT
+ // number of the disk with the
+ // start of the central directory SHORT
+ // total number of entries in the
+ // central directory on this disk SHORT
+ // total number of entries in
+ // the central directory SHORT
+ // size of the central directory WORD
+ // offset of start of central
+ // directory with respect to
+ // the starting disk number WORD
+ // .ZIP file comment length SHORT
+ // .ZIP file comment up to 64KB
+ //
+
+ /**
+ * Reads the stream until it finds the "End of central directory
+ * record" and consumes it as well.
+ */
+ private void skipRemainderOfArchive() throws IOException {
+ // skip over central directory. One LFH has been read too much
+ // already. The calculation discounts file names and extra
+ // data so it will be too short.
+ realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
+ findEocdRecord();
+ realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
+ readFully(shortBuf);
+ // file comment
+ realSkip(ZipShort.getValue(shortBuf));
+ }
+
+ /**
+ * Reads forward until the signature of the "End of central
+ * directory" record is found.
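+ *
+ * <p>For illustration: the smallest possible "End of central
+ * directory" record (an archive with no entries and no comment) is
+ * 22 bytes, the signature 0x50 0x4B 0x05 0x06 followed by 18 zero
+ * bytes.</p>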
+ */ + private void findEocdRecord() throws IOException { + int currentByte = -1; + boolean skipReadCall = false; + while (skipReadCall || (currentByte = readOneByte()) > -1) { + skipReadCall = false; + if (!isFirstByteOfEocdSig(currentByte)) { + continue; + } + currentByte = readOneByte(); + if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) { + if (currentByte == -1) { + break; + } + skipReadCall = isFirstByteOfEocdSig(currentByte); + continue; + } + currentByte = readOneByte(); + if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) { + if (currentByte == -1) { + break; + } + skipReadCall = isFirstByteOfEocdSig(currentByte); + continue; + } + currentByte = readOneByte(); + if (currentByte == -1 + || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) { + break; + } + skipReadCall = isFirstByteOfEocdSig(currentByte); + } + } + + /** + * Skips bytes by reading from the underlying stream rather than + * the (potentially inflating) archive stream - which {@link + * #skip} would do. + * + * Also updates bytes-read counter. + */ + private void realSkip(final long value) throws IOException { + if (value >= 0) { + long skipped = 0; + while (skipped < value) { + final long rem = value - skipped; + final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length)); + if (x == -1) { + return; + } + count(x); + skipped += x; + } + return; + } + throw new IllegalArgumentException(); + } + + /** + * Reads bytes by reading from the underlying stream rather than + * the (potentially inflating) archive stream - which {@link #read} would do. + * + * Also updates bytes-read counter. + */ + private int readOneByte() throws IOException { + final int b = in.read(); + if (b != -1) { + count(1); + } + return b; + } + + private boolean isFirstByteOfEocdSig(final int b) { + return b == ZipArchiveOutputStream.EOCD_SIG[0]; + } + + private static final byte[] APK_SIGNING_BLOCK_MAGIC = new byte[] { + 'A', 'P', 'K', ' ', 'S', 'i', 'g', ' ', 'B', 'l', 'o', 'c', 'k', ' ', '4', '2', + }; + private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE); + + /** + * Checks whether this might be an APK Signing Block. + * + * <p>Unfortunately the APK signing block does not start with some kind of signature, it rather ends with one. It + * starts with a length, so what we do is parse the suspect length, skip ahead far enough, look for the signature + * and if we've found it, return true.</p> + * + * @param suspectLocalFileHeader the bytes read from the underlying stream in the expectation that they would hold + * the local file header of the next entry. 
+ * + * @return true if this looks like a APK signing block + * + * @see <a href="https://source.android.com/security/apksigning/v2">https://source.android.com/security/apksigning/v2</a> + */ + private boolean isApkSigningBlock(byte[] suspectLocalFileHeader) throws IOException { + // length of block excluding the size field itself + BigInteger len = ZipEightByteInteger.getValue(suspectLocalFileHeader); + // LFH has already been read and all but the first eight bytes contain (part of) the APK signing block, + // also subtract 16 bytes in order to position us at the magic string + BigInteger toSkip = len.add(BigInteger.valueOf(DWORD - suspectLocalFileHeader.length + - (long) APK_SIGNING_BLOCK_MAGIC.length)); + byte[] magic = new byte[APK_SIGNING_BLOCK_MAGIC.length]; + + try { + if (toSkip.signum() < 0) { + // suspectLocalFileHeader contains the start of suspect magic string + int off = suspectLocalFileHeader.length + toSkip.intValue(); + // length was shorter than magic length + if (off < DWORD) { + return false; + } + int bytesInBuffer = Math.abs(toSkip.intValue()); + System.arraycopy(suspectLocalFileHeader, off, magic, 0, Math.min(bytesInBuffer, magic.length)); + if (bytesInBuffer < magic.length) { + readFully(magic, bytesInBuffer); + } + } else { + while (toSkip.compareTo(LONG_MAX) > 0) { + realSkip(Long.MAX_VALUE); + toSkip = toSkip.add(LONG_MAX.negate()); + } + realSkip(toSkip.longValue()); + readFully(magic); + } + } catch (EOFException ex) { //NOSONAR + // length was invalid + return false; + } + return Arrays.equals(magic, APK_SIGNING_BLOCK_MAGIC); + } + + /** + * Structure collecting information for the entry that is + * currently being read. + */ + private static final class CurrentEntry { + + /** + * Current ZIP entry. + */ + private final ZipArchiveEntry entry = new ZipArchiveEntry(); + + /** + * Does the entry use a data descriptor? + */ + private boolean hasDataDescriptor; + + /** + * Does the entry have a ZIP64 extended information extra field. + */ + private boolean usesZip64; + + /** + * Number of bytes of entry content read by the client if the + * entry is STORED. + */ + private long bytesRead; + + /** + * Number of bytes of entry content read from the stream. + * + * <p>This may be more than the actual entry's length as some + * stuff gets buffered up and needs to be pushed back when the + * end of the entry has been reached.</p> + */ + private long bytesReadFromStream; + + /** + * The checksum calculated as the current entry is read. + */ + private final CRC32 crc = new CRC32(); + + /** + * The input stream decompressing the data for shrunk and imploded entries. + */ + private InputStream in; + } + + /** + * Bounded input stream adapted from commons-io + */ + private class BoundedInputStream extends InputStream { + + /** the wrapped input stream */ + private final InputStream in; + + /** the max length to provide */ + private final long max; + + /** the number of bytes already returned */ + private long pos = 0; + + /** + * Creates a new <code>BoundedInputStream</code> that wraps the given input + * stream and limits it to a certain size. 
+ * + * @param in The wrapped input stream + * @param size The maximum number of bytes to return + */ + public BoundedInputStream(final InputStream in, final long size) { + this.max = size; + this.in = in; + } + + @Override + public int read() throws IOException { + if (max >= 0 && pos >= max) { + return -1; + } + final int result = in.read(); + pos++; + count(1); + current.bytesReadFromStream++; + return result; + } + + @Override + public int read(final byte[] b) throws IOException { + return this.read(b, 0, b.length); + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (max >= 0 && pos >= max) { + return -1; + } + final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; + final int bytesRead = in.read(b, off, (int) maxRead); + + if (bytesRead == -1) { + return -1; + } + + pos += bytesRead; + count(bytesRead); + current.bytesReadFromStream += bytesRead; + return bytesRead; + } + + @Override + public long skip(final long n) throws IOException { + final long toSkip = max >= 0 ? Math.min(n, max - pos) : n; + final long skippedBytes = IOUtils.skip(in, toSkip); + pos += skippedBytes; + return skippedBytes; + } + + @Override + public int available() throws IOException { + if (max >= 0 && pos >= max) { + return 0; + } + return in.available(); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java new file mode 100644 index 000000000..76aa0e1a6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java @@ -0,0 +1,1696 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.zip; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.Calendar; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.zip.Deflater; +import java.util.zip.ZipException; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.utils.IOUtils; + +import static org.apache.commons.compress.archivers.zip.ZipConstants.DATA_DESCRIPTOR_MIN_VERSION; +import static org.apache.commons.compress.archivers.zip.ZipConstants.DEFLATE_MIN_VERSION; +import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; +import static org.apache.commons.compress.archivers.zip.ZipConstants.INITIAL_VERSION; +import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; +import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; +import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; +import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; +import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MIN_VERSION; +import static org.apache.commons.compress.archivers.zip.ZipLong.putLong; +import static org.apache.commons.compress.archivers.zip.ZipShort.putShort; + +/** + * Reimplementation of {@link java.util.zip.ZipOutputStream + * java.util.zip.ZipOutputStream} that does handle the extended + * functionality of this package, especially internal/external file + * attributes and extra fields with different layouts for local file + * data and central directory entries. + * + * <p>This class will try to use {@link + * java.nio.channels.SeekableByteChannel} when it knows that the + * output is going to go to a file.</p> + * + * <p>If SeekableByteChannel cannot be used, this implementation will use + * a Data Descriptor to store size and CRC information for {@link + * #DEFLATED DEFLATED} entries, this means, you don't need to + * calculate them yourself. Unfortunately this is not possible for + * the {@link #STORED STORED} method, here setting the CRC and + * uncompressed size information is required before {@link + * #putArchiveEntry(ArchiveEntry)} can be called.</p> + * + * <p>As of Apache Commons Compress 1.3 it transparently supports Zip64 + * extensions and thus individual entries and archives larger than 4 + * GB or with more than 65536 entries in most cases but explicit + * control is provided via {@link #setUseZip64}. 
If the stream can not + * use SeekableByteChannel and you try to write a ZipArchiveEntry of + * unknown size then Zip64 extensions will be disabled by default.</p> + * + * @NotThreadSafe + */ +public class ZipArchiveOutputStream extends ArchiveOutputStream { + + static final int BUFFER_SIZE = 512; + private static final int LFH_SIG_OFFSET = 0; + private static final int LFH_VERSION_NEEDED_OFFSET = 4; + private static final int LFH_GPB_OFFSET = 6; + private static final int LFH_METHOD_OFFSET = 8; + private static final int LFH_TIME_OFFSET = 10; + private static final int LFH_CRC_OFFSET = 14; + private static final int LFH_COMPRESSED_SIZE_OFFSET = 18; + private static final int LFH_ORIGINAL_SIZE_OFFSET = 22; + private static final int LFH_FILENAME_LENGTH_OFFSET = 26; + private static final int LFH_EXTRA_LENGTH_OFFSET = 28; + private static final int LFH_FILENAME_OFFSET = 30; + private static final int CFH_SIG_OFFSET = 0; + private static final int CFH_VERSION_MADE_BY_OFFSET = 4; + private static final int CFH_VERSION_NEEDED_OFFSET = 6; + private static final int CFH_GPB_OFFSET = 8; + private static final int CFH_METHOD_OFFSET = 10; + private static final int CFH_TIME_OFFSET = 12; + private static final int CFH_CRC_OFFSET = 16; + private static final int CFH_COMPRESSED_SIZE_OFFSET = 20; + private static final int CFH_ORIGINAL_SIZE_OFFSET = 24; + private static final int CFH_FILENAME_LENGTH_OFFSET = 28; + private static final int CFH_EXTRA_LENGTH_OFFSET = 30; + private static final int CFH_COMMENT_LENGTH_OFFSET = 32; + private static final int CFH_DISK_NUMBER_OFFSET = 34; + private static final int CFH_INTERNAL_ATTRIBUTES_OFFSET = 36; + private static final int CFH_EXTERNAL_ATTRIBUTES_OFFSET = 38; + private static final int CFH_LFH_OFFSET = 42; + private static final int CFH_FILENAME_OFFSET = 46; + + /** indicates if this archive is finished. protected for use in Jar implementation */ + protected boolean finished = false; + + /** + * Compression method for deflated entries. + */ + public static final int DEFLATED = java.util.zip.ZipEntry.DEFLATED; + + /** + * Default compression level for deflated entries. + */ + public static final int DEFAULT_COMPRESSION = Deflater.DEFAULT_COMPRESSION; + + /** + * Compression method for stored entries. + */ + public static final int STORED = java.util.zip.ZipEntry.STORED; + + /** + * default encoding for file names and comment. + */ + static final String DEFAULT_ENCODING = ZipEncodingHelper.UTF8; + + /** + * General purpose flag, which indicates that filenames are + * written in UTF-8. + * @deprecated use {@link GeneralPurposeBit#UFT8_NAMES_FLAG} instead + */ + @Deprecated + public static final int EFS_FLAG = GeneralPurposeBit.UFT8_NAMES_FLAG; + + private static final byte[] EMPTY = new byte[0]; + + /** + * Current entry. + */ + private CurrentEntry entry; + + /** + * The file comment. + */ + private String comment = ""; + + /** + * Compression level for next entry. + */ + private int level = DEFAULT_COMPRESSION; + + /** + * Has the compression level changed when compared to the last + * entry? + */ + private boolean hasCompressionLevelChanged = false; + + /** + * Default compression method for next entry. + */ + private int method = java.util.zip.ZipEntry.DEFLATED; + + /** + * List of ZipArchiveEntries written so far. + */ + private final List<ZipArchiveEntry> entries = + new LinkedList<>(); + + private final StreamCompressor streamCompressor; + + /** + * Start of central directory. + */ + private long cdOffset = 0; + + /** + * Length of central directory. 
+ */ + private long cdLength = 0; + + /** + * Helper, a 0 as ZipShort. + */ + private static final byte[] ZERO = {0, 0}; + + /** + * Helper, a 0 as ZipLong. + */ + private static final byte[] LZERO = {0, 0, 0, 0}; + + private static final byte[] ONE = ZipLong.getBytes(1L); + + /** + * Holds some book-keeping data for each entry. + */ + private final Map<ZipArchiveEntry, EntryMetaData> metaData = + new HashMap<>(); + + /** + * The encoding to use for filenames and the file comment. + * + * <p>For a list of possible values see <a + * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. + * Defaults to UTF-8.</p> + */ + private String encoding = DEFAULT_ENCODING; + + /** + * The zip encoding to use for filenames and the file comment. + * + * This field is of internal use and will be set in {@link + * #setEncoding(String)}. + */ + private ZipEncoding zipEncoding = + ZipEncodingHelper.getZipEncoding(DEFAULT_ENCODING); + + + /** + * This Deflater object is used for output. + * + */ + protected final Deflater def; + /** + * Optional random access output. + */ + private final SeekableByteChannel channel; + + private final OutputStream out; + + /** + * whether to use the general purpose bit flag when writing UTF-8 + * filenames or not. + */ + private boolean useUTF8Flag = true; + + /** + * Whether to encode non-encodable file names as UTF-8. + */ + private boolean fallbackToUTF8 = false; + + /** + * whether to create UnicodePathExtraField-s for each entry. + */ + private UnicodeExtraFieldPolicy createUnicodeExtraFields = UnicodeExtraFieldPolicy.NEVER; + + /** + * Whether anything inside this archive has used a ZIP64 feature. + * + * @since 1.3 + */ + private boolean hasUsedZip64 = false; + + private Zip64Mode zip64Mode = Zip64Mode.AsNeeded; + + private final byte[] copyBuffer = new byte[32768]; + private final Calendar calendarInstance = Calendar.getInstance(); + + /** + * Creates a new ZIP OutputStream filtering the underlying stream. + * @param out the outputstream to zip + */ + public ZipArchiveOutputStream(final OutputStream out) { + this.out = out; + this.channel = null; + def = new Deflater(level, true); + streamCompressor = StreamCompressor.create(out, def); + } + + /** + * Creates a new ZIP OutputStream writing to a File. Will use + * random access if possible. + * @param file the file to zip to + * @throws IOException on error + */ + public ZipArchiveOutputStream(final File file) throws IOException { + def = new Deflater(level, true); + OutputStream o = null; + SeekableByteChannel _channel = null; + StreamCompressor _streamCompressor = null; + try { + _channel = Files.newByteChannel(file.toPath(), + EnumSet.of(StandardOpenOption.CREATE, StandardOpenOption.WRITE, + StandardOpenOption.READ, + StandardOpenOption.TRUNCATE_EXISTING)); + // will never get opened properly when an exception is thrown so doesn't need to get closed + _streamCompressor = StreamCompressor.create(_channel, def); //NOSONAR + } catch (final IOException e) { + IOUtils.closeQuietly(_channel); + _channel = null; + o = new FileOutputStream(file); + _streamCompressor = StreamCompressor.create(o, def); + } + out = o; + channel = _channel; + streamCompressor = _streamCompressor; + } + + /** + * Creates a new ZIP OutputStream writing to a SeekableByteChannel. 
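+ *
+ * <p>A minimal sketch, assuming an in-memory archive is wanted;
+ * the entry name and payload are illustrative only:</p>
+ *
+ * <pre>
+ * SeekableInMemoryByteChannel channel = new SeekableInMemoryByteChannel();
+ * try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(channel)) {
+ *     ZipArchiveEntry e = new ZipArchiveEntry("hello.txt");
+ *     zos.putArchiveEntry(e);
+ *     zos.write("Hello".getBytes(StandardCharsets.UTF_8));
+ *     zos.closeArchiveEntry();
+ * }
+ * // the finished archive can afterwards be retrieved from the channel
+ * </pre>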
+ * + * <p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to write to an in-memory archive using random + * access.</p> + * + * @param channel the channel to zip to + * @throws IOException on error + * @since 1.13 + */ + public ZipArchiveOutputStream(SeekableByteChannel channel) throws IOException { + this.channel = channel; + def = new Deflater(level, true); + streamCompressor = StreamCompressor.create(channel, def); + out = null; + } + + /** + * This method indicates whether this archive is writing to a + * seekable stream (i.e., to a random access file). + * + * <p>For seekable streams, you don't need to calculate the CRC or + * uncompressed size for {@link #STORED} entries before + * invoking {@link #putArchiveEntry(ArchiveEntry)}. + * @return true if seekable + */ + public boolean isSeekable() { + return channel != null; + } + + /** + * The encoding to use for filenames and the file comment. + * + * <p>For a list of possible values see <a + * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. + * Defaults to UTF-8.</p> + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + */ + public void setEncoding(final String encoding) { + this.encoding = encoding; + this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); + if (useUTF8Flag && !ZipEncodingHelper.isUTF8(encoding)) { + useUTF8Flag = false; + } + } + + /** + * The encoding to use for filenames and the file comment. + * + * @return null if using the platform's default character encoding. + */ + public String getEncoding() { + return encoding; + } + + /** + * Whether to set the language encoding flag if the file name + * encoding is UTF-8. + * + * <p>Defaults to true.</p> + * + * @param b whether to set the language encoding flag if the file + * name encoding is UTF-8 + */ + public void setUseLanguageEncodingFlag(final boolean b) { + useUTF8Flag = b && ZipEncodingHelper.isUTF8(encoding); + } + + /** + * Whether to create Unicode Extra Fields. + * + * <p>Defaults to NEVER.</p> + * + * @param b whether to create Unicode Extra Fields. + */ + public void setCreateUnicodeExtraFields(final UnicodeExtraFieldPolicy b) { + createUnicodeExtraFields = b; + } + + /** + * Whether to fall back to UTF and the language encoding flag if + * the file name cannot be encoded using the specified encoding. + * + * <p>Defaults to false.</p> + * + * @param b whether to fall back to UTF and the language encoding + * flag if the file name cannot be encoded using the specified + * encoding. + */ + public void setFallbackToUTF8(final boolean b) { + fallbackToUTF8 = b; + } + + /** + * Whether Zip64 extensions will be used. + * + * <p>When setting the mode to {@link Zip64Mode#Never Never}, + * {@link #putArchiveEntry}, {@link #closeArchiveEntry}, {@link + * #finish} or {@link #close} may throw a {@link + * Zip64RequiredException} if the entry's size or the total size + * of the archive exceeds 4GB or there are more than 65536 entries + * inside the archive. Any archive created in this mode will be + * readable by implementations that don't support Zip64.</p> + * + * <p>When setting the mode to {@link Zip64Mode#Always Always}, + * Zip64 extensions will be used for all entries. 
Any archive + * created in this mode may be unreadable by implementations that + * don't support Zip64 even if all its contents would be.</p> + * + * <p>When setting the mode to {@link Zip64Mode#AsNeeded + * AsNeeded}, Zip64 extensions will transparently be used for + * those entries that require them. This mode can only be used if + * the uncompressed size of the {@link ZipArchiveEntry} is known + * when calling {@link #putArchiveEntry} or the archive is written + * to a seekable output (i.e. you have used the {@link + * #ZipArchiveOutputStream(java.io.File) File-arg constructor}) - + * this mode is not valid when the output stream is not seekable + * and the uncompressed size is unknown when {@link + * #putArchiveEntry} is called.</p> + * + * <p>If no entry inside the resulting archive requires Zip64 + * extensions then {@link Zip64Mode#Never Never} will create the + * smallest archive. {@link Zip64Mode#AsNeeded AsNeeded} will + * create a slightly bigger archive if the uncompressed size of + * any entry has initially been unknown and create an archive + * identical to {@link Zip64Mode#Never Never} otherwise. {@link + * Zip64Mode#Always Always} will create an archive that is at + * least 24 bytes per entry bigger than the one {@link + * Zip64Mode#Never Never} would create.</p> + * + * <p>Defaults to {@link Zip64Mode#AsNeeded AsNeeded} unless + * {@link #putArchiveEntry} is called with an entry of unknown + * size and data is written to a non-seekable stream - in this + * case the default is {@link Zip64Mode#Never Never}.</p> + * + * @since 1.3 + * @param mode Whether Zip64 extensions will be used. + */ + public void setUseZip64(final Zip64Mode mode) { + zip64Mode = mode; + } + + /** + * {@inheritDoc} + * @throws Zip64RequiredException if the archive's size exceeds 4 + * GByte or there are more than 65535 entries inside the archive + * and {@link #setUseZip64} is {@link Zip64Mode#Never}. + */ + @Override + public void finish() throws IOException { + if (finished) { + throw new IOException("This archive has already been finished"); + } + + if (entry != null) { + throw new IOException("This archive contains unclosed entries."); + } + + cdOffset = streamCompressor.getTotalBytesWritten(); + writeCentralDirectoryInChunks(); + + cdLength = streamCompressor.getTotalBytesWritten() - cdOffset; + writeZip64CentralDirectory(); + writeCentralDirectoryEnd(); + metaData.clear(); + entries.clear(); + streamCompressor.close(); + finished = true; + } + + private void writeCentralDirectoryInChunks() throws IOException { + final int NUM_PER_WRITE = 1000; + final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(70 * NUM_PER_WRITE); + int count = 0; + for (final ZipArchiveEntry ze : entries) { + byteArrayOutputStream.write(createCentralFileHeader(ze)); + if (++count > NUM_PER_WRITE){ + writeCounted(byteArrayOutputStream.toByteArray()); + byteArrayOutputStream.reset(); + count = 0; + } + } + writeCounted(byteArrayOutputStream.toByteArray()); + } + + /** + * Writes all necessary data for this entry. + * @throws IOException on error + * @throws Zip64RequiredException if the entry's uncompressed or + * compressed size exceeds 4 GByte and {@link #setUseZip64} + * is {@link Zip64Mode#Never}. 
+ */ + @Override + public void closeArchiveEntry() throws IOException { + preClose(); + + flushDeflater(); + + final long bytesWritten = streamCompressor.getTotalBytesWritten() - entry.dataStart; + final long realCrc = streamCompressor.getCrc32(); + entry.bytesRead = streamCompressor.getBytesRead(); + final Zip64Mode effectiveMode = getEffectiveZip64Mode(entry.entry); + final boolean actuallyNeedsZip64 = handleSizesAndCrc(bytesWritten, realCrc, effectiveMode); + closeEntry(actuallyNeedsZip64, false); + streamCompressor.reset(); + } + + /** + * Writes all necessary data for this entry. + * + * @param phased This entry is second phase of a 2-phase zip creation, size, compressed size and crc + * are known in ZipArchiveEntry + * @throws IOException on error + * @throws Zip64RequiredException if the entry's uncompressed or + * compressed size exceeds 4 GByte and {@link #setUseZip64} + * is {@link Zip64Mode#Never}. + */ + private void closeCopiedEntry(final boolean phased) throws IOException { + preClose(); + entry.bytesRead = entry.entry.getSize(); + final Zip64Mode effectiveMode = getEffectiveZip64Mode(entry.entry); + final boolean actuallyNeedsZip64 = checkIfNeedsZip64(effectiveMode); + closeEntry(actuallyNeedsZip64, phased); + } + + private void closeEntry(final boolean actuallyNeedsZip64, final boolean phased) throws IOException { + if (!phased && channel != null) { + rewriteSizesAndCrc(actuallyNeedsZip64); + } + + if (!phased) { + writeDataDescriptor(entry.entry); + } + entry = null; + } + + private void preClose() throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + + if (entry == null) { + throw new IOException("No current entry to close"); + } + + if (!entry.hasWritten) { + write(EMPTY, 0, 0); + } + } + + /** + * Adds an archive entry with a raw input stream. + * + * If crc, size and compressed size are supplied on the entry, these values will be used as-is. + * Zip64 status is re-established based on the settings in this stream, and the supplied value + * is ignored. + * + * The entry is put and closed immediately. + * + * @param entry The archive entry to add + * @param rawStream The raw input stream of a different entry. May be compressed/encrypted. + * @throws IOException If copying fails + */ + public void addRawArchiveEntry(final ZipArchiveEntry entry, final InputStream rawStream) + throws IOException { + final ZipArchiveEntry ae = new ZipArchiveEntry(entry); + if (hasZip64Extra(ae)) { + // Will be re-added as required. this may make the file generated with this method + // somewhat smaller than standard mode, + // since standard mode is unable to remove the zip 64 header. + ae.removeExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); + } + final boolean is2PhaseSource = ae.getCrc() != ZipArchiveEntry.CRC_UNKNOWN + && ae.getSize() != ArchiveEntry.SIZE_UNKNOWN + && ae.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN; + putArchiveEntry(ae, is2PhaseSource); + copyFromZipInputStream(rawStream); + closeCopiedEntry(is2PhaseSource); + } + + /** + * Ensures all bytes sent to the deflater are written to the stream. + */ + private void flushDeflater() throws IOException { + if (entry.entry.getMethod() == DEFLATED) { + streamCompressor.flushDeflater(); + } + } + + /** + * Ensures the current entry's size and CRC information is set to + * the values just written, verifies it isn't too big in the + * Zip64Mode.Never case and returns whether the entry would + * require a Zip64 extra field. 
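+ *
+ * <p>A related usage sketch, assuming {@code zipOut} is a
+ * ZipArchiveOutputStream writing to a non-seekable stream and the
+ * payload is illustrative: STORED entries written that way must
+ * carry their size and CRC up front, and this method later
+ * verifies that those values match what was actually written.</p>
+ *
+ * <pre>
+ * byte[] data = "payload".getBytes(StandardCharsets.US_ASCII);
+ * ZipArchiveEntry stored = new ZipArchiveEntry("stored.txt");
+ * stored.setMethod(ZipArchiveOutputStream.STORED);
+ * stored.setSize(data.length);
+ * CRC32 crc = new CRC32();
+ * crc.update(data);
+ * stored.setCrc(crc.getValue());
+ * zipOut.putArchiveEntry(stored);
+ * zipOut.write(data);
+ * zipOut.closeArchiveEntry();
+ * </pre>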
+ */
+ private boolean handleSizesAndCrc(final long bytesWritten, final long crc,
+ final Zip64Mode effectiveMode)
+ throws ZipException {
+ if (entry.entry.getMethod() == DEFLATED) {
+ /* It turns out def.getBytesRead() returns wrong values if
+ * the size exceeds 4 GB on Java < Java7
+ entry.entry.setSize(def.getBytesRead());
+ */
+ entry.entry.setSize(entry.bytesRead);
+ entry.entry.setCompressedSize(bytesWritten);
+ entry.entry.setCrc(crc);
+
+ } else if (channel == null) {
+ if (entry.entry.getCrc() != crc) {
+ throw new ZipException("bad CRC checksum for entry "
+ + entry.entry.getName() + ": "
+ + Long.toHexString(entry.entry.getCrc())
+ + " instead of "
+ + Long.toHexString(crc));
+ }
+
+ if (entry.entry.getSize() != bytesWritten) {
+ throw new ZipException("bad size for entry "
+ + entry.entry.getName() + ": "
+ + entry.entry.getSize()
+ + " instead of "
+ + bytesWritten);
+ }
+ } else { /* method is STORED and we used SeekableByteChannel */
+ entry.entry.setSize(bytesWritten);
+ entry.entry.setCompressedSize(bytesWritten);
+ entry.entry.setCrc(crc);
+ }
+
+ return checkIfNeedsZip64(effectiveMode);
+ }
+
+ /**
+ * Verifies the sizes aren't too big in the Zip64Mode.Never case
+ * and returns whether the entry would require a Zip64 extra
+ * field.
+ */
+ private boolean checkIfNeedsZip64(final Zip64Mode effectiveMode)
+ throws ZipException {
+ final boolean actuallyNeedsZip64 = isZip64Required(entry.entry, effectiveMode);
+ if (actuallyNeedsZip64 && effectiveMode == Zip64Mode.Never) {
+ throw new Zip64RequiredException(Zip64RequiredException.getEntryTooBigMessage(entry.entry));
+ }
+ return actuallyNeedsZip64;
+ }
+
+ private boolean isZip64Required(final ZipArchiveEntry entry1, final Zip64Mode requestedMode) {
+ return requestedMode == Zip64Mode.Always || isTooLageForZip32(entry1);
+ }
+
+ private boolean isTooLageForZip32(final ZipArchiveEntry zipArchiveEntry){
+ return zipArchiveEntry.getSize() >= ZIP64_MAGIC || zipArchiveEntry.getCompressedSize() >= ZIP64_MAGIC;
+ }
+
+ /**
+ * When using random access output, write the local file header
+ * and potentially the ZIP64 extra containing the correct CRC and
+ * compressed/uncompressed sizes.
+ */ + private void rewriteSizesAndCrc(final boolean actuallyNeedsZip64) + throws IOException { + final long save = channel.position(); + + channel.position(entry.localDataStart); + writeOut(ZipLong.getBytes(entry.entry.getCrc())); + if (!hasZip64Extra(entry.entry) || !actuallyNeedsZip64) { + writeOut(ZipLong.getBytes(entry.entry.getCompressedSize())); + writeOut(ZipLong.getBytes(entry.entry.getSize())); + } else { + writeOut(ZipLong.ZIP64_MAGIC.getBytes()); + writeOut(ZipLong.ZIP64_MAGIC.getBytes()); + } + + if (hasZip64Extra(entry.entry)) { + final ByteBuffer name = getName(entry.entry); + final int nameLen = name.limit() - name.position(); + // seek to ZIP64 extra, skip header and size information + channel.position(entry.localDataStart + 3 * WORD + 2 * SHORT + + nameLen + 2 * SHORT); + // inside the ZIP64 extra uncompressed size comes + // first, unlike the LFH, CD or data descriptor + writeOut(ZipEightByteInteger.getBytes(entry.entry.getSize())); + writeOut(ZipEightByteInteger.getBytes(entry.entry.getCompressedSize())); + + if (!actuallyNeedsZip64) { + // do some cleanup: + // * rewrite version needed to extract + channel.position(entry.localDataStart - 5 * SHORT); + writeOut(ZipShort.getBytes(versionNeededToExtract(entry.entry.getMethod(), false, false))); + + // * remove ZIP64 extra so it doesn't get written + // to the central directory + entry.entry.removeExtraField(Zip64ExtendedInformationExtraField + .HEADER_ID); + entry.entry.setExtra(); + + // * reset hasUsedZip64 if it has been set because + // of this entry + if (entry.causedUseOfZip64) { + hasUsedZip64 = false; + } + } + } + channel.position(save); + } + + /** + * {@inheritDoc} + * @throws ClassCastException if entry is not an instance of ZipArchiveEntry + * @throws Zip64RequiredException if the entry's uncompressed or + * compressed size is known to exceed 4 GByte and {@link #setUseZip64} + * is {@link Zip64Mode#Never}. + */ + @Override + public void putArchiveEntry(final ArchiveEntry archiveEntry) throws IOException { + putArchiveEntry(archiveEntry, false); + } + + /** + * Writes the headers for an archive entry to the output stream. + * The caller must then write the content to the stream and call + * {@link #closeArchiveEntry()} to complete the process. + + * @param archiveEntry The archiveEntry + * @param phased If true size, compressedSize and crc required to be known up-front in the archiveEntry + * @throws ClassCastException if entry is not an instance of ZipArchiveEntry + * @throws Zip64RequiredException if the entry's uncompressed or + * compressed size is known to exceed 4 GByte and {@link #setUseZip64} + * is {@link Zip64Mode#Never}. 
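+ *
+ * <p>A minimal sketch of the public, non-phased flow; file names
+ * are illustrative only:</p>
+ *
+ * <pre>
+ * try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(new File("out.zip"))) {
+ *     ZipArchiveEntry e = new ZipArchiveEntry("dir/data.txt");
+ *     zos.putArchiveEntry(e);
+ *     Files.copy(Paths.get("data.txt"), zos);
+ *     zos.closeArchiveEntry();
+ * }
+ * </pre>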
+ */ + private void putArchiveEntry(final ArchiveEntry archiveEntry, final boolean phased) throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + + if (entry != null) { + closeArchiveEntry(); + } + + entry = new CurrentEntry((ZipArchiveEntry) archiveEntry); + entries.add(entry.entry); + + setDefaults(entry.entry); + + final Zip64Mode effectiveMode = getEffectiveZip64Mode(entry.entry); + validateSizeInformation(effectiveMode); + + if (shouldAddZip64Extra(entry.entry, effectiveMode)) { + + final Zip64ExtendedInformationExtraField z64 = getZip64Extra(entry.entry); + + ZipEightByteInteger size; + ZipEightByteInteger compressedSize; + if (phased) { + // sizes are already known + size = new ZipEightByteInteger(entry.entry.getSize()); + compressedSize = new ZipEightByteInteger(entry.entry.getCompressedSize()); + } else if (entry.entry.getMethod() == STORED + && entry.entry.getSize() != ArchiveEntry.SIZE_UNKNOWN) { + // actually, we already know the sizes + compressedSize = size = new ZipEightByteInteger(entry.entry.getSize()); + } else { + // just a placeholder, real data will be in data + // descriptor or inserted later via SeekableByteChannel + compressedSize = size = ZipEightByteInteger.ZERO; + } + z64.setSize(size); + z64.setCompressedSize(compressedSize); + entry.entry.setExtra(); + } + + if (entry.entry.getMethod() == DEFLATED && hasCompressionLevelChanged) { + def.setLevel(level); + hasCompressionLevelChanged = false; + } + writeLocalFileHeader((ZipArchiveEntry) archiveEntry, phased); + } + + /** + * Provides default values for compression method and last + * modification time. + */ + private void setDefaults(final ZipArchiveEntry entry) { + if (entry.getMethod() == -1) { // not specified + entry.setMethod(method); + } + + if (entry.getTime() == -1) { // not specified + entry.setTime(System.currentTimeMillis()); + } + } + + /** + * Throws an exception if the size is unknown for a stored entry + * that is written to a non-seekable output or the entry is too + * big to be written without Zip64 extra but the mode has been set + * to Never. + */ + private void validateSizeInformation(final Zip64Mode effectiveMode) + throws ZipException { + // Size/CRC not required if SeekableByteChannel is used + if (entry.entry.getMethod() == STORED && channel == null) { + if (entry.entry.getSize() == ArchiveEntry.SIZE_UNKNOWN) { + throw new ZipException("uncompressed size is required for" + + " STORED method when not writing to a" + + " file"); + } + if (entry.entry.getCrc() == ZipArchiveEntry.CRC_UNKNOWN) { + throw new ZipException("crc checksum is required for STORED" + + " method when not writing to a file"); + } + entry.entry.setCompressedSize(entry.entry.getSize()); + } + + if ((entry.entry.getSize() >= ZIP64_MAGIC + || entry.entry.getCompressedSize() >= ZIP64_MAGIC) + && effectiveMode == Zip64Mode.Never) { + throw new Zip64RequiredException(Zip64RequiredException + .getEntryTooBigMessage(entry.entry)); + } + } + + /** + * Whether to addd a Zip64 extended information extra field to the + * local file header. + * + * <p>Returns true if</p> + * + * <ul> + * <li>mode is Always</li> + * <li>or we already know it is going to be needed</li> + * <li>or the size is unknown and we can ensure it won't hurt + * other implementations if we add it (i.e. 
we can erase its + * usage</li> + * </ul> + */ + private boolean shouldAddZip64Extra(final ZipArchiveEntry entry, final Zip64Mode mode) { + return mode == Zip64Mode.Always + || entry.getSize() >= ZIP64_MAGIC + || entry.getCompressedSize() >= ZIP64_MAGIC + || (entry.getSize() == ArchiveEntry.SIZE_UNKNOWN + && channel != null && mode != Zip64Mode.Never); + } + + /** + * Set the file comment. + * @param comment the comment + */ + public void setComment(final String comment) { + this.comment = comment; + } + + /** + * Sets the compression level for subsequent entries. + * + * <p>Default is Deflater.DEFAULT_COMPRESSION.</p> + * @param level the compression level. + * @throws IllegalArgumentException if an invalid compression + * level is specified. + */ + public void setLevel(final int level) { + if (level < Deflater.DEFAULT_COMPRESSION + || level > Deflater.BEST_COMPRESSION) { + throw new IllegalArgumentException("Invalid compression level: " + + level); + } + if (this.level == level) { + return; + } + hasCompressionLevelChanged = true; + this.level = level; + } + + /** + * Sets the default compression method for subsequent entries. + * + * <p>Default is DEFLATED.</p> + * @param method an <code>int</code> from java.util.zip.ZipEntry + */ + public void setMethod(final int method) { + this.method = method; + } + + /** + * Whether this stream is able to write the given entry. + * + * <p>May return false if it is set up to use encryption or a + * compression method that hasn't been implemented yet.</p> + * @since 1.1 + */ + @Override + public boolean canWriteEntryData(final ArchiveEntry ae) { + if (ae instanceof ZipArchiveEntry) { + final ZipArchiveEntry zae = (ZipArchiveEntry) ae; + return zae.getMethod() != ZipMethod.IMPLODING.getCode() + && zae.getMethod() != ZipMethod.UNSHRINKING.getCode() + && ZipUtil.canHandleEntryData(zae); + } + return false; + } + + /** + * Writes bytes to ZIP entry. + * @param b the byte array to write + * @param offset the start position to write from + * @param length the number of bytes to write + * @throws IOException on error + */ + @Override + public void write(final byte[] b, final int offset, final int length) throws IOException { + if (entry == null) { + throw new IllegalStateException("No current entry"); + } + ZipUtil.checkRequestedFeatures(entry.entry); + final long writtenThisTime = streamCompressor.write(b, offset, length, entry.entry.getMethod()); + count(writtenThisTime); + } + + /** + * Write bytes to output or random access file. + * @param data the byte array to write + * @throws IOException on error + */ + private void writeCounted(final byte[] data) throws IOException { + streamCompressor.writeCounted(data); + } + + private void copyFromZipInputStream(final InputStream src) throws IOException { + if (entry == null) { + throw new IllegalStateException("No current entry"); + } + ZipUtil.checkRequestedFeatures(entry.entry); + entry.hasWritten = true; + int length; + while ((length = src.read(copyBuffer)) >= 0 ) + { + streamCompressor.writeCounted(copyBuffer, 0, length); + count( length ); + } + } + + /** + * Closes this output stream and releases any system resources + * associated with the stream. + * + * @throws IOException if an I/O error occurs. + * @throws Zip64RequiredException if the archive's size exceeds 4 + * GByte or there are more than 65535 entries inside the archive + * and {@link #setUseZip64} is {@link Zip64Mode#Never}. 
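As validateSizeInformation above enforces, a STORED entry written to a plain, non-seekable OutputStream must have its uncompressed size and CRC-32 set before putArchiveEntry is called. A small sketch of that preparation, using a made-up in-memory payload:

    import java.io.ByteArrayOutputStream;
    import java.nio.charset.StandardCharsets;
    import java.util.zip.CRC32;
    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
    import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;

    public class StoredEntryExample {
        public static void main(String[] args) throws Exception {
            byte[] data = "hello".getBytes(StandardCharsets.UTF_8);   // illustrative payload

            CRC32 crc = new CRC32();
            crc.update(data);

            ZipArchiveEntry entry = new ZipArchiveEntry("hello.txt");
            entry.setMethod(ZipArchiveEntry.STORED);
            entry.setSize(data.length);    // uncompressed size known up front
            entry.setCrc(crc.getValue());  // CRC-32 known up front

            ByteArrayOutputStream sink = new ByteArrayOutputStream();
            // Constructing from an OutputStream (not a File or channel) means the
            // stream cannot seek back later, hence the requirements above.
            try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(sink)) {
                zos.putArchiveEntry(entry);
                zos.write(data);
                zos.closeArchiveEntry();
            }
        }
    }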
+ */ + @Override + public void close() throws IOException { + try { + if (!finished) { + finish(); + } + } finally { + destroy(); + } + } + + /** + * Flushes this output stream and forces any buffered output bytes + * to be written out to the stream. + * + * @throws IOException if an I/O error occurs. + */ + @Override + public void flush() throws IOException { + if (out != null) { + out.flush(); + } + } + + /* + * Various ZIP constants shared between this class, ZipArchiveInputStream and ZipFile + */ + /** + * local file header signature + */ + static final byte[] LFH_SIG = ZipLong.LFH_SIG.getBytes(); //NOSONAR + /** + * data descriptor signature + */ + static final byte[] DD_SIG = ZipLong.DD_SIG.getBytes(); //NOSONAR + /** + * central file header signature + */ + static final byte[] CFH_SIG = ZipLong.CFH_SIG.getBytes(); //NOSONAR + /** + * end of central dir signature + */ + static final byte[] EOCD_SIG = ZipLong.getBytes(0X06054B50L); //NOSONAR + /** + * ZIP64 end of central dir signature + */ + static final byte[] ZIP64_EOCD_SIG = ZipLong.getBytes(0X06064B50L); //NOSONAR + /** + * ZIP64 end of central dir locator signature + */ + static final byte[] ZIP64_EOCD_LOC_SIG = ZipLong.getBytes(0X07064B50L); //NOSONAR + + /** + * Writes next block of compressed data to the output stream. + * @throws IOException on error + */ + protected final void deflate() throws IOException { + streamCompressor.deflate(); + } + + /** + * Writes the local file header entry + * @param ze the entry to write + * @throws IOException on error + */ + protected void writeLocalFileHeader(final ZipArchiveEntry ze) throws IOException { + writeLocalFileHeader(ze, false); + } + + private void writeLocalFileHeader(final ZipArchiveEntry ze, final boolean phased) throws IOException { + final boolean encodable = zipEncoding.canEncode(ze.getName()); + final ByteBuffer name = getName(ze); + + if (createUnicodeExtraFields != UnicodeExtraFieldPolicy.NEVER) { + addUnicodeExtraFields(ze, encodable, name); + } + + final long localHeaderStart = streamCompressor.getTotalBytesWritten(); + final byte[] localHeader = createLocalFileHeader(ze, name, encodable, phased, localHeaderStart); + metaData.put(ze, new EntryMetaData(localHeaderStart, usesDataDescriptor(ze.getMethod(), phased))); + entry.localDataStart = localHeaderStart + LFH_CRC_OFFSET; // At crc offset + writeCounted(localHeader); + entry.dataStart = streamCompressor.getTotalBytesWritten(); + } + + + private byte[] createLocalFileHeader(final ZipArchiveEntry ze, final ByteBuffer name, final boolean encodable, + final boolean phased, long archiveOffset) { + ResourceAlignmentExtraField oldAlignmentEx = + (ResourceAlignmentExtraField) ze.getExtraField(ResourceAlignmentExtraField.ID); + if (oldAlignmentEx != null) { + ze.removeExtraField(ResourceAlignmentExtraField.ID); + } + + int alignment = ze.getAlignment(); + if (alignment <= 0 && oldAlignmentEx != null) { + alignment = oldAlignmentEx.getAlignment(); + } + + if (alignment > 1 || (oldAlignmentEx != null && !oldAlignmentEx.allowMethodChange())) { + int oldLength = LFH_FILENAME_OFFSET + + name.limit() - name.position() + + ze.getLocalFileDataExtra().length; + + int padding = (int) ((-archiveOffset - oldLength - ZipExtraField.EXTRAFIELD_HEADER_SIZE + - ResourceAlignmentExtraField.BASE_SIZE) & + (alignment - 1)); + ze.addExtraField(new ResourceAlignmentExtraField(alignment, + oldAlignmentEx != null && oldAlignmentEx.allowMethodChange(), padding)); + } + + final byte[] extra = ze.getLocalFileDataExtra(); + final int nameLen = 
name.limit() - name.position(); + final int len = LFH_FILENAME_OFFSET + nameLen + extra.length; + final byte[] buf = new byte[len]; + + System.arraycopy(LFH_SIG, 0, buf, LFH_SIG_OFFSET, WORD); + + //store method in local variable to prevent multiple method calls + final int zipMethod = ze.getMethod(); + final boolean dataDescriptor = usesDataDescriptor(zipMethod, phased); + + putShort(versionNeededToExtract(zipMethod, hasZip64Extra(ze), dataDescriptor), buf, LFH_VERSION_NEEDED_OFFSET); + + final GeneralPurposeBit generalPurposeBit = getGeneralPurposeBits(!encodable && fallbackToUTF8, dataDescriptor); + generalPurposeBit.encode(buf, LFH_GPB_OFFSET); + + // compression method + putShort(zipMethod, buf, LFH_METHOD_OFFSET); + + ZipUtil.toDosTime(calendarInstance, ze.getTime(), buf, LFH_TIME_OFFSET); + + // CRC + if (phased){ + putLong(ze.getCrc(), buf, LFH_CRC_OFFSET); + } else if (zipMethod == DEFLATED || channel != null) { + System.arraycopy(LZERO, 0, buf, LFH_CRC_OFFSET, WORD); + } else { + putLong(ze.getCrc(), buf, LFH_CRC_OFFSET); + } + + // compressed length + // uncompressed length + if (hasZip64Extra(entry.entry)){ + // point to ZIP64 extended information extra field for + // sizes, may get rewritten once sizes are known if + // stream is seekable + ZipLong.ZIP64_MAGIC.putLong(buf, LFH_COMPRESSED_SIZE_OFFSET); + ZipLong.ZIP64_MAGIC.putLong(buf, LFH_ORIGINAL_SIZE_OFFSET); + } else if (phased) { + putLong(ze.getCompressedSize(), buf, LFH_COMPRESSED_SIZE_OFFSET); + putLong(ze.getSize(), buf, LFH_ORIGINAL_SIZE_OFFSET); + } else if (zipMethod == DEFLATED || channel != null) { + System.arraycopy(LZERO, 0, buf, LFH_COMPRESSED_SIZE_OFFSET, WORD); + System.arraycopy(LZERO, 0, buf, LFH_ORIGINAL_SIZE_OFFSET, WORD); + } else { // Stored + putLong(ze.getSize(), buf, LFH_COMPRESSED_SIZE_OFFSET); + putLong(ze.getSize(), buf, LFH_ORIGINAL_SIZE_OFFSET); + } + // file name length + putShort(nameLen, buf, LFH_FILENAME_LENGTH_OFFSET); + + // extra field length + putShort(extra.length, buf, LFH_EXTRA_LENGTH_OFFSET); + + // file name + System.arraycopy( name.array(), name.arrayOffset(), buf, LFH_FILENAME_OFFSET, nameLen); + + // extra fields + System.arraycopy(extra, 0, buf, LFH_FILENAME_OFFSET + nameLen, extra.length); + + return buf; + } + + + /** + * Adds UnicodeExtra fields for name and file comment if mode is + * ALWAYS or the data cannot be encoded using the configured + * encoding. + */ + private void addUnicodeExtraFields(final ZipArchiveEntry ze, final boolean encodable, + final ByteBuffer name) + throws IOException { + if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS + || !encodable) { + ze.addExtraField(new UnicodePathExtraField(ze.getName(), + name.array(), + name.arrayOffset(), + name.limit() + - name.position())); + } + + final String comm = ze.getComment(); + if (comm != null && !"".equals(comm)) { + + final boolean commentEncodable = zipEncoding.canEncode(comm); + + if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS + || !commentEncodable) { + final ByteBuffer commentB = getEntryEncoding(ze).encode(comm); + ze.addExtraField(new UnicodeCommentExtraField(comm, + commentB.array(), + commentB.arrayOffset(), + commentB.limit() + - commentB.position()) + ); + } + } + } + + /** + * Writes the data descriptor entry. 
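createLocalFileHeader lays the fields out in the fixed little-endian order the ZIP format prescribes. As a rough illustration, the header of a freshly written archive can be decoded with plain java.nio; the offsets below are the standard 30-byte local file header layout, not constants taken from this class:

    import java.io.ByteArrayOutputStream;
    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;
    import java.nio.charset.StandardCharsets;
    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
    import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;

    public class LocalFileHeaderDump {
        public static void main(String[] args) throws Exception {
            ByteArrayOutputStream sink = new ByteArrayOutputStream();
            try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(sink)) {
                ZipArchiveEntry e = new ZipArchiveEntry("a.txt");
                zos.putArchiveEntry(e);
                zos.write("abc".getBytes(StandardCharsets.US_ASCII));
                zos.closeArchiveEntry();
            }

            // every multi-byte field in a ZIP archive is little endian
            ByteBuffer lfh = ByteBuffer.wrap(sink.toByteArray()).order(ByteOrder.LITTLE_ENDIAN);
            System.out.printf("signature 0x%08x%n", lfh.getInt(0));   // 0x04034b50
            System.out.printf("version   %d%n", lfh.getShort(4));
            System.out.printf("gp bits   0x%04x%n", lfh.getShort(6));
            System.out.printf("method    %d%n", lfh.getShort(8));     // 8 = DEFLATED
            System.out.printf("crc-32    0x%08x%n", lfh.getInt(14));  // 0: data descriptor follows the data
            System.out.printf("name len  %d%n", lfh.getShort(26));
            System.out.printf("extra len %d%n", lfh.getShort(28));
        }
    }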
+ * @param ze the entry to write + * @throws IOException on error + */ + protected void writeDataDescriptor(final ZipArchiveEntry ze) throws IOException { + if (!usesDataDescriptor(ze.getMethod(), false)) { + return; + } + writeCounted(DD_SIG); + writeCounted(ZipLong.getBytes(ze.getCrc())); + if (!hasZip64Extra(ze)) { + writeCounted(ZipLong.getBytes(ze.getCompressedSize())); + writeCounted(ZipLong.getBytes(ze.getSize())); + } else { + writeCounted(ZipEightByteInteger.getBytes(ze.getCompressedSize())); + writeCounted(ZipEightByteInteger.getBytes(ze.getSize())); + } + } + + /** + * Writes the central file header entry. + * @param ze the entry to write + * @throws IOException on error + * @throws Zip64RequiredException if the archive's size exceeds 4 + * GByte and {@link Zip64Mode #setUseZip64} is {@link + * Zip64Mode#Never}. + */ + protected void writeCentralFileHeader(final ZipArchiveEntry ze) throws IOException { + final byte[] centralFileHeader = createCentralFileHeader(ze); + writeCounted(centralFileHeader); + } + + private byte[] createCentralFileHeader(final ZipArchiveEntry ze) throws IOException { + + final EntryMetaData entryMetaData = metaData.get(ze); + final boolean needsZip64Extra = hasZip64Extra(ze) + || ze.getCompressedSize() >= ZIP64_MAGIC + || ze.getSize() >= ZIP64_MAGIC + || entryMetaData.offset >= ZIP64_MAGIC + || zip64Mode == Zip64Mode.Always; + + if (needsZip64Extra && zip64Mode == Zip64Mode.Never) { + // must be the offset that is too big, otherwise an + // exception would have been throw in putArchiveEntry or + // closeArchiveEntry + throw new Zip64RequiredException(Zip64RequiredException + .ARCHIVE_TOO_BIG_MESSAGE); + } + + + handleZip64Extra(ze, entryMetaData.offset, needsZip64Extra); + + return createCentralFileHeader(ze, getName(ze), entryMetaData, needsZip64Extra); + } + + /** + * Writes the central file header entry. + * @param ze the entry to write + * @param name The encoded name + * @param entryMetaData meta data for this file + * @throws IOException on error + */ + private byte[] createCentralFileHeader(final ZipArchiveEntry ze, final ByteBuffer name, + final EntryMetaData entryMetaData, + final boolean needsZip64Extra) throws IOException { + final byte[] extra = ze.getCentralDirectoryExtra(); + + // file comment length + String comm = ze.getComment(); + if (comm == null) { + comm = ""; + } + + final ByteBuffer commentB = getEntryEncoding(ze).encode(comm); + final int nameLen = name.limit() - name.position(); + final int commentLen = commentB.limit() - commentB.position(); + final int len= CFH_FILENAME_OFFSET + nameLen + extra.length + commentLen; + final byte[] buf = new byte[len]; + + System.arraycopy(CFH_SIG, 0, buf, CFH_SIG_OFFSET, WORD); + + // version made by + // CheckStyle:MagicNumber OFF + putShort((ze.getPlatform() << 8) | (!hasUsedZip64 ? DATA_DESCRIPTOR_MIN_VERSION : ZIP64_MIN_VERSION), + buf, CFH_VERSION_MADE_BY_OFFSET); + + final int zipMethod = ze.getMethod(); + final boolean encodable = zipEncoding.canEncode(ze.getName()); + putShort(versionNeededToExtract(zipMethod, needsZip64Extra, entryMetaData.usesDataDescriptor), + buf, CFH_VERSION_NEEDED_OFFSET); + getGeneralPurposeBits(!encodable && fallbackToUTF8, entryMetaData.usesDataDescriptor).encode(buf, CFH_GPB_OFFSET); + + // compression method + putShort(zipMethod, buf, CFH_METHOD_OFFSET); + + + // last mod. 
time and date + ZipUtil.toDosTime(calendarInstance, ze.getTime(), buf, CFH_TIME_OFFSET); + + // CRC + // compressed length + // uncompressed length + putLong(ze.getCrc(), buf, CFH_CRC_OFFSET); + if (ze.getCompressedSize() >= ZIP64_MAGIC + || ze.getSize() >= ZIP64_MAGIC + || zip64Mode == Zip64Mode.Always) { + ZipLong.ZIP64_MAGIC.putLong(buf, CFH_COMPRESSED_SIZE_OFFSET); + ZipLong.ZIP64_MAGIC.putLong(buf, CFH_ORIGINAL_SIZE_OFFSET); + } else { + putLong(ze.getCompressedSize(), buf, CFH_COMPRESSED_SIZE_OFFSET); + putLong(ze.getSize(), buf, CFH_ORIGINAL_SIZE_OFFSET); + } + + putShort(nameLen, buf, CFH_FILENAME_LENGTH_OFFSET); + + // extra field length + putShort(extra.length, buf, CFH_EXTRA_LENGTH_OFFSET); + + putShort(commentLen, buf, CFH_COMMENT_LENGTH_OFFSET); + + // disk number start + System.arraycopy(ZERO, 0, buf, CFH_DISK_NUMBER_OFFSET, SHORT); + + // internal file attributes + putShort(ze.getInternalAttributes(), buf, CFH_INTERNAL_ATTRIBUTES_OFFSET); + + // external file attributes + putLong(ze.getExternalAttributes(), buf, CFH_EXTERNAL_ATTRIBUTES_OFFSET); + + // relative offset of LFH + if (entryMetaData.offset >= ZIP64_MAGIC || zip64Mode == Zip64Mode.Always) { + putLong(ZIP64_MAGIC, buf, CFH_LFH_OFFSET); + } else { + putLong(Math.min(entryMetaData.offset, ZIP64_MAGIC), buf, CFH_LFH_OFFSET); + } + + // file name + System.arraycopy(name.array(), name.arrayOffset(), buf, CFH_FILENAME_OFFSET, nameLen); + + final int extraStart = CFH_FILENAME_OFFSET + nameLen; + System.arraycopy(extra, 0, buf, extraStart, extra.length); + + final int commentStart = extraStart + extra.length; + + // file comment + System.arraycopy(commentB.array(), commentB.arrayOffset(), buf, commentStart, commentLen); + return buf; + } + + /** + * If the entry needs Zip64 extra information inside the central + * directory then configure its data. + */ + private void handleZip64Extra(final ZipArchiveEntry ze, final long lfhOffset, + final boolean needsZip64Extra) { + if (needsZip64Extra) { + final Zip64ExtendedInformationExtraField z64 = getZip64Extra(ze); + if (ze.getCompressedSize() >= ZIP64_MAGIC + || ze.getSize() >= ZIP64_MAGIC + || zip64Mode == Zip64Mode.Always) { + z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); + z64.setSize(new ZipEightByteInteger(ze.getSize())); + } else { + // reset value that may have been set for LFH + z64.setCompressedSize(null); + z64.setSize(null); + } + if (lfhOffset >= ZIP64_MAGIC || zip64Mode == Zip64Mode.Always) { + z64.setRelativeHeaderOffset(new ZipEightByteInteger(lfhOffset)); + } + ze.setExtra(); + } + } + + /** + * Writes the "End of central dir record". + * @throws IOException on error + * @throws Zip64RequiredException if the archive's size exceeds 4 + * GByte or there are more than 65535 entries inside the archive + * and {@link Zip64Mode #setUseZip64} is {@link Zip64Mode#Never}. 
+ */ + protected void writeCentralDirectoryEnd() throws IOException { + writeCounted(EOCD_SIG); + + // disk numbers + writeCounted(ZERO); + writeCounted(ZERO); + + // number of entries + final int numberOfEntries = entries.size(); + if (numberOfEntries > ZIP64_MAGIC_SHORT + && zip64Mode == Zip64Mode.Never) { + throw new Zip64RequiredException(Zip64RequiredException + .TOO_MANY_ENTRIES_MESSAGE); + } + if (cdOffset > ZIP64_MAGIC && zip64Mode == Zip64Mode.Never) { + throw new Zip64RequiredException(Zip64RequiredException + .ARCHIVE_TOO_BIG_MESSAGE); + } + + final byte[] num = ZipShort.getBytes(Math.min(numberOfEntries, + ZIP64_MAGIC_SHORT)); + writeCounted(num); + writeCounted(num); + + // length and location of CD + writeCounted(ZipLong.getBytes(Math.min(cdLength, ZIP64_MAGIC))); + writeCounted(ZipLong.getBytes(Math.min(cdOffset, ZIP64_MAGIC))); + + // ZIP file comment + final ByteBuffer data = this.zipEncoding.encode(comment); + final int dataLen = data.limit() - data.position(); + writeCounted(ZipShort.getBytes(dataLen)); + streamCompressor.writeCounted(data.array(), data.arrayOffset(), dataLen); + } + + /** + * Writes the "ZIP64 End of central dir record" and + * "ZIP64 End of central dir locator". + * @throws IOException on error + * @since 1.3 + */ + protected void writeZip64CentralDirectory() throws IOException { + if (zip64Mode == Zip64Mode.Never) { + return; + } + + if (!hasUsedZip64 + && (cdOffset >= ZIP64_MAGIC || cdLength >= ZIP64_MAGIC + || entries.size() >= ZIP64_MAGIC_SHORT)) { + // actually "will use" + hasUsedZip64 = true; + } + + if (!hasUsedZip64) { + return; + } + + final long offset = streamCompressor.getTotalBytesWritten(); + + writeOut(ZIP64_EOCD_SIG); + // size, we don't have any variable length as we don't support + // the extensible data sector, yet + writeOut(ZipEightByteInteger + .getBytes(SHORT /* version made by */ + + SHORT /* version needed to extract */ + + WORD /* disk number */ + + WORD /* disk with central directory */ + + DWORD /* number of entries in CD on this disk */ + + DWORD /* total number of entries */ + + DWORD /* size of CD */ + + (long) DWORD /* offset of CD */ + )); + + // version made by and version needed to extract + writeOut(ZipShort.getBytes(ZIP64_MIN_VERSION)); + writeOut(ZipShort.getBytes(ZIP64_MIN_VERSION)); + + // disk numbers - four bytes this time + writeOut(LZERO); + writeOut(LZERO); + + // number of entries + final byte[] num = ZipEightByteInteger.getBytes(entries.size()); + writeOut(num); + writeOut(num); + + // length and location of CD + writeOut(ZipEightByteInteger.getBytes(cdLength)); + writeOut(ZipEightByteInteger.getBytes(cdOffset)); + + // no "zip64 extensible data sector" for now + + // and now the "ZIP64 end of central directory locator" + writeOut(ZIP64_EOCD_LOC_SIG); + + // disk number holding the ZIP64 EOCD record + writeOut(LZERO); + // relative offset of ZIP64 EOCD record + writeOut(ZipEightByteInteger.getBytes(offset)); + // total number of disks + writeOut(ONE); + } + + /** + * Write bytes to output or random access file. + * @param data the byte array to write + * @throws IOException on error + */ + protected final void writeOut(final byte[] data) throws IOException { + streamCompressor.writeOut(data, 0, data.length); + } + + + /** + * Write bytes to output or random access file. 
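Whether the ZIP64 structures above get written is governed by the Zip64Mode handed to setUseZip64; AsNeeded appears to be the default, Always forces ZIP64 records even for small archives, and Never makes the stream throw Zip64RequiredException once an entry, the archive size or the entry count exceeds the classic limits. A brief sketch:

    import java.io.File;
    import org.apache.commons.compress.archivers.zip.Zip64Mode;
    import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;

    public class Zip64ModeExample {
        public static void main(String[] args) throws Exception {
            try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(new File("big.zip"))) {
                // force the ZIP64 extra field, EOCD record and locator regardless of size
                zos.setUseZip64(Zip64Mode.Always);
                // ... put entries as usual ...
            }
        }
    }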
+ * @param data the byte array to write + * @param offset the start position to write from + * @param length the number of bytes to write + * @throws IOException on error + */ + protected final void writeOut(final byte[] data, final int offset, final int length) + throws IOException { + streamCompressor.writeOut(data, offset, length); + } + + + private GeneralPurposeBit getGeneralPurposeBits(final boolean utfFallback, boolean usesDataDescriptor) { + final GeneralPurposeBit b = new GeneralPurposeBit(); + b.useUTF8ForNames(useUTF8Flag || utfFallback); + if (usesDataDescriptor) { + b.useDataDescriptor(true); + } + return b; + } + + private int versionNeededToExtract(final int zipMethod, final boolean zip64, final boolean usedDataDescriptor) { + if (zip64) { + return ZIP64_MIN_VERSION; + } + if (usedDataDescriptor) { + return DATA_DESCRIPTOR_MIN_VERSION; + } + return versionNeededToExtractMethod(zipMethod); + } + + private boolean usesDataDescriptor(final int zipMethod, boolean phased) { + return !phased && zipMethod == DEFLATED && channel == null; + } + + private int versionNeededToExtractMethod(int zipMethod) { + return zipMethod == DEFLATED ? DEFLATE_MIN_VERSION : INITIAL_VERSION; + } + + /** + * Creates a new zip entry taking some information from the given + * file and using the provided name. + * + * <p>The name will be adjusted to end with a forward slash "/" if + * the file is a directory. If the file is not a directory a + * potential trailing forward slash will be stripped from the + * entry name.</p> + * + * <p>Must not be used if the stream has already been closed.</p> + */ + @Override + public ArchiveEntry createArchiveEntry(final File inputFile, final String entryName) + throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + return new ZipArchiveEntry(inputFile, entryName); + } + + /** + * Get the existing ZIP64 extended information extra field or + * create a new one and add it to the entry. + * + * @since 1.3 + */ + private Zip64ExtendedInformationExtraField + getZip64Extra(final ZipArchiveEntry ze) { + if (entry != null) { + entry.causedUseOfZip64 = !hasUsedZip64; + } + hasUsedZip64 = true; + Zip64ExtendedInformationExtraField z64 = + (Zip64ExtendedInformationExtraField) + ze.getExtraField(Zip64ExtendedInformationExtraField + .HEADER_ID); + if (z64 == null) { + /* + System.err.println("Adding z64 for " + ze.getName() + + ", method: " + ze.getMethod() + + " (" + (ze.getMethod() == STORED) + ")" + + ", channel: " + (channel != null)); + */ + z64 = new Zip64ExtendedInformationExtraField(); + } + + // even if the field is there already, make sure it is the first one + ze.addAsFirstExtraField(z64); + + return z64; + } + + /** + * Is there a ZIP64 extended information extra field for the + * entry? + * + * @since 1.3 + */ + private boolean hasZip64Extra(final ZipArchiveEntry ze) { + return ze.getExtraField(Zip64ExtendedInformationExtraField + .HEADER_ID) + != null; + } + + /** + * If the mode is AsNeeded and the entry is a compressed entry of + * unknown size that gets written to a non-seekable stream then + * change the default to Never. 
+ * + * @since 1.3 + */ + private Zip64Mode getEffectiveZip64Mode(final ZipArchiveEntry ze) { + if (zip64Mode != Zip64Mode.AsNeeded + || channel != null + || ze.getMethod() != DEFLATED + || ze.getSize() != ArchiveEntry.SIZE_UNKNOWN) { + return zip64Mode; + } + return Zip64Mode.Never; + } + + private ZipEncoding getEntryEncoding(final ZipArchiveEntry ze) { + final boolean encodable = zipEncoding.canEncode(ze.getName()); + return !encodable && fallbackToUTF8 + ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; + } + + private ByteBuffer getName(final ZipArchiveEntry ze) throws IOException { + return getEntryEncoding(ze).encode(ze.getName()); + } + + /** + * Closes the underlying stream/file without finishing the + * archive, the result will likely be a corrupt archive. + * + * <p>This method only exists to support tests that generate + * corrupt archives so they can clean up any temporary files.</p> + */ + void destroy() throws IOException { + try { + if (channel != null) { + channel.close(); + } + } finally { + if (out != null) { + out.close(); + } + } + } + + /** + * enum that represents the possible policies for creating Unicode + * extra fields. + */ + public static final class UnicodeExtraFieldPolicy { + /** + * Always create Unicode extra fields. + */ + public static final UnicodeExtraFieldPolicy ALWAYS = new UnicodeExtraFieldPolicy("always"); + /** + * Never create Unicode extra fields. + */ + public static final UnicodeExtraFieldPolicy NEVER = new UnicodeExtraFieldPolicy("never"); + /** + * Create Unicode extra fields for filenames that cannot be + * encoded using the specified encoding. + */ + public static final UnicodeExtraFieldPolicy NOT_ENCODEABLE = + new UnicodeExtraFieldPolicy("not encodeable"); + + private final String name; + private UnicodeExtraFieldPolicy(final String n) { + name = n; + } + @Override + public String toString() { + return name; + } + } + + /** + * Structure collecting information for the entry that is + * currently being written. + */ + private static final class CurrentEntry { + private CurrentEntry(final ZipArchiveEntry entry) { + this.entry = entry; + } + /** + * Current ZIP entry. + */ + private final ZipArchiveEntry entry; + /** + * Offset for CRC entry in the local file header data for the + * current entry starts here. + */ + private long localDataStart = 0; + /** + * Data for local header data + */ + private long dataStart = 0; + /** + * Number of bytes read for the current entry (can't rely on + * Deflater#getBytesRead) when using DEFLATED. + */ + private long bytesRead = 0; + /** + * Whether current entry was the first one using ZIP64 features. + */ + private boolean causedUseOfZip64 = false; + /** + * Whether write() has been called at all. 
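Tying the encoding pieces together, a hedged sketch of how this policy is typically selected; the setEncoding, setFallbackToUTF8 and setCreateUnicodeExtraFields setters are assumed to be the ones defined earlier in this class, and the file names are illustrative:

    import java.io.File;
    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
    import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
    import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream.UnicodeExtraFieldPolicy;

    public class EncodingPolicyExample {
        public static void main(String[] args) throws Exception {
            try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(new File("legacy.zip"))) {
                zos.setEncoding("CP437");
                // add InfoZIP Unicode extra fields only for names CP437 cannot represent,
                // and store those names as UTF-8 instead of mangling them
                zos.setCreateUnicodeExtraFields(UnicodeExtraFieldPolicy.NOT_ENCODEABLE);
                zos.setFallbackToUTF8(true);

                ZipArchiveEntry entry = new ZipArchiveEntry("\u20AC_for_Dollar.txt");
                zos.putArchiveEntry(entry);
                zos.closeArchiveEntry();
            }
        }
    }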
+ * + * <p>In order to create a valid archive {@link + * #closeArchiveEntry closeArchiveEntry} will write an empty + * array to get the CRC right if nothing has been written to + * the stream at all.</p> + */ + private boolean hasWritten; + } + + private static final class EntryMetaData { + private final long offset; + private final boolean usesDataDescriptor; + private EntryMetaData(long offset, boolean usesDataDescriptor) { + this.offset = offset; + this.usesDataDescriptor = usesDataDescriptor; + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipConstants.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipConstants.java new file mode 100644 index 000000000..c230991a3 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipConstants.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +/** + * Various constants used throughout the package. + * + * @since 1.3 + */ +final class ZipConstants { + /** Masks last eight bits */ + static final int BYTE_MASK = 0xFF; + + /** length of a ZipShort in bytes */ + static final int SHORT = 2; + + /** length of a ZipLong in bytes */ + static final int WORD = 4; + + /** length of a ZipEightByteInteger in bytes */ + static final int DWORD = 8; + + /** Initial ZIP specification version */ + static final int INITIAL_VERSION = 10; + + /** + * ZIP specification version that introduced DEFLATE compression method. + * @since 1.15 + */ + static final int DEFLATE_MIN_VERSION = 20; + + /** ZIP specification version that introduced data descriptor method */ + static final int DATA_DESCRIPTOR_MIN_VERSION = 20; + + /** ZIP specification version that introduced ZIP64 */ + static final int ZIP64_MIN_VERSION = 45; + + /** + * Value stored in two-byte size and similar fields if ZIP64 + * extensions are used. + */ + static final int ZIP64_MAGIC_SHORT = 0xFFFF; + + /** + * Value stored in four-byte size and similar fields if ZIP64 + * extensions are used. + */ + static final long ZIP64_MAGIC = 0xFFFFFFFFL; + + private ZipConstants() { } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEightByteInteger.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEightByteInteger.java new file mode 100644 index 000000000..9d9e2ec82 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEightByteInteger.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import java.io.Serializable; +import java.math.BigInteger; + +import static org.apache.commons.compress.archivers.zip.ZipConstants.BYTE_MASK; + +/** + * Utility class that represents an eight byte integer with conversion + * rules for the little endian byte order of ZIP files. + * @Immutable + * + * @since 1.2 + */ +public final class ZipEightByteInteger implements Serializable { + private static final long serialVersionUID = 1L; + + private static final int BYTE_1 = 1; + private static final int BYTE_1_MASK = 0xFF00; + private static final int BYTE_1_SHIFT = 8; + + private static final int BYTE_2 = 2; + private static final int BYTE_2_MASK = 0xFF0000; + private static final int BYTE_2_SHIFT = 16; + + private static final int BYTE_3 = 3; + private static final long BYTE_3_MASK = 0xFF000000L; + private static final int BYTE_3_SHIFT = 24; + + private static final int BYTE_4 = 4; + private static final long BYTE_4_MASK = 0xFF00000000L; + private static final int BYTE_4_SHIFT = 32; + + private static final int BYTE_5 = 5; + private static final long BYTE_5_MASK = 0xFF0000000000L; + private static final int BYTE_5_SHIFT = 40; + + private static final int BYTE_6 = 6; + private static final long BYTE_6_MASK = 0xFF000000000000L; + private static final int BYTE_6_SHIFT = 48; + + private static final int BYTE_7 = 7; + private static final long BYTE_7_MASK = 0x7F00000000000000L; + private static final int BYTE_7_SHIFT = 56; + + private static final int LEFTMOST_BIT_SHIFT = 63; + private static final byte LEFTMOST_BIT = (byte) 0x80; + + private final BigInteger value; + + public static final ZipEightByteInteger ZERO = new ZipEightByteInteger(0); + + /** + * Create instance from a number. + * @param value the long to store as a ZipEightByteInteger + */ + public ZipEightByteInteger(final long value) { + this(BigInteger.valueOf(value)); + } + + /** + * Create instance from a number. + * @param value the BigInteger to store as a ZipEightByteInteger + */ + public ZipEightByteInteger(final BigInteger value) { + this.value = value; + } + + /** + * Create instance from bytes. + * @param bytes the bytes to store as a ZipEightByteInteger + */ + public ZipEightByteInteger (final byte[] bytes) { + this(bytes, 0); + } + + /** + * Create instance from the eight bytes starting at offset. + * @param bytes the bytes to store as a ZipEightByteInteger + * @param offset the offset to start + */ + public ZipEightByteInteger (final byte[] bytes, final int offset) { + value = ZipEightByteInteger.getValue(bytes, offset); + } + + /** + * Get value as eight bytes in big endian byte order. + * @return value as eight bytes in big endian order + */ + public byte[] getBytes() { + return ZipEightByteInteger.getBytes(value); + } + + /** + * Get value as Java long. + * @return value as a long + */ + public long getLongValue() { + return value.longValue(); + } + + /** + * Get value as Java long. 
+ * @return value as a long + */ + public BigInteger getValue() { + return value; + } + + /** + * Get value as eight bytes in big endian byte order. + * @param value the value to convert + * @return value as eight bytes in big endian byte order + */ + public static byte[] getBytes(final long value) { + return getBytes(BigInteger.valueOf(value)); + } + + /** + * Get value as eight bytes in big endian byte order. + * @param value the value to convert + * @return value as eight bytes in big endian byte order + */ + public static byte[] getBytes(final BigInteger value) { + final byte[] result = new byte[8]; + final long val = value.longValue(); + result[0] = (byte) ((val & BYTE_MASK)); + result[BYTE_1] = (byte) ((val & BYTE_1_MASK) >> BYTE_1_SHIFT); + result[BYTE_2] = (byte) ((val & BYTE_2_MASK) >> BYTE_2_SHIFT); + result[BYTE_3] = (byte) ((val & BYTE_3_MASK) >> BYTE_3_SHIFT); + result[BYTE_4] = (byte) ((val & BYTE_4_MASK) >> BYTE_4_SHIFT); + result[BYTE_5] = (byte) ((val & BYTE_5_MASK) >> BYTE_5_SHIFT); + result[BYTE_6] = (byte) ((val & BYTE_6_MASK) >> BYTE_6_SHIFT); + result[BYTE_7] = (byte) ((val & BYTE_7_MASK) >> BYTE_7_SHIFT); + if (value.testBit(LEFTMOST_BIT_SHIFT)) { + result[BYTE_7] |= LEFTMOST_BIT; + } + return result; + } + + /** + * Helper method to get the value as a Java long from eight bytes + * starting at given array offset + * @param bytes the array of bytes + * @param offset the offset to start + * @return the corresponding Java long value + */ + public static long getLongValue(final byte[] bytes, final int offset) { + return getValue(bytes, offset).longValue(); + } + + /** + * Helper method to get the value as a Java BigInteger from eight + * bytes starting at given array offset + * @param bytes the array of bytes + * @param offset the offset to start + * @return the corresponding Java BigInteger value + */ + public static BigInteger getValue(final byte[] bytes, final int offset) { + long value = ((long) bytes[offset + BYTE_7] << BYTE_7_SHIFT) & BYTE_7_MASK; + value += ((long) bytes[offset + BYTE_6] << BYTE_6_SHIFT) & BYTE_6_MASK; + value += ((long) bytes[offset + BYTE_5] << BYTE_5_SHIFT) & BYTE_5_MASK; + value += ((long) bytes[offset + BYTE_4] << BYTE_4_SHIFT) & BYTE_4_MASK; + value += ((long) bytes[offset + BYTE_3] << BYTE_3_SHIFT) & BYTE_3_MASK; + value += ((long) bytes[offset + BYTE_2] << BYTE_2_SHIFT) & BYTE_2_MASK; + value += ((long) bytes[offset + BYTE_1] << BYTE_1_SHIFT) & BYTE_1_MASK; + value += ((long) bytes[offset] & BYTE_MASK); + final BigInteger val = BigInteger.valueOf(value); + return (bytes[offset + BYTE_7] & LEFTMOST_BIT) == LEFTMOST_BIT + ? val.setBit(LEFTMOST_BIT_SHIFT) : val; + } + + /** + * Helper method to get the value as a Java long from an eight-byte array + * @param bytes the array of bytes + * @return the corresponding Java long value + */ + public static long getLongValue(final byte[] bytes) { + return getLongValue(bytes, 0); + } + + /** + * Helper method to get the value as a Java long from an eight-byte array + * @param bytes the array of bytes + * @return the corresponding Java BigInteger value + */ + public static BigInteger getValue(final byte[] bytes) { + return getValue(bytes, 0); + } + + /** + * Override to make two instances with same value equal. 
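Despite the "big endian" wording in the per-method javadoc, the bytes produced here follow the ZIP format's little-endian order (least significant byte first), as the class-level comment says and as the getBytes implementation shows. A short cross-check against java.nio:

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;
    import java.util.Arrays;
    import org.apache.commons.compress.archivers.zip.ZipEightByteInteger;

    public class EightByteIntegerDemo {
        public static void main(String[] args) {
            long value = 0x0102030405060708L;

            byte[] zipBytes = ZipEightByteInteger.getBytes(value);

            // the same value encoded little endian with plain java.nio
            byte[] nioBytes = ByteBuffer.allocate(8)
                    .order(ByteOrder.LITTLE_ENDIAN)
                    .putLong(value)
                    .array();

            System.out.println(Arrays.equals(zipBytes, nioBytes));                     // true
            System.out.println(ZipEightByteInteger.getLongValue(zipBytes) == value);   // true
        }
    }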
+ * @param o an object to compare + * @return true if the objects are equal + */ + @Override + public boolean equals(final Object o) { + if (o == null || !(o instanceof ZipEightByteInteger)) { + return false; + } + return value.equals(((ZipEightByteInteger) o).getValue()); + } + + /** + * Override to make two instances with same value equal. + * @return the hashCode of the value stored in the ZipEightByteInteger + */ + @Override + public int hashCode() { + return value.hashCode(); + } + + @Override + public String toString() { + return "ZipEightByteInteger value: " + value; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java new file mode 100644 index 000000000..dacd06369 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers.zip; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * An interface for encoders that do a pretty encoding of ZIP + * filenames. + * + * <p>There are mostly two implementations, one that uses java.nio + * {@link java.nio.charset.Charset Charset} and one implementation, + * which copes with simple 8 bit charsets, because java-1.4 did not + * support Cp437 in java.nio.</p> + * + * <p>The main reason for defining an own encoding layer comes from + * the problems with {@link java.lang.String#getBytes(String) + * String.getBytes}, which encodes unknown characters as ASCII + * quotation marks ('?'). Quotation marks are per definition an + * invalid filename on some operating systems like Windows, which + * leads to ignored ZIP entries.</p> + * + * <p>All implementations should implement this interface in a + * reentrant way.</p> + */ +public interface ZipEncoding { + /** + * Check, whether the given string may be losslessly encoded using this + * encoding. + * + * @param name A filename or ZIP comment. + * @return Whether the given name may be encoded with out any losses. + */ + boolean canEncode(String name); + + /** + * Encode a filename or a comment to a byte array suitable for + * storing it to a serialized zip entry. + * + * <p>Examples for CP 437 (in pseudo-notation, right hand side is + * C-style notation):</p> + * <pre> + * encode("\u20AC_for_Dollar.txt") = "%U20AC_for_Dollar.txt" + * encode("\u00D6lf\u00E4sser.txt") = "\231lf\204sser.txt" + * </pre> + * + * @param name A filename or ZIP comment. + * @return A byte buffer with a backing array containing the + * encoded name. Unmappable characters or malformed + * character sequences are mapped to a sequence of utf-16 + * words encoded in the format <code>%Uxxxx</code>. 
It is + * assumed, that the byte buffer is positioned at the + * beginning of the encoded result, the byte buffer has a + * backing array and the limit of the byte buffer points + * to the end of the encoded result. + * @throws IOException on error + */ + ByteBuffer encode(String name) throws IOException; + + /** + * @param data The byte values to decode. + * @return The decoded string. + * @throws IOException on error + */ + String decode(byte [] data) throws IOException; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java new file mode 100644 index 000000000..8aeb789e2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.charset.UnsupportedCharsetException; + +/** + * Static helper functions for robustly encoding filenames in zip files. + */ +public abstract class ZipEncodingHelper { + + + /** + * name of the encoding UTF-8 + */ + static final String UTF8 = "UTF8"; + + /** + * the encoding UTF-8 + */ + static final ZipEncoding UTF8_ZIP_ENCODING = getZipEncoding(UTF8); + + /** + * Instantiates a zip encoding. An NIO based character set encoder/decoder will be returned. + * As a special case, if the character set is UTF-8, the nio encoder will be configured replace malformed and + * unmappable characters with '?'. This matches existing behavior from the older fallback encoder. + * <p> + * If the requested characer set cannot be found, the platform default will + * be used instead. + * </p> + * @param name The name of the zip encoding. Specify {@code null} for + * the platform's default encoding. + * @return A zip encoding for the given encoding name. + */ + public static ZipEncoding getZipEncoding(final String name) { + Charset cs = Charset.defaultCharset(); + if (name != null) { + try { + cs = Charset.forName(name); + } catch (UnsupportedCharsetException e) { // NOSONAR we use the default encoding instead + } + } + boolean useReplacement = isUTF8(cs.name()); + return new NioZipEncoding(cs, useReplacement); + } + + /** + * Returns whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding. + * + * @param charsetName If the given name is null, then check the platform's default encoding. 
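A small usage sketch tying ZipEncodingHelper and ZipEncoding together, reusing the CP437 examples from the interface documentation above:

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;
    import org.apache.commons.compress.archivers.zip.ZipEncoding;
    import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;

    public class ZipEncodingDemo {
        public static void main(String[] args) throws Exception {
            ZipEncoding cp437 = ZipEncodingHelper.getZipEncoding("CP437");

            System.out.println(cp437.canEncode("\u00D6lf\u00E4sser.txt"));    // true
            System.out.println(cp437.canEncode("\u20AC_for_Dollar.txt"));     // false, no euro sign in CP437

            ByteBuffer encoded = cp437.encode("\u20AC_for_Dollar.txt");
            // unmappable characters come back as %Uxxxx escape sequences
            System.out.println(new String(encoded.array(), encoded.arrayOffset(),
                    encoded.limit() - encoded.position(), StandardCharsets.US_ASCII));
        }
    }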
+ */ + static boolean isUTF8(String charsetName) { + if (charsetName == null) { + // check platform's default encoding + charsetName = Charset.defaultCharset().name(); + } + if (StandardCharsets.UTF_8.name().equalsIgnoreCase(charsetName)) { + return true; + } + for (final String alias : StandardCharsets.UTF_8.aliases()) { + if (alias.equalsIgnoreCase(charsetName)) { + return true; + } + } + return false; + } + + static ByteBuffer growBufferBy(ByteBuffer buffer, int increment) { + buffer.limit(buffer.position()); + buffer.rewind(); + + final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment); + + on.put(buffer); + return on; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipExtraField.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipExtraField.java new file mode 100644 index 000000000..2c44b2a52 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipExtraField.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +import java.util.zip.ZipException; + +/** + * General format of extra field data. + * + * <p>Extra fields usually appear twice per file, once in the local + * file data and once in the central directory. Usually they are the + * same, but they don't have to be. {@link + * java.util.zip.ZipOutputStream java.util.zip.ZipOutputStream} will + * only use the local file data in both places.</p> + * + */ +public interface ZipExtraField { + /** + * Size of an extra field field header (id + length). + * @since 1.14 + */ + int EXTRAFIELD_HEADER_SIZE = 4; + + /** + * The Header-ID. + * + * @return The HeaderId value + */ + ZipShort getHeaderId(); + + /** + * Length of the extra field in the local file data - without + * Header-ID or length specifier. + * @return the length of the field in the local file data + */ + ZipShort getLocalFileDataLength(); + + /** + * Length of the extra field in the central directory - without + * Header-ID or length specifier. + * @return the length of the field in the central directory + */ + ZipShort getCentralDirectoryLength(); + + /** + * The actual data to put into local file data - without Header-ID + * or length specifier. + * @return the data + */ + byte[] getLocalFileDataData(); + + /** + * The actual data to put into central directory - without Header-ID or + * length specifier. + * @return the data + */ + byte[] getCentralDirectoryData(); + + /** + * Populate data from this array as if it was in local file data. 
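For illustration, a minimal class implementing this interface, including the two parse methods declared immediately below; the 0x6C78 header id is purely hypothetical:

    import java.util.zip.ZipException;
    import org.apache.commons.compress.archivers.zip.ZipExtraField;
    import org.apache.commons.compress.archivers.zip.ZipShort;

    /** Toy extra field that carries an opaque byte payload. */
    public class PayloadExtraField implements ZipExtraField {
        private static final ZipShort HEADER_ID = new ZipShort(0x6C78); // hypothetical id
        private byte[] payload = new byte[0];

        @Override
        public ZipShort getHeaderId() { return HEADER_ID; }

        @Override
        public ZipShort getLocalFileDataLength() { return new ZipShort(payload.length); }

        @Override
        public ZipShort getCentralDirectoryLength() { return getLocalFileDataLength(); }

        @Override
        public byte[] getLocalFileDataData() { return payload.clone(); }

        @Override
        public byte[] getCentralDirectoryData() { return getLocalFileDataData(); }

        @Override
        public void parseFromLocalFileData(byte[] buffer, int offset, int length) throws ZipException {
            payload = new byte[length];
            System.arraycopy(buffer, offset, payload, 0, length);
        }

        @Override
        public void parseFromCentralDirectoryData(byte[] buffer, int offset, int length) throws ZipException {
            parseFromLocalFileData(buffer, offset, length);
        }
    }

To have such a field reconstructed as this type when an archive is read back, it would typically also be registered with ExtraFieldUtils; unregistered ids simply come back as unrecognised raw data.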
+ * + * @param buffer the buffer to read data from + * @param offset offset into buffer to read data + * @param length the length of data + * @throws ZipException on error + */ + void parseFromLocalFileData(byte[] buffer, int offset, int length) + throws ZipException; + + /** + * Populate data from this array as if it was in central directory data. + * + * @param buffer the buffer to read data from + * @param offset offset into buffer to read data + * @param length the length of data + * @throws ZipException on error + */ + void parseFromCentralDirectoryData(byte[] buffer, int offset, int length) + throws ZipException; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java new file mode 100644 index 000000000..6beedcb52 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java @@ -0,0 +1,1278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.Closeable; +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.SequenceInputStream; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.Enumeration; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.zip.Inflater; +import java.util.zip.ZipException; + +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; +import org.apache.commons.compress.utils.CountingInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; +import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; +import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; +import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; +import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; + +/** + * Replacement for <code>java.util.ZipFile</code>. 
+ * + * <p>This class adds support for file name encodings other than UTF-8 + * (which is required to work on ZIP files created by native zip tools + * and is able to skip a preamble like the one found in self + * extracting archives. Furthermore it returns instances of + * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> + * instead of <code>java.util.zip.ZipEntry</code>.</p> + * + * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would + * have to reimplement all methods anyway. Like + * <code>java.util.ZipFile</code>, it uses SeekableByteChannel under the + * covers and supports compressed and uncompressed entries. As of + * Apache Commons Compress 1.3 it also transparently supports Zip64 + * extensions and thus individual entries and archives larger than 4 + * GB or with more than 65536 entries.</p> + * + * <p>The method signatures mimic the ones of + * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: + * + * <ul> + * <li>There is no getName method.</li> + * <li>entries has been renamed to getEntries.</li> + * <li>getEntries and getEntry return + * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> + * instances.</li> + * <li>close is allowed to throw IOException.</li> + * </ul> + * + */ +public class ZipFile implements Closeable { + private static final int HASH_SIZE = 509; + static final int NIBLET_MASK = 0x0f; + static final int BYTE_SHIFT = 8; + private static final int POS_0 = 0; + private static final int POS_1 = 1; + private static final int POS_2 = 2; + private static final int POS_3 = 3; + private static final byte[] ONE_ZERO_BYTE = new byte[1]; + + /** + * List of entries in the order they appear inside the central + * directory. + */ + private final List<ZipArchiveEntry> entries = + new LinkedList<>(); + + /** + * Maps String to list of ZipArchiveEntrys, name -> actual entries. + */ + private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = + new HashMap<>(HASH_SIZE); + + /** + * The encoding to use for filenames and the file comment. + * + * <p>For a list of possible values see <a + * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. + * Defaults to UTF-8.</p> + */ + private final String encoding; + + /** + * The zip encoding to use for filenames and the file comment. + */ + private final ZipEncoding zipEncoding; + + /** + * File name of actual source. + */ + private final String archiveName; + + /** + * The actual data source. + */ + private final SeekableByteChannel archive; + + /** + * Whether to look for and use Unicode extra fields. + */ + private final boolean useUnicodeExtraFields; + + /** + * Whether the file is closed. + */ + private volatile boolean closed = true; + + // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) + private final byte[] dwordBuf = new byte[DWORD]; + private final byte[] wordBuf = new byte[WORD]; + private final byte[] cfhBuf = new byte[CFH_LEN]; + private final byte[] shortBuf = new byte[SHORT]; + private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); + private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); + private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); + + /** + * Opens the given file for reading, assuming "UTF8" for file names. + * + * @param f the archive. + * + * @throws IOException if an error occurs while reading the file. 
+ */ + public ZipFile(final File f) throws IOException { + this(f, ZipEncodingHelper.UTF8); + } + + /** + * Opens the given file for reading, assuming "UTF8". + * + * @param name name of the archive. + * + * @throws IOException if an error occurs while reading the file. + */ + public ZipFile(final String name) throws IOException { + this(new File(name), ZipEncodingHelper.UTF8); + } + + /** + * Opens the given file for reading, assuming the specified + * encoding for file names, scanning unicode extra fields. + * + * @param name name of the archive. + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * + * @throws IOException if an error occurs while reading the file. + */ + public ZipFile(final String name, final String encoding) throws IOException { + this(new File(name), encoding, true); + } + + /** + * Opens the given file for reading, assuming the specified + * encoding for file names and scanning for unicode extra fields. + * + * @param f the archive. + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * + * @throws IOException if an error occurs while reading the file. + */ + public ZipFile(final File f, final String encoding) throws IOException { + this(f, encoding, true); + } + + /** + * Opens the given file for reading, assuming the specified + * encoding for file names. + * + * @param f the archive. + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @param useUnicodeExtraFields whether to use InfoZIP Unicode + * Extra Fields (if present) to set the file names. + * + * @throws IOException if an error occurs while reading the file. + */ + public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) + throws IOException { + this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), + f.getAbsolutePath(), encoding, useUnicodeExtraFields, true); + } + + /** + * Opens the given channel for reading, assuming "UTF8" for file names. + * + * <p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.</p> + * + * @param channel the archive. + * + * @throws IOException if an error occurs while reading the file. + * @since 1.13 + */ + public ZipFile(final SeekableByteChannel channel) + throws IOException { + this(channel, "unknown archive", ZipEncodingHelper.UTF8, true); + } + + /** + * Opens the given channel for reading, assuming the specified + * encoding for file names. + * + * <p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.</p> + * + * @param channel the archive. + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * + * @throws IOException if an error occurs while reading the file. + * @since 1.13 + */ + public ZipFile(final SeekableByteChannel channel, final String encoding) + throws IOException { + this(channel, "unknown archive", encoding, true); + } + + /** + * Opens the given channel for reading, assuming the specified + * encoding for file names. + * + * <p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.</p> + * + * @param channel the archive. + * @param archiveName name of the archive, used for error messages only. 
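As the note above says, SeekableInMemoryByteChannel lets ZipFile read an archive that lives entirely in memory. A short sketch with a hypothetical archive path and entry name:

    import java.nio.file.Files;
    import java.nio.file.Paths;
    import org.apache.commons.compress.archivers.zip.ZipFile;
    import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;

    public class InMemoryZipExample {
        public static void main(String[] args) throws Exception {
            byte[] archiveBytes = Files.readAllBytes(Paths.get("sample.zip"));

            try (ZipFile zip = new ZipFile(new SeekableInMemoryByteChannel(archiveBytes))) {
                System.out.println(zip.getEntry("README.txt") != null);
            }
        }
    }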
+ * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @param useUnicodeExtraFields whether to use InfoZIP Unicode + * Extra Fields (if present) to set the file names. + * + * @throws IOException if an error occurs while reading the file. + * @since 1.13 + */ + public ZipFile(final SeekableByteChannel channel, final String archiveName, + final String encoding, final boolean useUnicodeExtraFields) + throws IOException { + this(channel, archiveName, encoding, useUnicodeExtraFields, false); + } + + private ZipFile(final SeekableByteChannel channel, final String archiveName, + final String encoding, final boolean useUnicodeExtraFields, + final boolean closeOnError) + throws IOException { + this.archiveName = archiveName; + this.encoding = encoding; + this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); + this.useUnicodeExtraFields = useUnicodeExtraFields; + archive = channel; + boolean success = false; + try { + final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = + populateFromCentralDirectory(); + resolveLocalFileHeaderData(entriesWithoutUTF8Flag); + success = true; + } finally { + closed = !success; + if (!success && closeOnError) { + IOUtils.closeQuietly(archive); + } + } + } + + /** + * The encoding to use for filenames and the file comment. + * + * @return null if using the platform's default character encoding. + */ + public String getEncoding() { + return encoding; + } + + /** + * Closes the archive. + * @throws IOException if an error occurs closing the archive. + */ + @Override + public void close() throws IOException { + // this flag is only written here and read in finalize() which + // can never be run in parallel. + // no synchronization needed. + closed = true; + + archive.close(); + } + + /** + * close a zipfile quietly; throw no io fault, do nothing + * on a null parameter + * @param zipfile file to close, can be null + */ + public static void closeQuietly(final ZipFile zipfile) { + IOUtils.closeQuietly(zipfile); + } + + /** + * Returns all entries. + * + * <p>Entries will be returned in the same order they appear + * within the archive's central directory.</p> + * + * @return all entries as {@link ZipArchiveEntry} instances + */ + public Enumeration<ZipArchiveEntry> getEntries() { + return Collections.enumeration(entries); + } + + /** + * Returns all entries in physical order. + * + * <p>Entries will be returned in the same order their contents + * appear within the archive.</p> + * + * @return all entries as {@link ZipArchiveEntry} instances + * + * @since 1.1 + */ + public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { + final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]); + Arrays.sort(allEntries, offsetComparator); + return Collections.enumeration(Arrays.asList(allEntries)); + } + + /** + * Returns a named entry - or {@code null} if no entry by + * that name exists. + * + * <p>If multiple entries with the same name exist the first entry + * in the archive's central directory by that name is + * returned.</p> + * + * @param name name of the entry. + * @return the ZipArchiveEntry corresponding to the given name - or + * {@code null} if not present. + */ + public ZipArchiveEntry getEntry(final String name) { + final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); + return entriesOfThatName != null ? 
entriesOfThatName.getFirst() : null; + } + + /** + * Returns all named entries in the same order they appear within + * the archive's central directory. + * + * @param name name of the entry. + * @return the Iterable<ZipArchiveEntry> corresponding to the + * given name + * @since 1.6 + */ + public Iterable<ZipArchiveEntry> getEntries(final String name) { + final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); + return entriesOfThatName != null ? entriesOfThatName + : Collections.<ZipArchiveEntry>emptyList(); + } + + /** + * Returns all named entries in the same order their contents + * appear within the archive. + * + * @param name name of the entry. + * @return the Iterable<ZipArchiveEntry> corresponding to the + * given name + * @since 1.6 + */ + public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { + ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0]; + if (nameMap.containsKey(name)) { + entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); + Arrays.sort(entriesOfThatName, offsetComparator); + } + return Arrays.asList(entriesOfThatName); + } + + /** + * Whether this class is able to read the given entry. + * + * <p>May return false if it is set up to use encryption or a + * compression method that hasn't been implemented yet.</p> + * @since 1.1 + * @param ze the entry + * @return whether this class is able to read the given entry. + */ + public boolean canReadEntryData(final ZipArchiveEntry ze) { + return ZipUtil.canHandleEntryData(ze); + } + + /** + * Expose the raw stream of the archive entry (compressed form). + * + * <p>This method does not relate to how/if we understand the payload in the + * stream, since we really only intend to move it on to somewhere else.</p> + * + * @param ze The entry to get the stream for + * @return The raw input stream containing (possibly) compressed data. + * @since 1.11 + */ + public InputStream getRawInputStream(final ZipArchiveEntry ze) { + if (!(ze instanceof Entry)) { + return null; + } + final long start = ze.getDataOffset(); + return createBoundedInputStream(start, ze.getCompressedSize()); + } + + + /** + * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. + * Compression and all other attributes will be as in this file. + * <p>This method transfers entries based on the central directory of the zip file.</p> + * + * @param target The zipArchiveOutputStream to write the entries to + * @param predicate A predicate that selects which entries to write + * @throws IOException on error + */ + public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) + throws IOException { + final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); + while (src.hasMoreElements()) { + final ZipArchiveEntry entry = src.nextElement(); + if (predicate.test( entry)) { + target.addRawArchiveEntry(entry, getRawInputStream(entry)); + } + } + } + + /** + * Returns an InputStream for reading the contents of the given entry. + * + * @param ze the entry to get the stream for. + * @return a stream to read the entry from. The returned stream + * implements {@link InputStreamStatistics}. 
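+ *
+ * <p>A minimal usage sketch; the archive and entry names are
+ * placeholders, not part of this API:</p>
+ * <pre>{@code
+ * try (ZipFile zf = new ZipFile(new File("archive.zip"))) {
+ *     ZipArchiveEntry entry = zf.getEntry("docs/readme.txt");
+ *     if (entry != null && zf.canReadEntryData(entry)) {
+ *         try (InputStream in = zf.getInputStream(entry)) {
+ *             // consume the decompressed entry data
+ *         }
+ *     }
+ * }
+ * }</pre>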
+ * @throws IOException if unable to create an input stream from the zipentry + */ + public InputStream getInputStream(final ZipArchiveEntry ze) + throws IOException { + if (!(ze instanceof Entry)) { + return null; + } + // cast validity is checked just above + ZipUtil.checkRequestedFeatures(ze); + final long start = ze.getDataOffset(); + + // doesn't get closed if the method is not supported - which + // should never happen because of the checkRequestedFeatures + // call above + final InputStream is = + new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR + switch (ZipMethod.getMethodByCode(ze.getMethod())) { + case STORED: + return new StoredStatisticsStream(is); + case UNSHRINKING: + return new UnshrinkingInputStream(is); + case IMPLODING: + return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), + ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); + case DEFLATED: + final Inflater inflater = new Inflater(true); + // Inflater with nowrap=true has this odd contract for a zero padding + // byte following the data stream; this used to be zlib's requirement + // and has been fixed a long time ago, but the contract persists so + // we comply. + // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) + return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), + inflater) { + @Override + public void close() throws IOException { + try { + super.close(); + } finally { + inflater.end(); + } + } + }; + case BZIP2: + return new BZip2CompressorInputStream(is); + case ENHANCED_DEFLATED: + return new Deflate64CompressorInputStream(is); + case AES_ENCRYPTED: + case EXPANDING_LEVEL_1: + case EXPANDING_LEVEL_2: + case EXPANDING_LEVEL_3: + case EXPANDING_LEVEL_4: + case JPEG: + case LZMA: + case PKWARE_IMPLODING: + case PPMD: + case TOKENIZATION: + case UNKNOWN: + case WAVPACK: + case XZ: + default: + throw new ZipException("Found unsupported compression method " + + ze.getMethod()); + } + } + + /** + * <p> + * Convenience method to return the entry's content as a String if isUnixSymlink() + * returns true for it, otherwise returns null. + * </p> + * + * <p>This method assumes the symbolic link's file name uses the + * same encoding that as been specified for this ZipFile.</p> + * + * @param entry ZipArchiveEntry object that represents the symbolic link + * @return entry's content as a String + * @throws IOException problem with content's input stream + * @since 1.5 + */ + public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { + if (entry != null && entry.isUnixSymlink()) { + try (InputStream in = getInputStream(entry)) { + return zipEncoding.decode(IOUtils.toByteArray(in)); + } + } + return null; + } + + /** + * Ensures that the close method of this zipfile is called when + * there are no more references to it. + * @see #close() + */ + @Override + protected void finalize() throws Throwable { + try { + if (!closed) { + System.err.println("Cleaning up unclosed ZipFile for archive " + + archiveName); + close(); + } + } finally { + super.finalize(); + } + } + + /** + * Length of a "central directory" entry structure without file + * name, extra fields or comment. 
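+ *
+ * <p>(With SHORT = 2 and WORD = 4 bytes this adds up to 42 bytes;
+ * the four byte central file header signature that precedes these
+ * fields is not included.)</p>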
+ */ + private static final int CFH_LEN = + /* version made by */ SHORT + /* version needed to extract */ + SHORT + /* general purpose bit flag */ + SHORT + /* compression method */ + SHORT + /* last mod file time */ + SHORT + /* last mod file date */ + SHORT + /* crc-32 */ + WORD + /* compressed size */ + WORD + /* uncompressed size */ + WORD + /* filename length */ + SHORT + /* extra field length */ + SHORT + /* file comment length */ + SHORT + /* disk number start */ + SHORT + /* internal file attributes */ + SHORT + /* external file attributes */ + WORD + /* relative offset of local header */ + WORD; + + private static final long CFH_SIG = + ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); + + /** + * Reads the central directory of the given archive and populates + * the internal tables with ZipArchiveEntry instances. + * + * <p>The ZipArchiveEntrys will know all data that can be obtained from + * the central directory alone, but not the data that requires the + * local file header or additional data to be read.</p> + * + * @return a map of zipentries that didn't have the language + * encoding flag set when read. + */ + private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() + throws IOException { + final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = + new HashMap<>(); + + positionAtCentralDirectory(); + + wordBbuf.rewind(); + IOUtils.readFully(archive, wordBbuf); + long sig = ZipLong.getValue(wordBuf); + + if (sig != CFH_SIG && startsWithLocalFileHeader()) { + throw new IOException("central directory is empty, can't expand" + + " corrupt archive."); + } + + while (sig == CFH_SIG) { + readCentralDirectoryEntry(noUTF8Flag); + wordBbuf.rewind(); + IOUtils.readFully(archive, wordBbuf); + sig = ZipLong.getValue(wordBuf); + } + return noUTF8Flag; + } + + /** + * Reads an individual entry of the central directory, creats an + * ZipArchiveEntry from it and adds it to the global maps. + * + * @param noUTF8Flag map used to collect entries that don't have + * their UTF-8 flag set and whose name will be set by data read + * from the local file header later. The current entry may be + * added to this map. + */ + private void + readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) + throws IOException { + cfhBbuf.rewind(); + IOUtils.readFully(archive, cfhBbuf); + int off = 0; + final Entry ze = new Entry(); + + final int versionMadeBy = ZipShort.getValue(cfhBuf, off); + off += SHORT; + ze.setVersionMadeBy(versionMadeBy); + ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); + + ze.setVersionRequired(ZipShort.getValue(cfhBuf, off)); + off += SHORT; // version required + + final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off); + final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); + final ZipEncoding entryEncoding = + hasUTF8Flag ? 
ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; + if (hasUTF8Flag) { + ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG); + } + ze.setGeneralPurposeBit(gpFlag); + ze.setRawFlag(ZipShort.getValue(cfhBuf, off)); + + off += SHORT; + + //noinspection MagicConstant + ze.setMethod(ZipShort.getValue(cfhBuf, off)); + off += SHORT; + + final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off)); + ze.setTime(time); + off += WORD; + + ze.setCrc(ZipLong.getValue(cfhBuf, off)); + off += WORD; + + ze.setCompressedSize(ZipLong.getValue(cfhBuf, off)); + off += WORD; + + ze.setSize(ZipLong.getValue(cfhBuf, off)); + off += WORD; + + final int fileNameLen = ZipShort.getValue(cfhBuf, off); + off += SHORT; + + final int extraLen = ZipShort.getValue(cfhBuf, off); + off += SHORT; + + final int commentLen = ZipShort.getValue(cfhBuf, off); + off += SHORT; + + final int diskStart = ZipShort.getValue(cfhBuf, off); + off += SHORT; + + ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off)); + off += SHORT; + + ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off)); + off += WORD; + + final byte[] fileName = new byte[fileNameLen]; + IOUtils.readFully(archive, ByteBuffer.wrap(fileName)); + ze.setName(entryEncoding.decode(fileName), fileName); + + // LFH offset, + ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off)); + // data offset will be filled later + entries.add(ze); + + final byte[] cdExtraData = new byte[extraLen]; + IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData)); + ze.setCentralDirectoryExtra(cdExtraData); + + setSizesAndOffsetFromZip64Extra(ze, diskStart); + + final byte[] comment = new byte[commentLen]; + IOUtils.readFully(archive, ByteBuffer.wrap(comment)); + ze.setComment(entryEncoding.decode(comment)); + + if (!hasUTF8Flag && useUnicodeExtraFields) { + noUTF8Flag.put(ze, new NameAndComment(fileName, comment)); + } + } + + /** + * If the entry holds a Zip64 extended information extra field, + * read sizes from there if the entry's sizes are set to + * 0xFFFFFFFFF, do the same for the offset of the local file + * header. 
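+ * 0xFFFFFFFF is the ZIP64 magic value that marks a field whose real
+ * value is stored in the Zip64 extended information extra field.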
+ * + * <p>Ensures the Zip64 extra either knows both compressed and + * uncompressed size or neither of both as the internal logic in + * ExtraFieldUtils forces the field to create local header data + * even if they are never used - and here a field with only one + * size would be invalid.</p> + */ + private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze, + final int diskStart) + throws IOException { + final Zip64ExtendedInformationExtraField z64 = + (Zip64ExtendedInformationExtraField) + ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); + if (z64 != null) { + final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; + final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; + final boolean hasRelativeHeaderOffset = + ze.getLocalHeaderOffset() == ZIP64_MAGIC; + z64.reparseCentralDirectoryData(hasUncompressedSize, + hasCompressedSize, + hasRelativeHeaderOffset, + diskStart == ZIP64_MAGIC_SHORT); + + if (hasUncompressedSize) { + ze.setSize(z64.getSize().getLongValue()); + } else if (hasCompressedSize) { + z64.setSize(new ZipEightByteInteger(ze.getSize())); + } + + if (hasCompressedSize) { + ze.setCompressedSize(z64.getCompressedSize().getLongValue()); + } else if (hasUncompressedSize) { + z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); + } + + if (hasRelativeHeaderOffset) { + ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); + } + } + } + + /** + * Length of the "End of central directory record" - which is + * supposed to be the last structure of the archive - without file + * comment. + */ + static final int MIN_EOCD_SIZE = + /* end of central dir signature */ WORD + /* number of this disk */ + SHORT + /* number of the disk with the */ + /* start of the central directory */ + SHORT + /* total number of entries in */ + /* the central dir on this disk */ + SHORT + /* total number of entries in */ + /* the central dir */ + SHORT + /* size of the central directory */ + WORD + /* offset of start of central */ + /* directory with respect to */ + /* the starting disk number */ + WORD + /* zipfile comment length */ + SHORT; + + /** + * Maximum length of the "End of central directory record" with a + * file comment. + */ + private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE + /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; + + /** + * Offset of the field that holds the location of the first + * central directory entry inside the "End of central directory + * record" relative to the start of the "End of central directory + * record". + */ + private static final int CFD_LOCATOR_OFFSET = + /* end of central dir signature */ WORD + /* number of this disk */ + SHORT + /* number of the disk with the */ + /* start of the central directory */ + SHORT + /* total number of entries in */ + /* the central dir on this disk */ + SHORT + /* total number of entries in */ + /* the central dir */ + SHORT + /* size of the central directory */ + WORD; + + /** + * Length of the "Zip64 end of central directory locator" - which + * should be right in front of the "end of central directory + * record" if one is present at all. 
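+ *
+ * <p>(20 bytes: a four byte signature, a four byte disk number, an
+ * eight byte offset of the "Zip64 end of central directory record"
+ * and a four byte total disk count.)</p>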
+ */ + private static final int ZIP64_EOCDL_LENGTH = + /* zip64 end of central dir locator sig */ WORD + /* number of the disk with the start */ + /* start of the zip64 end of */ + /* central directory */ + WORD + /* relative offset of the zip64 */ + /* end of central directory record */ + DWORD + /* total number of disks */ + WORD; + + /** + * Offset of the field that holds the location of the "Zip64 end + * of central directory record" inside the "Zip64 end of central + * directory locator" relative to the start of the "Zip64 end of + * central directory locator". + */ + private static final int ZIP64_EOCDL_LOCATOR_OFFSET = + /* zip64 end of central dir locator sig */ WORD + /* number of the disk with the start */ + /* start of the zip64 end of */ + /* central directory */ + WORD; + + /** + * Offset of the field that holds the location of the first + * central directory entry inside the "Zip64 end of central + * directory record" relative to the start of the "Zip64 end of + * central directory record". + */ + private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = + /* zip64 end of central dir */ + /* signature */ WORD + /* size of zip64 end of central */ + /* directory record */ + DWORD + /* version made by */ + SHORT + /* version needed to extract */ + SHORT + /* number of this disk */ + WORD + /* number of the disk with the */ + /* start of the central directory */ + WORD + /* total number of entries in the */ + /* central directory on this disk */ + DWORD + /* total number of entries in the */ + /* central directory */ + DWORD + /* size of the central directory */ + DWORD; + + /** + * Searches for either the "Zip64 end of central directory + * locator" or the "End of central dir record", parses + * it and positions the stream at the first central directory + * record. + */ + private void positionAtCentralDirectory() + throws IOException { + positionAtEndOfCentralDirectoryRecord(); + boolean found = false; + final boolean searchedForZip64EOCD = + archive.position() > ZIP64_EOCDL_LENGTH; + if (searchedForZip64EOCD) { + archive.position(archive.position() - ZIP64_EOCDL_LENGTH); + wordBbuf.rewind(); + IOUtils.readFully(archive, wordBbuf); + found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, + wordBuf); + } + if (!found) { + // not a ZIP64 archive + if (searchedForZip64EOCD) { + skipBytes(ZIP64_EOCDL_LENGTH - WORD); + } + positionAtCentralDirectory32(); + } else { + positionAtCentralDirectory64(); + } + } + + /** + * Parses the "Zip64 end of central directory locator", + * finds the "Zip64 end of central directory record" using the + * parsed information, parses that and positions the stream at the + * first central directory record. + * + * Expects stream to be positioned right behind the "Zip64 + * end of central directory locator"'s signature. 
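+ *
+ * <p>Throws a ZipException if the record the locator points at does
+ * not start with the expected ZIP64 end of central directory
+ * signature, since that indicates a corrupt archive.</p>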
+ */ + private void positionAtCentralDirectory64() + throws IOException { + skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET + - WORD /* signature has already been read */); + dwordBbuf.rewind(); + IOUtils.readFully(archive, dwordBbuf); + archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); + wordBbuf.rewind(); + IOUtils.readFully(archive, wordBbuf); + if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { + throw new ZipException("archive's ZIP64 end of central " + + "directory locator is corrupt."); + } + skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET + - WORD /* signature has already been read */); + dwordBbuf.rewind(); + IOUtils.readFully(archive, dwordBbuf); + archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); + } + + /** + * Parses the "End of central dir record" and positions + * the stream at the first central directory record. + * + * Expects stream to be positioned at the beginning of the + * "End of central dir record". + */ + private void positionAtCentralDirectory32() + throws IOException { + skipBytes(CFD_LOCATOR_OFFSET); + wordBbuf.rewind(); + IOUtils.readFully(archive, wordBbuf); + archive.position(ZipLong.getValue(wordBuf)); + } + + /** + * Searches for the and positions the stream at the start of the + * "End of central dir record". + */ + private void positionAtEndOfCentralDirectoryRecord() + throws IOException { + final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, + ZipArchiveOutputStream.EOCD_SIG); + if (!found) { + throw new ZipException("archive is not a ZIP archive"); + } + } + + /** + * Searches the archive backwards from minDistance to maxDistance + * for the given signature, positions the RandomaccessFile right + * at the signature if it has been found. + */ + private boolean tryToLocateSignature(final long minDistanceFromEnd, + final long maxDistanceFromEnd, + final byte[] sig) throws IOException { + boolean found = false; + long off = archive.size() - minDistanceFromEnd; + final long stopSearching = + Math.max(0L, archive.size() - maxDistanceFromEnd); + if (off >= 0) { + for (; off >= stopSearching; off--) { + archive.position(off); + try { + wordBbuf.rewind(); + IOUtils.readFully(archive, wordBbuf); + wordBbuf.flip(); + } catch (EOFException ex) { + break; + } + int curr = wordBbuf.get(); + if (curr == sig[POS_0]) { + curr = wordBbuf.get(); + if (curr == sig[POS_1]) { + curr = wordBbuf.get(); + if (curr == sig[POS_2]) { + curr = wordBbuf.get(); + if (curr == sig[POS_3]) { + found = true; + break; + } + } + } + } + } + } + if (found) { + archive.position(off); + } + return found; + } + + /** + * Skips the given number of bytes or throws an EOFException if + * skipping failed. + */ + private void skipBytes(final int count) throws IOException { + long currentPosition = archive.position(); + long newPosition = currentPosition + count; + if (newPosition > archive.size()) { + throw new EOFException(); + } + archive.position(newPosition); + } + + /** + * Number of bytes in local file header up to the "length of + * filename" entry. + */ + private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = + /* local file header signature */ WORD + /* version needed to extract */ + SHORT + /* general purpose bit flag */ + SHORT + /* compression method */ + SHORT + /* last mod file time */ + SHORT + /* last mod file date */ + SHORT + /* crc-32 */ + WORD + /* compressed size */ + WORD + /* uncompressed size */ + (long) WORD; + + /** + * Walks through all recorded entries and adds the data available + * from the local file header. 
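+ * It also populates the name-to-entry map that backs the getEntry
+ * methods.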
+ * + * <p>Also records the offsets for the data to read from the + * entries.</p> + */ + private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> + entriesWithoutUTF8Flag) + throws IOException { + for (final ZipArchiveEntry zipArchiveEntry : entries) { + // entries is filled in populateFromCentralDirectory and + // never modified + final Entry ze = (Entry) zipArchiveEntry; + final long offset = ze.getLocalHeaderOffset(); + archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); + wordBbuf.rewind(); + IOUtils.readFully(archive, wordBbuf); + wordBbuf.flip(); + wordBbuf.get(shortBuf); + final int fileNameLen = ZipShort.getValue(shortBuf); + wordBbuf.get(shortBuf); + final int extraFieldLen = ZipShort.getValue(shortBuf); + skipBytes(fileNameLen); + final byte[] localExtraData = new byte[extraFieldLen]; + IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData)); + ze.setExtra(localExtraData); + ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH + + SHORT + SHORT + fileNameLen + extraFieldLen); + ze.setStreamContiguous(true); + + if (entriesWithoutUTF8Flag.containsKey(ze)) { + final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); + ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, + nc.comment); + } + + final String name = ze.getName(); + LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); + if (entriesOfThatName == null) { + entriesOfThatName = new LinkedList<>(); + nameMap.put(name, entriesOfThatName); + } + entriesOfThatName.addLast(ze); + } + } + + /** + * Checks whether the archive starts with a LFH. If it doesn't, + * it may be an empty archive. + */ + private boolean startsWithLocalFileHeader() throws IOException { + archive.position(0); + wordBbuf.rewind(); + IOUtils.readFully(archive, wordBbuf); + return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); + } + + /** + * Creates new BoundedInputStream, according to implementation of + * underlying archive channel. + */ + private BoundedInputStream createBoundedInputStream(long start, long remaining) { + return archive instanceof FileChannel ? + new BoundedFileChannelInputStream(start, remaining) : + new BoundedInputStream(start, remaining); + } + + /** + * InputStream that delegates requests to the underlying + * SeekableByteChannel, making sure that only bytes from a certain + * range can be read. 
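+ *
+ * <p>Reads synchronize on the channel because its position is shared
+ * state; the FileChannel based subclass below avoids the lock by
+ * using positioned reads instead.</p>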
+ */ + private class BoundedInputStream extends InputStream { + private ByteBuffer singleByteBuffer; + private final long end; + private long loc; + + BoundedInputStream(final long start, final long remaining) { + this.end = start+remaining; + if (this.end < start) { + // check for potential vulnerability due to overflow + throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining); + } + loc = start; + } + + @Override + public synchronized int read() throws IOException { + if (loc >= end) { + return -1; + } + if (singleByteBuffer == null) { + singleByteBuffer = ByteBuffer.allocate(1); + } + else { + singleByteBuffer.rewind(); + } + int read = read(loc, singleByteBuffer); + if (read < 0) { + return read; + } + loc++; + return singleByteBuffer.get() & 0xff; + } + + @Override + public synchronized int read(final byte[] b, final int off, int len) throws IOException { + if (len <= 0) { + return 0; + } + + if (len > end-loc) { + if (loc >= end) { + return -1; + } + len = (int)(end-loc); + } + + ByteBuffer buf; + buf = ByteBuffer.wrap(b, off, len); + int ret = read(loc, buf); + if (ret > 0) { + loc += ret; + return ret; + } + return ret; + } + + protected int read(long pos, ByteBuffer buf) throws IOException { + int read; + synchronized (archive) { + archive.position(pos); + read = archive.read(buf); + } + buf.flip(); + return read; + } + } + + /** + * Lock-free implementation of BoundedInputStream. The + * implementation uses positioned reads on the underlying archive + * file channel and therefore performs significantly faster in + * concurrent environment. + */ + private class BoundedFileChannelInputStream extends BoundedInputStream { + private final FileChannel archive; + + BoundedFileChannelInputStream(final long start, final long remaining) { + super(start, remaining); + archive = (FileChannel)ZipFile.this.archive; + } + + @Override + protected int read(long pos, ByteBuffer buf) throws IOException { + int read = archive.read(buf, pos); + buf.flip(); + return read; + } + } + + private static final class NameAndComment { + private final byte[] name; + private final byte[] comment; + private NameAndComment(final byte[] name, final byte[] comment) { + this.name = name; + this.comment = comment; + } + } + + /** + * Compares two ZipArchiveEntries based on their offset within the archive. + * + * <p>Won't return any meaningful results if one of the entries + * isn't part of the archive at all.</p> + * + * @since 1.1 + */ + private final Comparator<ZipArchiveEntry> offsetComparator = + new Comparator<ZipArchiveEntry>() { + @Override + public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) { + if (e1 == e2) { + return 0; + } + + final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null; + final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null; + if (ent1 == null) { + return 1; + } + if (ent2 == null) { + return -1; + } + final long val = (ent1.getLocalHeaderOffset() + - ent2.getLocalHeaderOffset()); + return val == 0 ? 0 : val < 0 ? -1 : +1; + } + }; + + /** + * Extends ZipArchiveEntry to store the offset within the archive. 
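+ *
+ * <p>The {@code equals} and {@code hashCode} overrides take the
+ * offsets into account so that two entries with the same name read
+ * from different positions of the archive are not considered
+ * equal.</p>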
+ */ + private static class Entry extends ZipArchiveEntry { + + Entry() { + } + + @Override + public int hashCode() { + return 3 * super.hashCode() + + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32); + } + + @Override + public boolean equals(final Object other) { + if (super.equals(other)) { + // super.equals would return false if other were not an Entry + final Entry otherEntry = (Entry) other; + return getLocalHeaderOffset() + == otherEntry.getLocalHeaderOffset() + && getDataOffset() + == otherEntry.getDataOffset(); + } + return false; + } + } + + private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics { + StoredStatisticsStream(InputStream in) { + super(in); + } + + @Override + public long getCompressedCount() { + return super.getBytesRead(); + } + + @Override + public long getUncompressedCount() { + return getCompressedCount(); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipLong.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipLong.java new file mode 100644 index 000000000..6046c61da --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipLong.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import org.apache.commons.compress.utils.ByteUtils; + +import java.io.Serializable; + +import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; + +/** + * Utility class that represents a four byte integer with conversion + * rules for the little endian byte order of ZIP files. + * @Immutable + */ +public final class ZipLong implements Cloneable, Serializable { + private static final long serialVersionUID = 1L; + + private final long value; + + /** Central File Header Signature */ + public static final ZipLong CFH_SIG = new ZipLong(0X02014B50L); + + /** Local File Header Signature */ + public static final ZipLong LFH_SIG = new ZipLong(0X04034B50L); + + /** + * Data Descriptor signature. + * + * <p>Actually, PKWARE uses this as marker for split/spanned + * archives and other archivers have started to use it as Data + * Descriptor signature (as well).</p> + * @since 1.1 + */ + public static final ZipLong DD_SIG = new ZipLong(0X08074B50L); + + /** + * Value stored in size and similar fields if ZIP64 extensions are + * used. + * @since 1.3 + */ + static final ZipLong ZIP64_MAGIC = new ZipLong(ZipConstants.ZIP64_MAGIC); + + /** + * Marks ZIP archives that were supposed to be split or spanned + * but only needed a single segment in then end (so are actually + * neither split nor spanned). 
+ * + * <p>This is the "PK00" prefix found in some archives.</p> + * @since 1.5 + */ + public static final ZipLong SINGLE_SEGMENT_SPLIT_MARKER = + new ZipLong(0X30304B50L); + + /** + * Archive extra data record signature. + * @since 1.5 + */ + public static final ZipLong AED_SIG = new ZipLong(0X08064B50L); + + /** + * Create instance from a number. + * @param value the long to store as a ZipLong + */ + public ZipLong(final long value) { + this.value = value; + } + + /** + * create instance from a java int. + * @param value the int to store as a ZipLong + * @since 1.15 + */ + public ZipLong(int value) { + this.value = value; + } + + /** + * Create instance from bytes. + * @param bytes the bytes to store as a ZipLong + */ + public ZipLong (final byte[] bytes) { + this(bytes, 0); + } + + /** + * Create instance from the four bytes starting at offset. + * @param bytes the bytes to store as a ZipLong + * @param offset the offset to start + */ + public ZipLong (final byte[] bytes, final int offset) { + value = ZipLong.getValue(bytes, offset); + } + + /** + * Get value as four bytes in big endian byte order. + * @return value as four bytes in big endian order + */ + public byte[] getBytes() { + return ZipLong.getBytes(value); + } + + /** + * Get value as Java long. + * @return value as a long + */ + public long getValue() { + return value; + } + + /** + * Get value as a (signed) java int + * @return value as int + * @since 1.15 + */ + public int getIntValue() { return (int)value;} + + /** + * Get value as four bytes in big endian byte order. + * @param value the value to convert + * @return value as four bytes in big endian byte order + */ + public static byte[] getBytes(final long value) { + final byte[] result = new byte[WORD]; + putLong(value, result, 0); + return result; + } + + /** + * put the value as four bytes in big endian byte order. + * @param value the Java long to convert to bytes + * @param buf the output buffer + * @param offset + * The offset within the output buffer of the first byte to be written. + * must be non-negative and no larger than <tt>buf.length-4</tt> + */ + + public static void putLong(final long value, final byte[] buf, int offset) { + ByteUtils.toLittleEndian(buf, value, offset, 4); + } + + public void putLong(final byte[] buf, final int offset) { + putLong(value, buf, offset); + } + + /** + * Helper method to get the value as a Java long from four bytes starting at given array offset + * @param bytes the array of bytes + * @param offset the offset to start + * @return the corresponding Java long value + */ + public static long getValue(final byte[] bytes, final int offset) { + return ByteUtils.fromLittleEndian(bytes, offset, 4); + } + + /** + * Helper method to get the value as a Java long from a four-byte array + * @param bytes the array of bytes + * @return the corresponding Java long value + */ + public static long getValue(final byte[] bytes) { + return getValue(bytes, 0); + } + + /** + * Override to make two instances with same value equal. + * @param o an object to compare + * @return true if the objects are equal + */ + @Override + public boolean equals(final Object o) { + if (o == null || !(o instanceof ZipLong)) { + return false; + } + return value == ((ZipLong) o).getValue(); + } + + /** + * Override to make two instances with same value equal. 
+ * @return the value stored in the ZipLong + */ + @Override + public int hashCode() { + return (int) value; + } + + @Override + public Object clone() { + try { + return super.clone(); + } catch (final CloneNotSupportedException cnfe) { + // impossible + throw new RuntimeException(cnfe); //NOSONAR + } + } + + @Override + public String toString() { + return "ZipLong value: " + value; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipMethod.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipMethod.java new file mode 100644 index 000000000..0c9112d7b --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipMethod.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.zip.ZipEntry; + +/** + * List of known compression methods + * + * Many of these methods are currently not supported by commons compress + * + * @since 1.5 + */ +public enum ZipMethod { + + /** + * Compression method 0 for uncompressed entries. + * + * @see ZipEntry#STORED + */ + STORED(ZipEntry.STORED), + + /** + * UnShrinking. + * dynamic Lempel-Ziv-Welch-Algorithm + * + * @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">Explanation of fields: compression + * method: (2 bytes)</a> + */ + UNSHRINKING(1), + + /** + * Reduced with compression factor 1. + * + * @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">Explanation of fields: compression + * method: (2 bytes)</a> + */ + EXPANDING_LEVEL_1(2), + + /** + * Reduced with compression factor 2. + * + * @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">Explanation of fields: compression + * method: (2 bytes)</a> + */ + EXPANDING_LEVEL_2(3), + + /** + * Reduced with compression factor 3. + * + * @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">Explanation of fields: compression + * method: (2 bytes)</a> + */ + EXPANDING_LEVEL_3(4), + + /** + * Reduced with compression factor 4. + * + * @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">Explanation of fields: compression + * method: (2 bytes)</a> + */ + EXPANDING_LEVEL_4(5), + + /** + * Imploding. + * + * @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">Explanation of fields: compression + * method: (2 bytes)</a> + */ + IMPLODING(6), + + /** + * Tokenization. + * + * @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">Explanation of fields: compression + * method: (2 bytes)</a> + */ + TOKENIZATION(7), + + /** + * Compression method 8 for compressed (deflated) entries. 
+ * + * @see ZipEntry#DEFLATED + */ + DEFLATED(ZipEntry.DEFLATED), + + /** + * Compression Method 9 for enhanced deflate. + * + * @see <a href="https://www.winzip.com/wz54.htm">https://www.winzip.com/wz54.htm</a> + */ + ENHANCED_DEFLATED(9), + + /** + * PKWARE Data Compression Library Imploding. + * + * @see <a href="https://www.winzip.com/wz54.htm">https://www.winzip.com/wz54.htm</a> + */ + PKWARE_IMPLODING(10), + + /** + * Compression Method 12 for bzip2. + * + * @see <a href="https://www.winzip.com/wz54.htm">https://www.winzip.com/wz54.htm</a> + */ + BZIP2(12), + + /** + * Compression Method 14 for LZMA. + * + * @see <a href="https://www.7-zip.org/sdk.html">https://www.7-zip.org/sdk.html</a> + * @see <a href="https://www.winzip.com/wz54.htm">https://www.winzip.com/wz54.htm</a> + */ + LZMA(14), + + + /** + * Compression Method 95 for XZ. + * + * @see <a href="https://www.winzip.com/wz54.htm">https://www.winzip.com/wz54.htm</a> + */ + XZ(95), + + /** + * Compression Method 96 for Jpeg compression. + * + * @see <a href="https://www.winzip.com/wz54.htm">https://www.winzip.com/wz54.htm</a> + */ + JPEG(96), + + /** + * Compression Method 97 for WavPack. + * + * @see <a href="https://www.winzip.com/wz54.htm">https://www.winzip.com/wz54.htm</a> + */ + WAVPACK(97), + + /** + * Compression Method 98 for PPMd. + * + * @see <a href="https://www.winzip.com/wz54.htm">https://www.winzip.com/wz54.htm</a> + */ + PPMD(98), + + + /** + * Compression Method 99 for AES encryption. + * + * @see <a href="https://www.winzip.com/wz54.htm">https://www.winzip.com/wz54.htm</a> + */ + AES_ENCRYPTED(99), + + /** + * Unknown compression method. + */ + UNKNOWN(); + + static final int UNKNOWN_CODE = -1; + + private final int code; + + private static final Map<Integer, ZipMethod> codeToEnum; + + static { + final Map<Integer, ZipMethod> cte = new HashMap<>(); + for (final ZipMethod method : values()) { + cte.put(method.getCode(), method); + } + codeToEnum = Collections.unmodifiableMap(cte); + } + + private ZipMethod() { + this(UNKNOWN_CODE); + } + + /** + * private constructor for enum style class. + */ + ZipMethod(final int code) { + this.code = code; + } + + /** + * the code of the compression method. + * + * @see ZipArchiveEntry#getMethod() + * + * @return an integer code for the method + */ + public int getCode() { + return code; + } + + + /** + * returns the {@link ZipMethod} for the given code or null if the + * method is not known. + * @param code the code + * @return the {@link ZipMethod} for the given code or null if the + * method is not known. + */ + public static ZipMethod getMethodByCode(final int code) { + return codeToEnum.get(code); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipShort.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipShort.java new file mode 100644 index 000000000..ccb50940a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipShort.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import java.io.Serializable; + +import org.apache.commons.compress.utils.ByteUtils; + +/** + * Utility class that represents a two byte integer with conversion + * rules for the little endian byte order of ZIP files. + * @Immutable + */ +public final class ZipShort implements Cloneable, Serializable { + /** + * ZipShort with a value of 0. + * @since 1.14 + */ + public static final ZipShort ZERO = new ZipShort(0); + + private static final long serialVersionUID = 1L; + + private final int value; + + /** + * Create instance from a number. + * @param value the int to store as a ZipShort + */ + public ZipShort (final int value) { + this.value = value; + } + + /** + * Create instance from bytes. + * @param bytes the bytes to store as a ZipShort + */ + public ZipShort (final byte[] bytes) { + this(bytes, 0); + } + + /** + * Create instance from the two bytes starting at offset. + * @param bytes the bytes to store as a ZipShort + * @param offset the offset to start + */ + public ZipShort (final byte[] bytes, final int offset) { + value = ZipShort.getValue(bytes, offset); + } + + /** + * Get value as two bytes in big endian byte order. + * @return the value as a a two byte array in big endian byte order + */ + public byte[] getBytes() { + final byte[] result = new byte[2]; + ByteUtils.toLittleEndian(result, value, 0, 2); + return result; + } + + /** + * Get value as Java int. + * @return value as a Java int + */ + public int getValue() { + return value; + } + + /** + * Get value as two bytes in big endian byte order. + * @param value the Java int to convert to bytes + * @return the converted int as a byte array in big endian byte order + */ + public static byte[] getBytes(final int value) { + final byte[] result = new byte[2]; + putShort(value, result, 0); + return result; + } + + /** + * put the value as two bytes in big endian byte order. + * @param value the Java int to convert to bytes + * @param buf the output buffer + * @param offset + * The offset within the output buffer of the first byte to be written. + * must be non-negative and no larger than <tt>buf.length-2</tt> + */ + public static void putShort(final int value, final byte[] buf, final int offset) { + ByteUtils.toLittleEndian(buf, value, offset, 2); + } + + /** + * Helper method to get the value as a java int from two bytes starting at given array offset + * @param bytes the array of bytes + * @param offset the offset to start + * @return the corresponding java int value + */ + public static int getValue(final byte[] bytes, final int offset) { + return (int) ByteUtils.fromLittleEndian(bytes, offset, 2); + } + + /** + * Helper method to get the value as a java int from a two-byte array + * @param bytes the array of bytes + * @return the corresponding java int value + */ + public static int getValue(final byte[] bytes) { + return getValue(bytes, 0); + } + + /** + * Override to make two instances with same value equal. 
+ * @param o an object to compare + * @return true if the objects are equal + */ + @Override + public boolean equals(final Object o) { + if (o == null || !(o instanceof ZipShort)) { + return false; + } + return value == ((ZipShort) o).getValue(); + } + + /** + * Override to make two instances with same value equal. + * @return the value stored in the ZipShort + */ + @Override + public int hashCode() { + return value; + } + + @Override + public Object clone() { + try { + return super.clone(); + } catch (final CloneNotSupportedException cnfe) { + // impossible + throw new RuntimeException(cnfe); //NOSONAR + } + } + + @Override + public String toString() { + return "ZipShort value: " + value; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java new file mode 100644 index 000000000..8cc3e6a4c --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import java.io.IOException; +import java.math.BigInteger; +import java.util.Calendar; +import java.util.Date; +import java.util.zip.CRC32; +import java.util.zip.ZipEntry; + +/** + * Utility class for handling DOS and Java time conversions. + * @Immutable + */ +public abstract class ZipUtil { + /** + * Smallest date/time ZIP can handle. + */ + private static final byte[] DOS_TIME_MIN = ZipLong.getBytes(0x00002100L); + + /** + * Convert a Date object to a DOS date/time field. + * @param time the <code>Date</code> to convert + * @return the date as a <code>ZipLong</code> + */ + public static ZipLong toDosTime(final Date time) { + return new ZipLong(toDosTime(time.getTime())); + } + + /** + * Convert a Date object to a DOS date/time field. + * + * <p>Stolen from InfoZip's <code>fileio.c</code></p> + * @param t number of milliseconds since the epoch + * @return the date as a byte array + */ + public static byte[] toDosTime(final long t) { + final byte[] result = new byte[4]; + toDosTime(t, result, 0); + return result; + } + + /** + * Convert a Date object to a DOS date/time field. + * + * <p>Stolen from InfoZip's <code>fileio.c</code></p> + * @param t number of milliseconds since the epoch + * @param buf the output buffer + * @param offset + * The offset within the output buffer of the first byte to be written. 
+ * must be non-negative and no larger than <tt>buf.length-4</tt> + */ + public static void toDosTime(final long t, final byte[] buf, final int offset) { + toDosTime(Calendar.getInstance(), t, buf, offset); + } + + static void toDosTime(final Calendar c, final long t, final byte[] buf, final int offset) { + c.setTimeInMillis(t); + + final int year = c.get(Calendar.YEAR); + if (year < 1980) { + System.arraycopy(DOS_TIME_MIN, 0, buf, offset, DOS_TIME_MIN.length);// stop callers from changing the array + return; + } + final int month = c.get(Calendar.MONTH) + 1; + final long value = ((year - 1980) << 25) + | (month << 21) + | (c.get(Calendar.DAY_OF_MONTH) << 16) + | (c.get(Calendar.HOUR_OF_DAY) << 11) + | (c.get(Calendar.MINUTE) << 5) + | (c.get(Calendar.SECOND) >> 1); + ZipLong.putLong(value, buf, offset); + } + + + /** + * Assumes a negative integer really is a positive integer that + * has wrapped around and re-creates the original value. + * + * @param i the value to treat as unsigned int. + * @return the unsigned int as a long. + */ + public static long adjustToLong(final int i) { + if (i < 0) { + return 2 * ((long) Integer.MAX_VALUE) + 2 + i; + } + return i; + } + + /** + * Reverses a byte[] array. Reverses in-place (thus provided array is + * mutated), but also returns same for convenience. + * + * @param array to reverse (mutated in-place, but also returned for + * convenience). + * + * @return the reversed array (mutated in-place, but also returned for + * convenience). + * @since 1.5 + */ + public static byte[] reverse(final byte[] array) { + final int z = array.length - 1; // position of last element + for (int i = 0; i < array.length / 2; i++) { + final byte x = array[i]; + array[i] = array[z - i]; + array[z - i] = x; + } + return array; + } + + /** + * Converts a BigInteger into a long, and blows up + * (NumberFormatException) if the BigInteger is too big. + * + * @param big BigInteger to convert. + * @return long representation of the BigInteger. + */ + static long bigToLong(final BigInteger big) { + if (big.bitLength() <= 63) { // bitLength() doesn't count the sign bit. + return big.longValue(); + } + throw new NumberFormatException("The BigInteger cannot fit inside a 64 bit java long: [" + big + "]"); + } + + /** + * <p> + * Converts a long into a BigInteger. Negative numbers between -1 and + * -2^31 are treated as unsigned 32 bit (e.g., positive) integers. + * Negative numbers below -2^31 cause an IllegalArgumentException + * to be thrown. + * </p> + * + * @param l long to convert to BigInteger. + * @return BigInteger representation of the provided long. + */ + static BigInteger longToBig(long l) { + if (l < Integer.MIN_VALUE) { + throw new IllegalArgumentException("Negative longs < -2^31 not permitted: [" + l + "]"); + } else if (l < 0 && l >= Integer.MIN_VALUE) { + // If someone passes in a -2, they probably mean 4294967294 + // (For example, Unix UID/GID's are 32 bit unsigned.) + l = ZipUtil.adjustToLong((int) l); + } + return BigInteger.valueOf(l); + } + + /** + * Converts a signed byte into an unsigned integer representation + * (e.g., -1 becomes 255). + * + * @param b byte to convert to int + * @return int representation of the provided byte + * @since 1.5 + */ + public static int signedByteToUnsignedInt(final byte b) { + if (b >= 0) { + return b; + } + return 256 + b; + } + + /** + * Converts an unsigned integer to a signed byte (e.g., 255 becomes -1). 
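+ * 128 becomes -128 while values between 0 and 127 map to themselves.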
+ * + * @param i integer to convert to byte + * @return byte representation of the provided int + * @throws IllegalArgumentException if the provided integer is not inside the range [0,255]. + * @since 1.5 + */ + public static byte unsignedIntToSignedByte(final int i) { + if (i > 255 || i < 0) { + throw new IllegalArgumentException("Can only convert non-negative integers between [0,255] to byte: [" + i + "]"); + } + if (i < 128) { + return (byte) i; + } + return (byte) (i - 256); + } + + /** + * Convert a DOS date/time field to a Date object. + * + * @param zipDosTime contains the stored DOS time. + * @return a Date instance corresponding to the given time. + */ + public static Date fromDosTime(final ZipLong zipDosTime) { + final long dosTime = zipDosTime.getValue(); + return new Date(dosToJavaTime(dosTime)); + } + + /** + * Converts DOS time to Java time (number of milliseconds since + * epoch). + * @param dosTime time to convert + * @return converted time + */ + public static long dosToJavaTime(final long dosTime) { + final Calendar cal = Calendar.getInstance(); + // CheckStyle:MagicNumberCheck OFF - no point + cal.set(Calendar.YEAR, (int) ((dosTime >> 25) & 0x7f) + 1980); + cal.set(Calendar.MONTH, (int) ((dosTime >> 21) & 0x0f) - 1); + cal.set(Calendar.DATE, (int) (dosTime >> 16) & 0x1f); + cal.set(Calendar.HOUR_OF_DAY, (int) (dosTime >> 11) & 0x1f); + cal.set(Calendar.MINUTE, (int) (dosTime >> 5) & 0x3f); + cal.set(Calendar.SECOND, (int) (dosTime << 1) & 0x3e); + cal.set(Calendar.MILLISECOND, 0); + // CheckStyle:MagicNumberCheck ON + return cal.getTime().getTime(); + } + + /** + * If the entry has Unicode*ExtraFields and the CRCs of the + * names/comments match those of the extra fields, transfer the + * known Unicode values from the extra field. + */ + static void setNameAndCommentFromExtraFields(final ZipArchiveEntry ze, + final byte[] originalNameBytes, + final byte[] commentBytes) { + final UnicodePathExtraField name = (UnicodePathExtraField) + ze.getExtraField(UnicodePathExtraField.UPATH_ID); + final String newName = getUnicodeStringIfOriginalMatches(name, + originalNameBytes); + if (newName != null) { + ze.setName(newName); + ze.setNameSource(ZipArchiveEntry.NameSource.UNICODE_EXTRA_FIELD); + } + + if (commentBytes != null && commentBytes.length > 0) { + final UnicodeCommentExtraField cmt = (UnicodeCommentExtraField) + ze.getExtraField(UnicodeCommentExtraField.UCOM_ID); + final String newComment = + getUnicodeStringIfOriginalMatches(cmt, commentBytes); + if (newComment != null) { + ze.setComment(newComment); + ze.setCommentSource(ZipArchiveEntry.CommentSource.UNICODE_EXTRA_FIELD); + } + } + } + + /** + * If the stored CRC matches the one of the given name, return the + * Unicode name of the given field. + * + * <p>If the field is null or the CRCs don't match, return null + * instead.</p> + */ + private static + String getUnicodeStringIfOriginalMatches(final AbstractUnicodeExtraField f, + final byte[] orig) { + if (f != null) { + final CRC32 crc32 = new CRC32(); + crc32.update(orig); + final long origCRC32 = crc32.getValue(); + + if (origCRC32 == f.getNameCRC32()) { + try { + return ZipEncodingHelper + .UTF8_ZIP_ENCODING.decode(f.getUnicodeName()); + } catch (final IOException ex) { + // UTF-8 unsupported? should be impossible the + // Unicode*ExtraField must contain some bad bytes + + // TODO log this anywhere? + return null; + } + } + } + return null; + } + + /** + * Create a copy of the given array - or return null if the + * argument is null. 
+ */ + static byte[] copy(final byte[] from) { + if (from != null) { + final byte[] to = new byte[from.length]; + System.arraycopy(from, 0, to, 0, to.length); + return to; + } + return null; + } + + static void copy(final byte[] from, final byte[] to, final int offset) { + if (from != null) { + System.arraycopy(from, 0, to, offset, from.length); + } + } + + + /** + * Whether this library is able to read or write the given entry. + */ + static boolean canHandleEntryData(final ZipArchiveEntry entry) { + return supportsEncryptionOf(entry) && supportsMethodOf(entry); + } + + /** + * Whether this library supports the encryption used by the given + * entry. + * + * @return true if the entry isn't encrypted at all + */ + private static boolean supportsEncryptionOf(final ZipArchiveEntry entry) { + return !entry.getGeneralPurposeBit().usesEncryption(); + } + + /** + * Whether this library supports the compression method used by + * the given entry. + * + * @return true if the compression method is supported + */ + private static boolean supportsMethodOf(final ZipArchiveEntry entry) { + return entry.getMethod() == ZipEntry.STORED + || entry.getMethod() == ZipMethod.UNSHRINKING.getCode() + || entry.getMethod() == ZipMethod.IMPLODING.getCode() + || entry.getMethod() == ZipEntry.DEFLATED + || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() + || entry.getMethod() == ZipMethod.BZIP2.getCode(); + } + + /** + * Checks whether the entry requires features not (yet) supported + * by the library and throws an exception if it does. + */ + static void checkRequestedFeatures(final ZipArchiveEntry ze) + throws UnsupportedZipFeatureException { + if (!supportsEncryptionOf(ze)) { + throw + new UnsupportedZipFeatureException(UnsupportedZipFeatureException + .Feature.ENCRYPTION, ze); + } + if (!supportsMethodOf(ze)) { + final ZipMethod m = ZipMethod.getMethodByCode(ze.getMethod()); + if (m == null) { + throw + new UnsupportedZipFeatureException(UnsupportedZipFeatureException + .Feature.METHOD, ze); + } + throw new UnsupportedZipFeatureException(m, ze); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/package.html b/src/main/java/org/apache/commons/compress/archivers/zip/package.html new file mode 100644 index 000000000..521687be6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/zip/package.html @@ -0,0 +1,24 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +--> + <body> + <p>Provides stream classes for reading and writing archives using + the ZIP format.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/changes/Change.java b/src/main/java/org/apache/commons/compress/changes/Change.java new file mode 100644 index 000000000..fb901bd41 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/changes/Change.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.changes; + +import java.io.InputStream; + +import org.apache.commons.compress.archivers.ArchiveEntry; + +/** + * Change holds meta information about a change. + * + * @Immutable + */ +class Change { + private final String targetFile; // entry name to delete + private final ArchiveEntry entry; // new entry to add + private final InputStream input; // source for new entry + private final boolean replaceMode; // change should replaceMode existing entries + + // Type of change + private final int type; + // Possible type values + static final int TYPE_DELETE = 1; + static final int TYPE_ADD = 2; + static final int TYPE_MOVE = 3; // NOT USED + static final int TYPE_DELETE_DIR = 4; + + /** + * Constructor. Takes the filename of the file to be deleted + * from the stream as argument. + * @param pFilename the filename of the file to delete + */ + Change(final String pFilename, final int type) { + if(pFilename == null) { + throw new NullPointerException(); + } + this.targetFile = pFilename; + this.type = type; + this.input = null; + this.entry = null; + this.replaceMode = true; + } + + /** + * Construct a change which adds an entry. + * + * @param pEntry the entry details + * @param pInput the InputStream for the entry data + */ + Change(final ArchiveEntry pEntry, final InputStream pInput, final boolean replace) { + if(pEntry == null || pInput == null) { + throw new NullPointerException(); + } + this.entry = pEntry; + this.input = pInput; + type = TYPE_ADD; + targetFile = null; + this.replaceMode = replace; + } + + ArchiveEntry getEntry() { + return entry; + } + + InputStream getInput() { + return input; + } + + String targetFile() { + return targetFile; + } + + int type() { + return type; + } + + boolean isReplaceMode() { + return replaceMode; + } +} diff --git a/src/main/java/org/apache/commons/compress/changes/ChangeSet.java b/src/main/java/org/apache/commons/compress/changes/ChangeSet.java new file mode 100644 index 000000000..c0f8c61e4 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/changes/ChangeSet.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.changes; + +import java.io.InputStream; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; + +import org.apache.commons.compress.archivers.ArchiveEntry; + +/** + * ChangeSet collects and performs changes to an archive. + * Putting delete changes in this ChangeSet from multiple threads can + * cause conflicts. + * + * @NotThreadSafe + */ +public final class ChangeSet { + + private final Set<Change> changes = new LinkedHashSet<>(); + + /** + * Deletes the file with the filename from the archive. + * + * @param filename + * the filename of the file to delete + */ + public void delete(final String filename) { + addDeletion(new Change(filename, Change.TYPE_DELETE)); + } + + /** + * Deletes the directory tree from the archive. + * + * @param dirName + * the name of the directory tree to delete + */ + public void deleteDir(final String dirName) { + addDeletion(new Change(dirName, Change.TYPE_DELETE_DIR)); + } + + /** + * Adds a new archive entry to the archive. + * + * @param pEntry + * the entry to add + * @param pInput + * the datastream to add + */ + public void add(final ArchiveEntry pEntry, final InputStream pInput) { + this.add(pEntry, pInput, true); + } + + /** + * Adds a new archive entry to the archive. + * If replace is set to true, this change will replace all other additions + * done in this ChangeSet and all existing entries in the original stream. + * + * @param pEntry + * the entry to add + * @param pInput + * the datastream to add + * @param replace + * indicates the this change should replace existing entries + */ + public void add(final ArchiveEntry pEntry, final InputStream pInput, final boolean replace) { + addAddition(new Change(pEntry, pInput, replace)); + } + + /** + * Adds an addition change. + * + * @param pChange + * the change which should result in an addition + */ + private void addAddition(final Change pChange) { + if (Change.TYPE_ADD != pChange.type() || + pChange.getInput() == null) { + return; + } + + if (!changes.isEmpty()) { + for (final Iterator<Change> it = changes.iterator(); it.hasNext();) { + final Change change = it.next(); + if (change.type() == Change.TYPE_ADD + && change.getEntry() != null) { + final ArchiveEntry entry = change.getEntry(); + + if(entry.equals(pChange.getEntry())) { + if(pChange.isReplaceMode()) { + it.remove(); + changes.add(pChange); + return; + } + // do not add this change + return; + } + } + } + } + changes.add(pChange); + } + + /** + * Adds an delete change. 
+ * + * @param pChange + * the change which should result in a deletion + */ + private void addDeletion(final Change pChange) { + if ((Change.TYPE_DELETE != pChange.type() && + Change.TYPE_DELETE_DIR != pChange.type()) || + pChange.targetFile() == null) { + return; + } + final String source = pChange.targetFile(); + + if (source != null && !changes.isEmpty()) { + for (final Iterator<Change> it = changes.iterator(); it.hasNext();) { + final Change change = it.next(); + if (change.type() == Change.TYPE_ADD + && change.getEntry() != null) { + final String target = change.getEntry().getName(); + + if (target == null) { + continue; + } + + if (Change.TYPE_DELETE == pChange.type() && source.equals(target) || + (Change.TYPE_DELETE_DIR == pChange.type() && target.matches(source + "/.*"))) { + it.remove(); + } + } + } + } + changes.add(pChange); + } + + /** + * Returns the list of changes as a copy. Changes on this set + * are not reflected on this ChangeSet and vice versa. + * @return the changes as a copy + */ + Set<Change> getChanges() { + return new LinkedHashSet<>(changes); + } +} diff --git a/src/main/java/org/apache/commons/compress/changes/ChangeSetPerformer.java b/src/main/java/org/apache/commons/compress/changes/ChangeSetPerformer.java new file mode 100644 index 000000000..bec6b6421 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/changes/ChangeSetPerformer.java @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.changes; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Enumeration; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.compress.utils.IOUtils; + +/** + * Performs ChangeSet operations on a stream. + * This class is thread safe and can be used multiple times. + * It operates on a copy of the ChangeSet. If the ChangeSet changes, + * a new Performer must be created. + * + * @ThreadSafe + * @Immutable + */ +public class ChangeSetPerformer { + private final Set<Change> changes; + + /** + * Constructs a ChangeSetPerformer with the changes from this ChangeSet + * @param changeSet the ChangeSet which operations are used for performing + */ + public ChangeSetPerformer(final ChangeSet changeSet) { + changes = changeSet.getChanges(); + } + + /** + * Performs all changes collected in this ChangeSet on the input stream and + * streams the result to the output stream. 
Perform may be called more than once. + * + * This method finishes the stream, no other entries should be added + * after that. + * + * @param in + * the InputStream to perform the changes on + * @param out + * the resulting OutputStream with all modifications + * @throws IOException + * if an read/write error occurs + * @return the results of this operation + */ + public ChangeSetResults perform(final ArchiveInputStream in, final ArchiveOutputStream out) + throws IOException { + return perform(new ArchiveInputStreamIterator(in), out); + } + + /** + * Performs all changes collected in this ChangeSet on the ZipFile and + * streams the result to the output stream. Perform may be called more than once. + * + * This method finishes the stream, no other entries should be added + * after that. + * + * @param in + * the ZipFile to perform the changes on + * @param out + * the resulting OutputStream with all modifications + * @throws IOException + * if an read/write error occurs + * @return the results of this operation + * @since 1.5 + */ + public ChangeSetResults perform(final ZipFile in, final ArchiveOutputStream out) + throws IOException { + return perform(new ZipFileIterator(in), out); + } + + /** + * Performs all changes collected in this ChangeSet on the input entries and + * streams the result to the output stream. + * + * This method finishes the stream, no other entries should be added + * after that. + * + * @param entryIterator + * the entries to perform the changes on + * @param out + * the resulting OutputStream with all modifications + * @throws IOException + * if an read/write error occurs + * @return the results of this operation + */ + private ChangeSetResults perform(final ArchiveEntryIterator entryIterator, + final ArchiveOutputStream out) + throws IOException { + final ChangeSetResults results = new ChangeSetResults(); + + final Set<Change> workingSet = new LinkedHashSet<>(changes); + + for (final Iterator<Change> it = workingSet.iterator(); it.hasNext();) { + final Change change = it.next(); + + if (change.type() == Change.TYPE_ADD && change.isReplaceMode()) { + copyStream(change.getInput(), out, change.getEntry()); + it.remove(); + results.addedFromChangeSet(change.getEntry().getName()); + } + } + + while (entryIterator.hasNext()) { + final ArchiveEntry entry = entryIterator.next(); + boolean copy = true; + + for (final Iterator<Change> it = workingSet.iterator(); it.hasNext();) { + final Change change = it.next(); + + final int type = change.type(); + final String name = entry.getName(); + if (type == Change.TYPE_DELETE && name != null) { + if (name.equals(change.targetFile())) { + copy = false; + it.remove(); + results.deleted(name); + break; + } + } else if (type == Change.TYPE_DELETE_DIR && name != null) { + // don't combine ifs to make future extensions more easy + if (name.startsWith(change.targetFile() + "/")) { // NOPMD + copy = false; + results.deleted(name); + break; + } + } + } + + if (copy + && !isDeletedLater(workingSet, entry) + && !results.hasBeenAdded(entry.getName())) { + copyStream(entryIterator.getInputStream(), out, entry); + results.addedFromStream(entry.getName()); + } + } + + // Adds files which hasn't been added from the original and do not have replace mode on + for (final Iterator<Change> it = workingSet.iterator(); it.hasNext();) { + final Change change = it.next(); + + if (change.type() == Change.TYPE_ADD && + !change.isReplaceMode() && + !results.hasBeenAdded(change.getEntry().getName())) { + copyStream(change.getInput(), out, change.getEntry()); 
+ it.remove(); + results.addedFromChangeSet(change.getEntry().getName()); + } + } + out.finish(); + return results; + } + + /** + * Checks if an ArchiveEntry is deleted later in the ChangeSet. This is + * necessary if an file is added with this ChangeSet, but later became + * deleted in the same set. + * + * @param entry + * the entry to check + * @return true, if this entry has an deletion change later, false otherwise + */ + private boolean isDeletedLater(final Set<Change> workingSet, final ArchiveEntry entry) { + final String source = entry.getName(); + + if (!workingSet.isEmpty()) { + for (final Change change : workingSet) { + final int type = change.type(); + final String target = change.targetFile(); + if (type == Change.TYPE_DELETE && source.equals(target)) { + return true; + } + + if (type == Change.TYPE_DELETE_DIR && source.startsWith(target + "/")){ + return true; + } + } + } + return false; + } + + /** + * Copies the ArchiveEntry to the Output stream + * + * @param in + * the stream to read the data from + * @param out + * the stream to write the data to + * @param entry + * the entry to write + * @throws IOException + * if data cannot be read or written + */ + private void copyStream(final InputStream in, final ArchiveOutputStream out, + final ArchiveEntry entry) throws IOException { + out.putArchiveEntry(entry); + IOUtils.copy(in, out); + out.closeArchiveEntry(); + } + + /** + * Used in perform to abstract out getting entries and streams for + * those entries. + * + * <p>Iterator#hasNext is not allowed to throw exceptions that's + * why we can't use Iterator<ArchiveEntry> directly - + * otherwise we'd need to convert exceptions thrown in + * ArchiveInputStream#getNextEntry.</p> + */ + interface ArchiveEntryIterator { + boolean hasNext() throws IOException; + ArchiveEntry next(); + InputStream getInputStream() throws IOException; + } + + private static class ArchiveInputStreamIterator + implements ArchiveEntryIterator { + private final ArchiveInputStream in; + private ArchiveEntry next; + ArchiveInputStreamIterator(final ArchiveInputStream in) { + this.in = in; + } + @Override + public boolean hasNext() throws IOException { + return (next = in.getNextEntry()) != null; + } + @Override + public ArchiveEntry next() { + return next; + } + @Override + public InputStream getInputStream() { + return in; + } + } + + private static class ZipFileIterator + implements ArchiveEntryIterator { + private final ZipFile in; + private final Enumeration<ZipArchiveEntry> nestedEnum; + private ZipArchiveEntry current; + ZipFileIterator(final ZipFile in) { + this.in = in; + nestedEnum = in.getEntriesInPhysicalOrder(); + } + @Override + public boolean hasNext() { + return nestedEnum.hasMoreElements(); + } + @Override + public ArchiveEntry next() { + current = nestedEnum.nextElement(); + return current; + } + @Override + public InputStream getInputStream() throws IOException { + return in.getInputStream(current); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/changes/ChangeSetResults.java b/src/main/java/org/apache/commons/compress/changes/ChangeSetResults.java new file mode 100644 index 000000000..788dccfae --- /dev/null +++ b/src/main/java/org/apache/commons/compress/changes/ChangeSetResults.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
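As a usage illustration of the ChangeSet API added above (an editorial sketch, not part of this commit; archive and file names such as in.zip, out.zip and notes.txt are assumed): build a ChangeSet, hand it to a ChangeSetPerformer, and apply it to an archive read from one stream and written to another.

import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveOutputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.changes.ChangeSet;
import org.apache.commons.compress.changes.ChangeSetPerformer;
import org.apache.commons.compress.changes.ChangeSetResults;

public class ChangeSetExample {
    public static void main(String[] args) throws Exception {
        try (ArchiveInputStream in = new ArchiveStreamFactory()
                 .createArchiveInputStream("zip", Files.newInputStream(Paths.get("in.zip")));
             ArchiveOutputStream out = new ArchiveStreamFactory()
                 .createArchiveOutputStream("zip", Files.newOutputStream(Paths.get("out.zip")));
             InputStream newData = Files.newInputStream(Paths.get("notes.txt"))) {

            ChangeSet changes = new ChangeSet();
            changes.delete("obsolete.txt");                 // drop a single entry
            changes.deleteDir("tmp");                       // drop a whole directory tree
            ArchiveEntry entry = new ZipArchiveEntry("notes.txt");
            changes.add(entry, newData);                    // add, replacing any existing entry

            // perform() streams the modified archive to out and reports what happened
            ChangeSetResults results = new ChangeSetPerformer(changes).perform(in, out);
            System.out.println("deleted: " + results.getDeleted());
            System.out.println("added:   " + results.getAddedFromChangeSet());
        }
    }
}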
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.changes; + +import java.util.ArrayList; +import java.util.List; + +/** + * Stores the results of an performed ChangeSet operation. + */ +public class ChangeSetResults { + private final List<String> addedFromChangeSet = new ArrayList<>(); + private final List<String> addedFromStream = new ArrayList<>(); + private final List<String> deleted = new ArrayList<>(); + + /** + * Adds the filename of a recently deleted file to the result list. + * @param fileName the file which has been deleted + */ + void deleted(final String fileName) { + deleted.add(fileName); + } + + /** + * Adds the name of a file to the result list which has been + * copied from the source stream to the target stream. + * @param fileName the file name which has been added from the original stream + */ + void addedFromStream(final String fileName) { + addedFromStream.add(fileName); + } + + /** + * Adds the name of a file to the result list which has been + * copied from the changeset to the target stream + * @param fileName the name of the file + */ + void addedFromChangeSet(final String fileName) { + addedFromChangeSet.add(fileName); + } + + /** + * Returns a list of filenames which has been added from the changeset + * @return the list of filenames + */ + public List<String> getAddedFromChangeSet() { + return addedFromChangeSet; + } + + /** + * Returns a list of filenames which has been added from the original stream + * @return the list of filenames + */ + public List<String> getAddedFromStream() { + return addedFromStream; + } + + /** + * Returns a list of filenames which has been deleted + * @return the list of filenames + */ + public List<String> getDeleted() { + return deleted; + } + + /** + * Checks if an filename already has been added to the result list + * @param filename the filename to check + * @return true, if this filename already has been added + */ + boolean hasBeenAdded(final String filename) { + return addedFromChangeSet.contains(filename) || addedFromStream.contains(filename); + } +} diff --git a/src/main/java/org/apache/commons/compress/changes/package.html b/src/main/java/org/apache/commons/compress/changes/package.html new file mode 100644 index 000000000..4ba3e87d0 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/changes/package.html @@ -0,0 +1,27 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p><b>EXPERIMENTAL</b> support for changesets that are applied to + archives.</p> + + <p>This API is considered unstable and may be modified or even + removed in future releases.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/CompressorException.java b/src/main/java/org/apache/commons/compress/compressors/CompressorException.java new file mode 100644 index 000000000..9af3e6969 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/CompressorException.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors; + +/** + * Compressor related exception + */ +public class CompressorException extends Exception { + + /** Serial */ + private static final long serialVersionUID = -2932901310255908814L; + + /** + * Constructs a new exception with the specified detail message. The cause + * is not initialized. + * + * @param message + * the detail message + */ + public CompressorException(final String message) { + super(message); + } + + /** + * Constructs a new exception with the specified detail message and cause. + * + * @param message + * the detail message + * @param cause + * the cause + */ + public CompressorException(final String message, final Throwable cause) { + super(message, cause); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/CompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/CompressorInputStream.java new file mode 100644 index 000000000..67de705a2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/CompressorInputStream.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors; + +import java.io.InputStream; + +public abstract class CompressorInputStream extends InputStream { + private long bytesRead = 0; + + /** + * Increments the counter of already read bytes. + * Doesn't increment if the EOF has been hit (read == -1) + * + * @param read the number of bytes read + * + * @since 1.1 + */ + protected void count(final int read) { + count((long) read); + } + + /** + * Increments the counter of already read bytes. + * Doesn't increment if the EOF has been hit (read == -1) + * + * @param read the number of bytes read + */ + protected void count(final long read) { + if (read != -1) { + bytesRead = bytesRead + read; + } + } + + /** + * Decrements the counter of already read bytes. + * + * @param pushedBack the number of bytes pushed back. + * @since 1.7 + */ + protected void pushedBackBytes(final long pushedBack) { + bytesRead -= pushedBack; + } + + /** + * Returns the current number of bytes read from this stream. + * @return the number of read bytes + * @deprecated this method may yield wrong results for large + * archives, use #getBytesRead instead + */ + @Deprecated + public int getCount() { + return (int) bytesRead; + } + + /** + * Returns the current number of bytes read from this stream. + * @return the number of read bytes + * + * @since 1.1 + */ + public long getBytesRead() { + return bytesRead; + } + + /** + * Returns the amount of raw or compressed bytes read by the stream. + * + * <p>This implementation invokes {@link #getBytesRead}.</p> + * + * <p>Provides half of {@link + * org.apache.commons.compress.utils.InputStreamStatistics} + * without forcing subclasses to implement the other half.</p> + * + * @return the amount of decompressed bytes returned by the stream + * @since 1.17 + */ + public long getUncompressedCount() { + return getBytesRead(); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/CompressorOutputStream.java new file mode 100644 index 000000000..51eee9cee --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/CompressorOutputStream.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
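To illustrate the counting contract described above (an editorial sketch with an invented class name, not part of this commit): a CompressorInputStream subclass is expected to call count() for every successful read so that getBytesRead() and getUncompressedCount() report accurate totals; count() ignores the EOF value -1.

import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.compress.compressors.CompressorInputStream;

// Hypothetical pass-through "decompressor" used only to show the count() contract.
class PassThroughCompressorInputStream extends CompressorInputStream {
    private final InputStream in;

    PassThroughCompressorInputStream(final InputStream in) {
        this.in = in;
    }

    @Override
    public int read() throws IOException {
        final int b = in.read();
        count(b < 0 ? -1 : 1);   // -1 (EOF) is ignored by count()
        return b;
    }

    @Override
    public int read(final byte[] buf, final int off, final int len) throws IOException {
        final int n = in.read(buf, off, len);
        count(n);                // safe at EOF, count() skips -1
        return n;
    }

    @Override
    public void close() throws IOException {
        in.close();
    }
}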
+ */ +package org.apache.commons.compress.compressors; + +import java.io.OutputStream; + +public abstract class CompressorOutputStream extends OutputStream { + // TODO +} diff --git a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java new file mode 100644 index 000000000..d730b9de4 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java @@ -0,0 +1,797 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.Locale; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.commons.compress.compressors.brotli.BrotliCompressorInputStream; +import org.apache.commons.compress.compressors.brotli.BrotliUtils; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream; +import org.apache.commons.compress.compressors.deflate.DeflateCompressorOutputStream; +import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; +import org.apache.commons.compress.compressors.lz4.BlockLZ4CompressorInputStream; +import org.apache.commons.compress.compressors.lz4.BlockLZ4CompressorOutputStream; +import org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorInputStream; +import org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorOutputStream; +import org.apache.commons.compress.compressors.lzma.LZMACompressorInputStream; +import org.apache.commons.compress.compressors.lzma.LZMACompressorOutputStream; +import org.apache.commons.compress.compressors.lzma.LZMAUtils; +import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream; +import org.apache.commons.compress.compressors.pack200.Pack200CompressorOutputStream; +import org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorInputStream; +import org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorOutputStream; +import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream; +import 
org.apache.commons.compress.compressors.xz.XZCompressorInputStream; +import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream; +import org.apache.commons.compress.compressors.xz.XZUtils; +import org.apache.commons.compress.compressors.z.ZCompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdUtils; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.Lists; +import org.apache.commons.compress.utils.ServiceLoaderIterator; +import org.apache.commons.compress.utils.Sets; + +/** + * <p> + * Factory to create Compressor[In|Out]putStreams from names. To add other + * implementations you should extend CompressorStreamFactory and override the + * appropriate methods (and call their implementation from super of course). + * </p> + * + * Example (Compressing a file): + * + * <pre> + * final OutputStream out = Files.newOutputStream(output.toPath()); + * CompressorOutputStream cos = new CompressorStreamFactory() + * .createCompressorOutputStream(CompressorStreamFactory.BZIP2, out); + * IOUtils.copy(Files.newInputStream(input.toPath()), cos); + * cos.close(); + * </pre> + * + * Example (Decompressing a file): + * + * <pre> + * final InputStream is = Files.newInputStream(input.toPath()); + * CompressorInputStream in = new CompressorStreamFactory().createCompressorInputStream(CompressorStreamFactory.BZIP2, + * is); + * IOUtils.copy(in, Files.newOutputStream(output.toPath())); + * in.close(); + * </pre> + * + * @Immutable provided that the deprecated method setDecompressConcatenated is + * not used. + * @ThreadSafe even if the deprecated method setDecompressConcatenated is used + */ +public class CompressorStreamFactory implements CompressorStreamProvider { + + private static final CompressorStreamFactory SINGLETON = new CompressorStreamFactory(); + + + + /** + * Constant (value {@value}) used to identify the BROTLI compression + * algorithm. + * + * @since 1.14 + */ + public static final String BROTLI = "br"; + + /** + * Constant (value {@value}) used to identify the BZIP2 compression + * algorithm. + * + * @since 1.1 + */ + public static final String BZIP2 = "bzip2"; + + /** + * Constant (value {@value}) used to identify the GZIP compression + * algorithm. + * + * @since 1.1 + */ + public static final String GZIP = "gz"; + + /** + * Constant (value {@value}) used to identify the PACK200 compression + * algorithm. + * + * @since 1.3 + */ + public static final String PACK200 = "pack200"; + + /** + * Constant (value {@value}) used to identify the XZ compression method. + * + * @since 1.4 + */ + public static final String XZ = "xz"; + + /** + * Constant (value {@value}) used to identify the LZMA compression method. + * + * @since 1.6 + */ + public static final String LZMA = "lzma"; + + /** + * Constant (value {@value}) used to identify the "framed" Snappy + * compression method. + * + * @since 1.7 + */ + public static final String SNAPPY_FRAMED = "snappy-framed"; + + /** + * Constant (value {@value}) used to identify the "raw" Snappy compression + * method. Not supported as an output stream type. + * + * @since 1.7 + */ + public static final String SNAPPY_RAW = "snappy-raw"; + + /** + * Constant (value {@value}) used to identify the traditional Unix compress + * method. Not supported as an output stream type. 
+ * + * @since 1.7 + */ + public static final String Z = "z"; + + /** + * Constant (value {@value}) used to identify the Deflate compress method. + * + * @since 1.9 + */ + public static final String DEFLATE = "deflate"; + + /** + * Constant (value {@value}) used to identify the Deflate64 compress method. + * + * @since 1.16 + */ + public static final String DEFLATE64 = "deflate64"; + + /** + * Constant (value {@value}) used to identify the block LZ4 + * compression method. + * + * @since 1.14 + */ + public static final String LZ4_BLOCK = "lz4-block"; + + /** + * Constant (value {@value}) used to identify the frame LZ4 + * compression method. + * + * @since 1.14 + */ + public static final String LZ4_FRAMED = "lz4-framed"; + + /** + * Constant (value {@value}) used to identify the Zstandard compression + * algorithm. Not supported as an output stream type. + * + * @since 1.16 + */ + public static final String ZSTANDARD = "zstd"; + + private static final String YOU_NEED_BROTLI_DEC = youNeed("Google Brotli Dec", "https://github.com/google/brotli/"); + private static final String YOU_NEED_XZ_JAVA = youNeed("XZ for Java", "https://tukaani.org/xz/java.html"); + private static final String YOU_NEED_ZSTD_JNI = youNeed("Zstd JNI", "https://github.com/luben/zstd-jni"); + + private static String youNeed(String name, String url) { + return " In addition to Apache Commons Compress you need the " + name + " library - see " + url; + } + + /** + * Constructs a new sorted map from input stream provider names to provider + * objects. + * + * <p> + * The map returned by this method will have one entry for each provider for + * which support is available in the current Java virtual machine. If two or + * more supported provider have the same name then the resulting map will + * contain just one of them; which one it will contain is not specified. + * </p> + * + * <p> + * The invocation of this method, and the subsequent use of the resulting + * map, may cause time-consuming disk or network I/O operations to occur. + * This method is provided for applications that need to enumerate all of + * the available providers, for example to allow user provider selection. + * </p> + * + * <p> + * This method may return different results at different times if new + * providers are dynamically made available to the current Java virtual + * machine. + * </p> + * + * @return An immutable, map from names to provider objects + * @since 1.13 + */ + public static SortedMap<String, CompressorStreamProvider> findAvailableCompressorInputStreamProviders() { + return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, CompressorStreamProvider>>() { + @Override + public SortedMap<String, CompressorStreamProvider> run() { + final TreeMap<String, CompressorStreamProvider> map = new TreeMap<>(); + putAll(SINGLETON.getInputStreamCompressorNames(), SINGLETON, map); + for (final CompressorStreamProvider provider : findCompressorStreamProviders()) { + putAll(provider.getInputStreamCompressorNames(), provider, map); + } + return map; + } + }); + } + + /** + * Constructs a new sorted map from output stream provider names to provider + * objects. + * + * <p> + * The map returned by this method will have one entry for each provider for + * which support is available in the current Java virtual machine. If two or + * more supported provider have the same name then the resulting map will + * contain just one of them; which one it will contain is not specified. 
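A short sketch (editorial, not part of this commit) of how the provider lookup described here might be used to list every input-stream compressor name available at runtime, including providers discovered through the ServiceLoader mechanism:

import java.util.Map;
import java.util.SortedMap;

import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.commons.compress.compressors.CompressorStreamProvider;

public class ListProviders {
    public static void main(String[] args) {
        SortedMap<String, CompressorStreamProvider> providers =
                CompressorStreamFactory.findAvailableCompressorInputStreamProviders();
        for (Map.Entry<String, CompressorStreamProvider> e : providers.entrySet()) {
            // Keys are upper-cased compressor names such as "GZ" or "BZIP2".
            System.out.println(e.getKey() + " -> " + e.getValue().getClass().getName());
        }
    }
}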
+ * </p> + * + * <p> + * The invocation of this method, and the subsequent use of the resulting + * map, may cause time-consuming disk or network I/O operations to occur. + * This method is provided for applications that need to enumerate all of + * the available providers, for example to allow user provider selection. + * </p> + * + * <p> + * This method may return different results at different times if new + * providers are dynamically made available to the current Java virtual + * machine. + * </p> + * + * @return An immutable, map from names to provider objects + * @since 1.13 + */ + public static SortedMap<String, CompressorStreamProvider> findAvailableCompressorOutputStreamProviders() { + return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, CompressorStreamProvider>>() { + @Override + public SortedMap<String, CompressorStreamProvider> run() { + final TreeMap<String, CompressorStreamProvider> map = new TreeMap<>(); + putAll(SINGLETON.getOutputStreamCompressorNames(), SINGLETON, map); + for (final CompressorStreamProvider provider : findCompressorStreamProviders()) { + putAll(provider.getOutputStreamCompressorNames(), provider, map); + } + return map; + } + + }); + } + private static ArrayList<CompressorStreamProvider> findCompressorStreamProviders() { + return Lists.newArrayList(serviceLoaderIterator()); + } + + public static String getBrotli() { + return BROTLI; + } + + public static String getBzip2() { + return BZIP2; + } + + public static String getDeflate() { + return DEFLATE; + } + + /** + * @since 1.16 + * @return the constant {@link #DEFLATE64} + */ + public static String getDeflate64() { + return DEFLATE64; + } + + public static String getGzip() { + return GZIP; + } + + public static String getLzma() { + return LZMA; + } + + public static String getPack200() { + return PACK200; + } + + public static CompressorStreamFactory getSingleton() { + return SINGLETON; + } + + public static String getSnappyFramed() { + return SNAPPY_FRAMED; + } + + public static String getSnappyRaw() { + return SNAPPY_RAW; + } + + public static String getXz() { + return XZ; + } + + public static String getZ() { + return Z; + } + + public static String getLZ4Framed() { + return LZ4_FRAMED; + } + + public static String getLZ4Block() { + return LZ4_BLOCK; + } + + public static String getZstandard() { + return ZSTANDARD; + } + + static void putAll(final Set<String> names, final CompressorStreamProvider provider, + final TreeMap<String, CompressorStreamProvider> map) { + for (final String name : names) { + map.put(toKey(name), provider); + } + } + + private static Iterator<CompressorStreamProvider> serviceLoaderIterator() { + return new ServiceLoaderIterator<>(CompressorStreamProvider.class); + } + + private static String toKey(final String name) { + return name.toUpperCase(Locale.ROOT); + } + + /** + * If true, decompress until the end of the input. If false, stop after the + * first stream and leave the input position to point to the next byte after + * the stream + */ + private final Boolean decompressUntilEOF; + // This is Boolean so setDecompressConcatenated can determine whether it has + // been set by the ctor + // once the setDecompressConcatenated method has been removed, it can revert + // to boolean + + private SortedMap<String, CompressorStreamProvider> compressorInputStreamProviders; + + private SortedMap<String, CompressorStreamProvider> compressorOutputStreamProviders; + + /** + * If true, decompress until the end of the input. 
If false, stop after the + * first stream and leave the input position to point to the next byte after + * the stream + */ + private volatile boolean decompressConcatenated = false; + + private final int memoryLimitInKb; + /** + * Create an instance with the decompress Concatenated option set to false. + */ + public CompressorStreamFactory() { + this.decompressUntilEOF = null; + this.memoryLimitInKb = -1; + } + + /** + * Create an instance with the provided decompress Concatenated option. + * + * @param decompressUntilEOF + * if true, decompress until the end of the input; if false, stop + * after the first stream and leave the input position to point + * to the next byte after the stream. This setting applies to the + * gzip, bzip2 and xz formats only. + * + * @param memoryLimitInKb + * Some streams require allocation of potentially significant + * byte arrays/tables, and they can offer checks to prevent OOMs + * on corrupt files. Set the maximum allowed memory allocation in KBs. + * + * @since 1.14 + */ + public CompressorStreamFactory(final boolean decompressUntilEOF, final int memoryLimitInKb) { + this.decompressUntilEOF = decompressUntilEOF; + // Also copy to existing variable so can continue to use that as the + // current value + this.decompressConcatenated = decompressUntilEOF; + this.memoryLimitInKb = memoryLimitInKb; + } + + + /** + * Create an instance with the provided decompress Concatenated option. + * + * @param decompressUntilEOF + * if true, decompress until the end of the input; if false, stop + * after the first stream and leave the input position to point + * to the next byte after the stream. This setting applies to the + * gzip, bzip2 and xz formats only. + * @since 1.10 + */ + public CompressorStreamFactory(final boolean decompressUntilEOF) { + this(decompressUntilEOF, -1); + } + + /** + * Try to detect the type of compressor stream. 
+ * + * @param in input stream + * @return type of compressor stream detected + * @throws CompressorException if no compressor stream type was detected + * or if something else went wrong + * @throws IllegalArgumentException if stream is null or does not support mark + * + * @since 1.14 + */ + public static String detect(final InputStream in) throws CompressorException { + if (in == null) { + throw new IllegalArgumentException("Stream must not be null."); + } + + if (!in.markSupported()) { + throw new IllegalArgumentException("Mark is not supported."); + } + + final byte[] signature = new byte[12]; + in.mark(signature.length); + int signatureLength = -1; + try { + signatureLength = IOUtils.readFully(in, signature); + in.reset(); + } catch (IOException e) { + throw new CompressorException("IOException while reading signature.", e); + } + + if (BZip2CompressorInputStream.matches(signature, signatureLength)) { + return BZIP2; + } + + if (GzipCompressorInputStream.matches(signature, signatureLength)) { + return GZIP; + } + + if (Pack200CompressorInputStream.matches(signature, signatureLength)) { + return PACK200; + } + + if (FramedSnappyCompressorInputStream.matches(signature, signatureLength)) { + return SNAPPY_FRAMED; + } + + if (ZCompressorInputStream.matches(signature, signatureLength)) { + return Z; + } + + if (DeflateCompressorInputStream.matches(signature, signatureLength)) { + return DEFLATE; + } + + if (XZUtils.matches(signature, signatureLength)) { + return XZ; + } + + if (LZMAUtils.matches(signature, signatureLength)) { + return LZMA; + } + + if (FramedLZ4CompressorInputStream.matches(signature, signatureLength)) { + return LZ4_FRAMED; + } + + if (ZstdUtils.matches(signature, signatureLength)) { + return ZSTANDARD; + } + + throw new CompressorException("No Compressor found for the stream signature."); + } + /** + * Create an compressor input stream from an input stream, autodetecting the + * compressor type from the first few bytes of the stream. The InputStream + * must support marks, like BufferedInputStream. + * + * @param in + * the input stream + * @return the compressor input stream + * @throws CompressorException + * if the compressor name is not known + * @throws IllegalArgumentException + * if the stream is null or does not support mark + * @since 1.1 + */ + public CompressorInputStream createCompressorInputStream(final InputStream in) throws CompressorException { + return createCompressorInputStream(detect(in), in); + } + + /** + * Creates a compressor input stream from a compressor name and an input + * stream. + * + * @param name + * of the compressor, i.e. 
{@value #GZIP}, {@value #BZIP2}, + * {@value #XZ}, {@value #LZMA}, {@value #PACK200}, + * {@value #SNAPPY_RAW}, {@value #SNAPPY_FRAMED}, {@value #Z}, + * {@value #LZ4_BLOCK}, {@value #LZ4_FRAMED}, {@value #ZSTANDARD}, + * {@value #DEFLATE64} + * or {@value #DEFLATE} + * @param in + * the input stream + * @return compressor input stream + * @throws CompressorException + * if the compressor name is not known or not available, + * or if there's an IOException or MemoryLimitException thrown + * during initialization + * @throws IllegalArgumentException + * if the name or input stream is null + */ + public CompressorInputStream createCompressorInputStream(final String name, final InputStream in) + throws CompressorException { + return createCompressorInputStream(name, in, decompressConcatenated); + } + + @Override + public CompressorInputStream createCompressorInputStream(final String name, final InputStream in, + final boolean actualDecompressConcatenated) throws CompressorException { + if (name == null || in == null) { + throw new IllegalArgumentException("Compressor name and stream must not be null."); + } + + try { + + if (GZIP.equalsIgnoreCase(name)) { + return new GzipCompressorInputStream(in, actualDecompressConcatenated); + } + + if (BZIP2.equalsIgnoreCase(name)) { + return new BZip2CompressorInputStream(in, actualDecompressConcatenated); + } + + if (BROTLI.equalsIgnoreCase(name)) { + if (!BrotliUtils.isBrotliCompressionAvailable()) { + throw new CompressorException("Brotli compression is not available." + YOU_NEED_BROTLI_DEC); + } + return new BrotliCompressorInputStream(in); + } + + if (XZ.equalsIgnoreCase(name)) { + if (!XZUtils.isXZCompressionAvailable()) { + throw new CompressorException("XZ compression is not available." + YOU_NEED_XZ_JAVA); + } + return new XZCompressorInputStream(in, actualDecompressConcatenated, memoryLimitInKb); + } + + if (ZSTANDARD.equalsIgnoreCase(name)) { + if (!ZstdUtils.isZstdCompressionAvailable()) { + throw new CompressorException("Zstandard compression is not available." 
+ YOU_NEED_ZSTD_JNI); + } + return new ZstdCompressorInputStream(in); + } + + if (LZMA.equalsIgnoreCase(name)) { + if (!LZMAUtils.isLZMACompressionAvailable()) { + throw new CompressorException("LZMA compression is not available" + YOU_NEED_XZ_JAVA); + } + return new LZMACompressorInputStream(in, memoryLimitInKb); + } + + if (PACK200.equalsIgnoreCase(name)) { + return new Pack200CompressorInputStream(in); + } + + if (SNAPPY_RAW.equalsIgnoreCase(name)) { + return new SnappyCompressorInputStream(in); + } + + if (SNAPPY_FRAMED.equalsIgnoreCase(name)) { + return new FramedSnappyCompressorInputStream(in); + } + + if (Z.equalsIgnoreCase(name)) { + return new ZCompressorInputStream(in, memoryLimitInKb); + } + + if (DEFLATE.equalsIgnoreCase(name)) { + return new DeflateCompressorInputStream(in); + } + + if (DEFLATE64.equalsIgnoreCase(name)) { + return new Deflate64CompressorInputStream(in); + } + + if (LZ4_BLOCK.equalsIgnoreCase(name)) { + return new BlockLZ4CompressorInputStream(in); + } + + if (LZ4_FRAMED.equalsIgnoreCase(name)) { + return new FramedLZ4CompressorInputStream(in, actualDecompressConcatenated); + } + + } catch (final IOException e) { + throw new CompressorException("Could not create CompressorInputStream.", e); + } + final CompressorStreamProvider compressorStreamProvider = getCompressorInputStreamProviders().get(toKey(name)); + if (compressorStreamProvider != null) { + return compressorStreamProvider.createCompressorInputStream(name, in, actualDecompressConcatenated); + } + + throw new CompressorException("Compressor: " + name + " not found."); + } + + /** + * Creates an compressor output stream from an compressor name and an output + * stream. + * + * @param name + * the compressor name, i.e. {@value #GZIP}, {@value #BZIP2}, + * {@value #XZ}, {@value #PACK200}, {@value #SNAPPY_FRAMED}, + * {@value #LZ4_BLOCK}, {@value #LZ4_FRAMED}, {@value #ZSTANDARD} + * or {@value #DEFLATE} + * @param out + * the output stream + * @return the compressor output stream + * @throws CompressorException + * if the archiver name is not known + * @throws IllegalArgumentException + * if the archiver name or stream is null + */ + @Override + public CompressorOutputStream createCompressorOutputStream(final String name, final OutputStream out) + throws CompressorException { + if (name == null || out == null) { + throw new IllegalArgumentException("Compressor name and stream must not be null."); + } + + try { + + if (GZIP.equalsIgnoreCase(name)) { + return new GzipCompressorOutputStream(out); + } + + if (BZIP2.equalsIgnoreCase(name)) { + return new BZip2CompressorOutputStream(out); + } + + if (XZ.equalsIgnoreCase(name)) { + return new XZCompressorOutputStream(out); + } + + if (PACK200.equalsIgnoreCase(name)) { + return new Pack200CompressorOutputStream(out); + } + + if (LZMA.equalsIgnoreCase(name)) { + return new LZMACompressorOutputStream(out); + } + + if (DEFLATE.equalsIgnoreCase(name)) { + return new DeflateCompressorOutputStream(out); + } + + if (SNAPPY_FRAMED.equalsIgnoreCase(name)) { + return new FramedSnappyCompressorOutputStream(out); + } + + if (LZ4_BLOCK.equalsIgnoreCase(name)) { + return new BlockLZ4CompressorOutputStream(out); + } + + if (LZ4_FRAMED.equalsIgnoreCase(name)) { + return new FramedLZ4CompressorOutputStream(out); + } + + if (ZSTANDARD.equalsIgnoreCase(name)) { + return new ZstdCompressorOutputStream(out); + } + } catch (final IOException e) { + throw new CompressorException("Could not create CompressorOutputStream", e); + } + final CompressorStreamProvider compressorStreamProvider = 
getCompressorOutputStreamProviders().get(toKey(name)); + if (compressorStreamProvider != null) { + return compressorStreamProvider.createCompressorOutputStream(name, out); + } + throw new CompressorException("Compressor: " + name + " not found."); + } + + public SortedMap<String, CompressorStreamProvider> getCompressorInputStreamProviders() { + if (compressorInputStreamProviders == null) { + compressorInputStreamProviders = Collections + .unmodifiableSortedMap(findAvailableCompressorInputStreamProviders()); + } + return compressorInputStreamProviders; + } + + public SortedMap<String, CompressorStreamProvider> getCompressorOutputStreamProviders() { + if (compressorOutputStreamProviders == null) { + compressorOutputStreamProviders = Collections + .unmodifiableSortedMap(findAvailableCompressorOutputStreamProviders()); + } + return compressorOutputStreamProviders; + } + + // For Unit tests + boolean getDecompressConcatenated() { + return decompressConcatenated; + } + + public Boolean getDecompressUntilEOF() { + return decompressUntilEOF; + } + + @Override + public Set<String> getInputStreamCompressorNames() { + return Sets.newHashSet(GZIP, BROTLI, BZIP2, XZ, LZMA, PACK200, DEFLATE, SNAPPY_RAW, SNAPPY_FRAMED, Z, LZ4_BLOCK, + LZ4_FRAMED, ZSTANDARD, DEFLATE64); + } + + @Override + public Set<String> getOutputStreamCompressorNames() { + return Sets.newHashSet(GZIP, BZIP2, XZ, LZMA, PACK200, DEFLATE, SNAPPY_FRAMED, LZ4_BLOCK, LZ4_FRAMED, ZSTANDARD); + } + + /** + * Whether to decompress the full input or only the first stream in formats + * supporting multiple concatenated input streams. + * + * <p> + * This setting applies to the gzip, bzip2 and xz formats only. + * </p> + * + * @param decompressConcatenated + * if true, decompress until the end of the input; if false, stop + * after the first stream and leave the input position to point + * to the next byte after the stream + * @since 1.5 + * @deprecated 1.10 use the {@link #CompressorStreamFactory(boolean)} + * constructor instead + * @throws IllegalStateException + * if the constructor {@link #CompressorStreamFactory(boolean)} + * was used to create the factory + */ + @Deprecated + public void setDecompressConcatenated(final boolean decompressConcatenated) { + if (this.decompressUntilEOF != null) { + throw new IllegalStateException("Cannot override the setting defined by the constructor"); + } + this.decompressConcatenated = decompressConcatenated; + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamProvider.java b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamProvider.java new file mode 100644 index 000000000..b0c843123 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamProvider.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
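The detect() method introduced above requires a mark-supported stream; a minimal sketch (editorial, file names assumed, not part of this commit) of autodetecting the compression format and decompressing:

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.commons.compress.utils.IOUtils;

public class DetectExample {
    public static void main(String[] args) throws IOException, CompressorException {
        // BufferedInputStream provides the mark support detect() insists on.
        try (InputStream in = new BufferedInputStream(
                 Files.newInputStream(Paths.get("data.unknown")));
             OutputStream out = Files.newOutputStream(Paths.get("data.out"))) {
            String format = CompressorStreamFactory.detect(in);   // e.g. "gz", "bzip2", "xz"
            try (CompressorInputStream decompressed =
                     new CompressorStreamFactory().createCompressorInputStream(format, in)) {
                IOUtils.copy(decompressed, out);
            }
        }
    }
}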
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors; + +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Set; + +/** + * Creates Compressor {@link CompressorInputStream}s and + * {@link CompressorOutputStream}s. + * + * @since 1.13 + */ +public interface CompressorStreamProvider { + + /** + * Creates a compressor input stream from a compressor name and an input + * stream. + * + * @param name + * of the compressor, i.e. + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#GZIP}, + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#BZIP2}, + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#XZ}, + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#LZMA}, + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#PACK200}, + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#SNAPPY_RAW}, + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#SNAPPY_FRAMED}, + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#Z} + * or + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#DEFLATE} + * @param in + * the input stream + * @param decompressUntilEOF + * if true, decompress until the end of the input; if false, stop + * after the first stream and leave the input position to point + * to the next byte after the stream. This setting applies to the + * gzip, bzip2 and xz formats only. + * @return compressor input stream + * @throws CompressorException + * if the compressor name is not known + * @throws IllegalArgumentException + * if the name or input stream is null + */ + CompressorInputStream createCompressorInputStream(final String name, final InputStream in, + final boolean decompressUntilEOF) throws CompressorException; + + /** + * Creates a compressor output stream from an compressor name and an output + * stream. + * + * @param name + * the compressor name, i.e. 
+ * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#GZIP}, + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#BZIP2}, + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#XZ}, + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#PACK200} + * or + * {@value org.apache.commons.compress.compressors.CompressorStreamFactory#DEFLATE} + * @param out + * the output stream + * @return the compressor output stream + * @throws CompressorException + * if the archiver name is not known + * @throws IllegalArgumentException + * if the archiver name or stream is null + */ + CompressorOutputStream createCompressorOutputStream(final String name, final OutputStream out) + throws CompressorException; + + /** + * Gets all the input stream compressor names for this provider + * + * @return all the input compressor names for this provider + */ + Set<String> getInputStreamCompressorNames(); + + /** + * Gets all the output stream compressor names for this provider + * + * @return all the output compressor names for this provider + */ + Set<String> getOutputStreamCompressorNames(); + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java b/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java new file mode 100644 index 000000000..f97bb8e41 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +/** + * File name mapping code for the compression formats. + * @ThreadSafe + * @since 1.4 + */ +public class FileNameUtil { + + /** + * Map from common filename suffixes to the suffixes that identify compressed + * versions of those file types. For example: from ".tar" to ".tgz". + */ + private final Map<String, String> compressSuffix = + new HashMap<>(); + + /** + * Map from common filename suffixes of compressed files to the + * corresponding suffixes of uncompressed files. For example: from + * ".tgz" to ".tar". + * <p> + * This map also contains format-specific suffixes like ".gz" and "-z". + * These suffixes are mapped to the empty string, as they should simply + * be removed from the filename when the file is uncompressed. + */ + private final Map<String, String> uncompressSuffix; + + /** + * Length of the longest compressed suffix. + */ + private final int longestCompressedSuffix; + + /** + * Length of the shortest compressed suffix. + */ + private final int shortestCompressedSuffix; + + /** + * Length of the longest uncompressed suffix. 
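As an illustration of the CompressorStreamProvider contract above (an editorial sketch, not part of this commit): a provider exposing the invented alias name "gzip-alias" backed by the existing gzip streams. To be discovered by findAvailableCompressor*Providers() such a class would also need a META-INF/services/org.apache.commons.compress.compressors.CompressorStreamProvider entry.

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.Set;

import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.compressors.CompressorStreamProvider;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;

public class GzipAliasProvider implements CompressorStreamProvider {
    private static final String NAME = "gzip-alias";   // invented name for the sketch

    @Override
    public CompressorInputStream createCompressorInputStream(String name, InputStream in,
            boolean decompressUntilEOF) throws CompressorException {
        if (!NAME.equalsIgnoreCase(name)) {
            throw new CompressorException("Compressor: " + name + " not found.");
        }
        try {
            return new GzipCompressorInputStream(in, decompressUntilEOF);
        } catch (IOException e) {
            throw new CompressorException("Could not create CompressorInputStream.", e);
        }
    }

    @Override
    public CompressorOutputStream createCompressorOutputStream(String name, OutputStream out)
            throws CompressorException {
        if (!NAME.equalsIgnoreCase(name)) {
            throw new CompressorException("Compressor: " + name + " not found.");
        }
        try {
            return new GzipCompressorOutputStream(out);
        } catch (IOException e) {
            throw new CompressorException("Could not create CompressorOutputStream.", e);
        }
    }

    @Override
    public Set<String> getInputStreamCompressorNames() {
        return Collections.singleton(NAME);
    }

    @Override
    public Set<String> getOutputStreamCompressorNames() {
        return Collections.singleton(NAME);
    }
}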
+ */ + private final int longestUncompressedSuffix; + + /** + * Length of the shortest uncompressed suffix longer than the + * empty string. + */ + private final int shortestUncompressedSuffix; + + /** + * The format's default extension. + */ + private final String defaultExtension; + + /** + * sets up the utility with a map of known compressed to + * uncompressed suffix mappings and the default extension of the + * format. + * + * @param uncompressSuffix Map from common filename suffixes of + * compressed files to the corresponding suffixes of uncompressed + * files. For example: from ".tgz" to ".tar". This map also + * contains format-specific suffixes like ".gz" and "-z". These + * suffixes are mapped to the empty string, as they should simply + * be removed from the filename when the file is uncompressed. + * + * @param defaultExtension the format's default extension like ".gz" + */ + public FileNameUtil(final Map<String, String> uncompressSuffix, + final String defaultExtension) { + this.uncompressSuffix = Collections.unmodifiableMap(uncompressSuffix); + int lc = Integer.MIN_VALUE, sc = Integer.MAX_VALUE; + int lu = Integer.MIN_VALUE, su = Integer.MAX_VALUE; + for (final Map.Entry<String, String> ent : uncompressSuffix.entrySet()) { + final int cl = ent.getKey().length(); + if (cl > lc) { + lc = cl; + } + if (cl < sc) { + sc = cl; + } + + final String u = ent.getValue(); + final int ul = u.length(); + if (ul > 0) { + if (!compressSuffix.containsKey(u)) { + compressSuffix.put(u, ent.getKey()); + } + if (ul > lu) { + lu = ul; + } + if (ul < su) { + su = ul; + } + } + } + longestCompressedSuffix = lc; + longestUncompressedSuffix = lu; + shortestCompressedSuffix = sc; + shortestUncompressedSuffix = su; + this.defaultExtension = defaultExtension; + } + + /** + * Detects common format suffixes in the given filename. + * + * @param filename name of a file + * @return {@code true} if the filename has a common format suffix, + * {@code false} otherwise + */ + public boolean isCompressedFilename(final String filename) { + final String lower = filename.toLowerCase(Locale.ENGLISH); + final int n = lower.length(); + for (int i = shortestCompressedSuffix; + i <= longestCompressedSuffix && i < n; i++) { + if (uncompressSuffix.containsKey(lower.substring(n - i))) { + return true; + } + } + return false; + } + + /** + * Maps the given name of a compressed file to the name that the + * file should have after uncompression. Commonly used file type specific + * suffixes like ".tgz" or ".svgz" are automatically detected and + * correctly mapped. For example the name "package.tgz" is mapped to + * "package.tar". And any filenames with the generic ".gz" suffix + * (or any other generic gzip suffix) is mapped to a name without that + * suffix. If no format suffix is detected, then the filename is returned + * unmapped. + * + * @param filename name of a file + * @return name of the corresponding uncompressed file + */ + public String getUncompressedFilename(final String filename) { + final String lower = filename.toLowerCase(Locale.ENGLISH); + final int n = lower.length(); + for (int i = shortestCompressedSuffix; + i <= longestCompressedSuffix && i < n; i++) { + final String suffix = uncompressSuffix.get(lower.substring(n - i)); + if (suffix != null) { + return filename.substring(0, n - i) + suffix; + } + } + return filename; + } + + /** + * Maps the given filename to the name that the file should have after + * compression. 
Common file types with custom suffixes for + * compressed versions are automatically detected and correctly mapped. + * For example the name "package.tar" is mapped to "package.tgz". If no + * custom mapping is applicable, then the default ".gz" suffix is appended + * to the filename. + * + * @param filename name of a file + * @return name of the corresponding compressed file + */ + public String getCompressedFilename(final String filename) { + final String lower = filename.toLowerCase(Locale.ENGLISH); + final int n = lower.length(); + for (int i = shortestUncompressedSuffix; + i <= longestUncompressedSuffix && i < n; i++) { + final String suffix = compressSuffix.get(lower.substring(n - i)); + if (suffix != null) { + return filename.substring(0, n - i) + suffix; + } + } + // No custom suffix found, just append the default + return filename + defaultExtension; + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/brotli/BrotliCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/brotli/BrotliCompressorInputStream.java new file mode 100644 index 000000000..5674cb220 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/brotli/BrotliCompressorInputStream.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.compress.compressors.brotli; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.CountingInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; +import org.brotli.dec.BrotliInputStream; + +/** + * {@link CompressorInputStream} implementation to decode Brotli encoded stream. 
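+ * <p>A minimal usage sketch (the file name and the java.nio.file calls are
+ * illustrative only, not part of this class):</p>
+ * <pre>
+ * try (InputStream in = Files.newInputStream(Paths.get("some-file.br"));
+ *      BrotliCompressorInputStream brIn = new BrotliCompressorInputStream(in)) {
+ *     // read decompressed bytes from brIn
+ * }
+ * </pre>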
+ * Library relies on <a href="https://github.com/google/brotli">Google brotli</a> + * + * @since 1.14 + */ +public class BrotliCompressorInputStream extends CompressorInputStream + implements InputStreamStatistics { + + private final CountingInputStream countingStream; + private final BrotliInputStream decIS; + + public BrotliCompressorInputStream(final InputStream in) throws IOException { + decIS = new BrotliInputStream(countingStream = new CountingInputStream(in)); + } + + @Override + public int available() throws IOException { + return decIS.available(); + } + + @Override + public void close() throws IOException { + decIS.close(); + } + + @Override + public int read(final byte[] b) throws IOException { + return decIS.read(b); + } + + @Override + public long skip(final long n) throws IOException { + return IOUtils.skip(decIS, n); + } + + @Override + public void mark(final int readlimit) { + decIS.mark(readlimit); + } + + @Override + public boolean markSupported() { + return decIS.markSupported(); + } + + @Override + public int read() throws IOException { + final int ret = decIS.read(); + count(ret == -1 ? 0 : 1); + return ret; + } + + @Override + public int read(final byte[] buf, final int off, final int len) throws IOException { + final int ret = decIS.read(buf, off, len); + count(ret); + return ret; + } + + @Override + public String toString() { + return decIS.toString(); + } + + @Override + public void reset() throws IOException { + decIS.reset(); + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return countingStream.getBytesRead(); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/brotli/BrotliUtils.java b/src/main/java/org/apache/commons/compress/compressors/brotli/BrotliUtils.java new file mode 100644 index 000000000..15a21399d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/brotli/BrotliUtils.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.brotli; + +/** + * Utility code for the Brotli compression format. + * @ThreadSafe + * @since 1.14 + */ +public class BrotliUtils { + + enum CachedAvailability { + DONT_CACHE, CACHED_AVAILABLE, CACHED_UNAVAILABLE + } + + private static volatile CachedAvailability cachedBrotliAvailability; + + static { + cachedBrotliAvailability = CachedAvailability.DONT_CACHE; + try { + Class.forName("org.osgi.framework.BundleEvent"); + } catch (final Exception ex) { // NOSONAR + setCacheBrotliAvailablity(true); + } + } + + /** Private constructor to prevent instantiation of this utility class. */ + private BrotliUtils() { + } + + + /** + * Are the classes required to support Brotli compression available? 
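+ * <p>A minimal sketch of the intended guard pattern (wrapping an existing
+ * {@code InputStream} named {@code in} is illustrative only):</p>
+ * <pre>
+ * if (BrotliUtils.isBrotliCompressionAvailable()) {
+ *     in = new BrotliCompressorInputStream(in);
+ * }
+ * </pre>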
+ * @return true if the classes required to support Brotli compression are available + */ + public static boolean isBrotliCompressionAvailable() { + final CachedAvailability cachedResult = cachedBrotliAvailability; + if (cachedResult != CachedAvailability.DONT_CACHE) { + return cachedResult == CachedAvailability.CACHED_AVAILABLE; + } + return internalIsBrotliCompressionAvailable(); + } + + private static boolean internalIsBrotliCompressionAvailable() { + try { + Class.forName("org.brotli.dec.BrotliInputStream"); + return true; + } catch (NoClassDefFoundError | Exception error) { // NOSONAR + return false; + } + } + + /** + * Whether to cache the result of the Brotli for Java check. + * + * <p>This defaults to {@code false} in an OSGi environment and {@code true} otherwise.</p> + * @param doCache whether to cache the result + */ + public static void setCacheBrotliAvailablity(final boolean doCache) { + if (!doCache) { + cachedBrotliAvailability = CachedAvailability.DONT_CACHE; + } else if (cachedBrotliAvailability == CachedAvailability.DONT_CACHE) { + final boolean hasBrotli = internalIsBrotliCompressionAvailable(); + cachedBrotliAvailability = hasBrotli ? CachedAvailability.CACHED_AVAILABLE + : CachedAvailability.CACHED_UNAVAILABLE; + } + } + + // only exists to support unit tests + static CachedAvailability getCachedBrotliAvailability() { + return cachedBrotliAvailability; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/brotli/package.html b/src/main/java/org/apache/commons/compress/compressors/brotli/package.html new file mode 100644 index 000000000..7654cf673 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/brotli/package.html @@ -0,0 +1,26 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream class for decompressing streams using the + Brotli algorithm based + on <a href="https://github.com/google/brotli">Google's Brotli + decoder</a>.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java new file mode 100644 index 000000000..677bbbd1e --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java @@ -0,0 +1,970 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * <keiron@aftexsw.com> to whom the Ant project is very grateful for his + * great code. + */ +package org.apache.commons.compress.compressors.bzip2; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteOrder; +import java.util.Arrays; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.BitInputStream; +import org.apache.commons.compress.utils.CloseShieldFilterInputStream; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * An input stream that decompresses from the BZip2 format to be read as any other stream. + * + * @NotThreadSafe + */ +public class BZip2CompressorInputStream extends CompressorInputStream + implements BZip2Constants, InputStreamStatistics { + + /** + * Index of the last char in the block, so the block size == last + 1. + */ + private int last; + + /** + * Index in zptr[] of original string after sorting. + */ + private int origPtr; + + /** + * always: in the range 0 .. 9. The current block size is 100000 * this + * number. + */ + private int blockSize100k; + + private boolean blockRandomised; + + private final CRC crc = new CRC(); + + private int nInUse; + + private BitInputStream bin; + private final boolean decompressConcatenated; + + private static final int EOF = 0; + private static final int START_BLOCK_STATE = 1; + private static final int RAND_PART_A_STATE = 2; + private static final int RAND_PART_B_STATE = 3; + private static final int RAND_PART_C_STATE = 4; + private static final int NO_RAND_PART_A_STATE = 5; + private static final int NO_RAND_PART_B_STATE = 6; + private static final int NO_RAND_PART_C_STATE = 7; + + private int currentState = START_BLOCK_STATE; + + private int storedBlockCRC, storedCombinedCRC; + private int computedBlockCRC, computedCombinedCRC; + + // Variables used by setup* methods exclusively + + private int su_count; + private int su_ch2; + private int su_chPrev; + private int su_i2; + private int su_j2; + private int su_rNToGo; + private int su_rTPos; + private int su_tPos; + private char su_z; + + /** + * All memory intensive stuff. This field is initialized by initBlock(). + */ + private BZip2CompressorInputStream.Data data; + + /** + * Constructs a new BZip2CompressorInputStream which decompresses bytes + * read from the specified stream. This doesn't suppprt decompressing + * concatenated .bz2 files. + * + * @param in the InputStream from which this object should be created + * @throws IOException + * if the stream content is malformed or an I/O error occurs. + * @throws NullPointerException + * if {@code in == null} + */ + public BZip2CompressorInputStream(final InputStream in) throws IOException { + this(in, false); + } + + /** + * Constructs a new BZip2CompressorInputStream which decompresses bytes + * read from the specified stream. 
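+ * <p>A minimal usage sketch (the file name and the java.nio.file calls are
+ * illustrative only):</p>
+ * <pre>
+ * try (InputStream fin = Files.newInputStream(Paths.get("archive.bz2"));
+ *      BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(fin, true)) {
+ *     // read decompressed bytes from bzIn; all concatenated .bz2 streams are consumed
+ * }
+ * </pre>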
+ * + * @param in the InputStream from which this object should be created + * @param decompressConcatenated + * if true, decompress until the end of the input; + * if false, stop after the first .bz2 stream and + * leave the input position to point to the next + * byte after the .bz2 stream + * + * @throws IOException + * if {@code in == null}, the stream content is malformed, or an I/O error occurs. + */ + public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException { + this.bin = new BitInputStream(in == System.in ? new CloseShieldFilterInputStream(in) : in, + ByteOrder.BIG_ENDIAN); + this.decompressConcatenated = decompressConcatenated; + + init(true); + initBlock(); + } + + @Override + public int read() throws IOException { + if (this.bin != null) { + final int r = read0(); + count(r < 0 ? -1 : 1); + return r; + } + throw new IOException("stream closed"); + } + + /* + * (non-Javadoc) + * + * @see java.io.InputStream#read(byte[], int, int) + */ + @Override + public int read(final byte[] dest, final int offs, final int len) + throws IOException { + if (offs < 0) { + throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); + } + if (len < 0) { + throw new IndexOutOfBoundsException("len(" + len + ") < 0."); + } + if (offs + len > dest.length) { + throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + + len + ") > dest.length(" + dest.length + ")."); + } + if (this.bin == null) { + throw new IOException("stream closed"); + } + if (len == 0) { + return 0; + } + + final int hi = offs + len; + int destOffs = offs; + int b; + while (destOffs < hi && ((b = read0()) >= 0)) { + dest[destOffs++] = (byte) b; + count(1); + } + + return (destOffs == offs) ? -1 : (destOffs - offs); + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return bin.getBytesRead(); + } + + private void makeMaps() { + final boolean[] inUse = this.data.inUse; + final byte[] seqToUnseq = this.data.seqToUnseq; + + int nInUseShadow = 0; + + for (int i = 0; i < 256; i++) { + if (inUse[i]) { + seqToUnseq[nInUseShadow++] = (byte) i; + } + } + + this.nInUse = nInUseShadow; + } + + private int read0() throws IOException { + switch (currentState) { + case EOF: + return -1; + + case START_BLOCK_STATE: + return setupBlock(); + + case RAND_PART_A_STATE: + throw new IllegalStateException(); + + case RAND_PART_B_STATE: + return setupRandPartB(); + + case RAND_PART_C_STATE: + return setupRandPartC(); + + case NO_RAND_PART_A_STATE: + throw new IllegalStateException(); + + case NO_RAND_PART_B_STATE: + return setupNoRandPartB(); + + case NO_RAND_PART_C_STATE: + return setupNoRandPartC(); + + default: + throw new IllegalStateException(); + } + } + + private int readNextByte(BitInputStream in) throws IOException { + long b = in.readBits(8); + return (int) b; + } + + private boolean init(final boolean isFirstStream) throws IOException { + if (null == bin) { + throw new IOException("No InputStream"); + } + + if (!isFirstStream) { + bin.clearBitCache(); + } + + final int magic0 = readNextByte(this.bin); + if (magic0 == -1 && !isFirstStream) { + return false; + } + final int magic1 = readNextByte(this.bin); + final int magic2 = readNextByte(this.bin); + + if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') { + throw new IOException(isFirstStream + ? 
"Stream is not in the BZip2 format" + : "Garbage after a valid BZip2 stream"); + } + + final int blockSize = readNextByte(this.bin); + if ((blockSize < '1') || (blockSize > '9')) { + throw new IOException("BZip2 block size is invalid"); + } + + this.blockSize100k = blockSize - '0'; + + this.computedCombinedCRC = 0; + + return true; + } + + private void initBlock() throws IOException { + BitInputStream bin = this.bin; + char magic0; + char magic1; + char magic2; + char magic3; + char magic4; + char magic5; + + while (true) { + // Get the block magic bytes. + magic0 = bsGetUByte(bin); + magic1 = bsGetUByte(bin); + magic2 = bsGetUByte(bin); + magic3 = bsGetUByte(bin); + magic4 = bsGetUByte(bin); + magic5 = bsGetUByte(bin); + + // If isn't end of stream magic, break out of the loop. + if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45 + || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) { + break; + } + + // End of stream was reached. Check the combined CRC and + // advance to the next .bz2 stream if decoding concatenated + // streams. + if (complete()) { + return; + } + } + + if (magic0 != 0x31 || // '1' + magic1 != 0x41 || // ')' + magic2 != 0x59 || // 'Y' + magic3 != 0x26 || // '&' + magic4 != 0x53 || // 'S' + magic5 != 0x59 // 'Y' + ) { + this.currentState = EOF; + throw new IOException("bad block header"); + } + this.storedBlockCRC = bsGetInt(bin); + this.blockRandomised = bsR(bin, 1) == 1; + + /** + * Allocate data here instead in constructor, so we do not allocate + * it if the input file is empty. + */ + if (this.data == null) { + this.data = new Data(this.blockSize100k); + } + + // currBlockNo++; + getAndMoveToFrontDecode(); + + this.crc.initialiseCRC(); + this.currentState = START_BLOCK_STATE; + } + + private void endBlock() throws IOException { + this.computedBlockCRC = this.crc.getFinalCRC(); + + // A bad CRC is considered a fatal error. + if (this.storedBlockCRC != this.computedBlockCRC) { + // make next blocks readable without error + // (repair feature, not yet documented, not tested) + this.computedCombinedCRC = (this.storedCombinedCRC << 1) + | (this.storedCombinedCRC >>> 31); + this.computedCombinedCRC ^= this.storedBlockCRC; + + throw new IOException("BZip2 CRC error"); + } + + this.computedCombinedCRC = (this.computedCombinedCRC << 1) + | (this.computedCombinedCRC >>> 31); + this.computedCombinedCRC ^= this.computedBlockCRC; + } + + private boolean complete() throws IOException { + this.storedCombinedCRC = bsGetInt(bin); + this.currentState = EOF; + this.data = null; + + if (this.storedCombinedCRC != this.computedCombinedCRC) { + throw new IOException("BZip2 CRC error"); + } + + // Look for the next .bz2 stream if decompressing + // concatenated files. + return !decompressConcatenated || !init(false); + } + + @Override + public void close() throws IOException { + final BitInputStream inShadow = this.bin; + if (inShadow != null) { + try { + inShadow.close(); + } finally { + this.data = null; + this.bin = null; + } + } + } + + /** + * read bits from the input stream + * @param n the number of bits to read, must not exceed 32? 
+ * @return the requested bits combined into an int + * @throws IOException + */ + private static int bsR(BitInputStream bin, final int n) throws IOException { + long thech = bin.readBits(n); + if (thech < 0) { + throw new IOException("unexpected end of stream"); + } + return (int) thech; + } + + private static boolean bsGetBit(BitInputStream bin) throws IOException { + return bsR(bin, 1) != 0; + } + + private static char bsGetUByte(BitInputStream bin) throws IOException { + return (char) bsR(bin, 8); + } + + private static int bsGetInt(BitInputStream bin) throws IOException { + return bsR(bin, 32); + } + + private static void checkBounds(final int checkVal, final int limitExclusive, String name) + throws IOException { + if (checkVal < 0) { + throw new IOException("Corrupted input, " + name + " value negative"); + } + if (checkVal >= limitExclusive) { + throw new IOException("Corrupted input, " + name + " value too big"); + } + } + + /** + * Called by createHuffmanDecodingTables() exclusively. + */ + private static void hbCreateDecodeTables(final int[] limit, + final int[] base, final int[] perm, final char[] length, + final int minLen, final int maxLen, final int alphaSize) + throws IOException { + for (int i = minLen, pp = 0; i <= maxLen; i++) { + for (int j = 0; j < alphaSize; j++) { + if (length[j] == i) { + perm[pp++] = j; + } + } + } + + for (int i = MAX_CODE_LEN; --i > 0;) { + base[i] = 0; + limit[i] = 0; + } + + for (int i = 0; i < alphaSize; i++) { + final int l = length[i]; + checkBounds(l, MAX_ALPHA_SIZE, "length"); + base[l + 1]++; + } + + for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { + b += base[i]; + base[i] = b; + } + + for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { + final int nb = base[i + 1]; + vec += nb - b; + b = nb; + limit[i] = vec - 1; + vec <<= 1; + } + + for (int i = minLen + 1; i <= maxLen; i++) { + base[i] = ((limit[i - 1] + 1) << 1) - base[i]; + } + } + + private void recvDecodingTables() throws IOException { + final BitInputStream bin = this.bin; + final Data dataShadow = this.data; + final boolean[] inUse = dataShadow.inUse; + final byte[] pos = dataShadow.recvDecodingTables_pos; + final byte[] selector = dataShadow.selector; + final byte[] selectorMtf = dataShadow.selectorMtf; + + int inUse16 = 0; + + /* Receive the mapping table */ + for (int i = 0; i < 16; i++) { + if (bsGetBit(bin)) { + inUse16 |= 1 << i; + } + } + + Arrays.fill(inUse, false); + for (int i = 0; i < 16; i++) { + if ((inUse16 & (1 << i)) != 0) { + final int i16 = i << 4; + for (int j = 0; j < 16; j++) { + if (bsGetBit(bin)) { + inUse[i16 + j] = true; + } + } + } + } + + makeMaps(); + final int alphaSize = this.nInUse + 2; + /* Now the selectors */ + final int nGroups = bsR(bin, 3); + final int nSelectors = bsR(bin, 15); + checkBounds(alphaSize, MAX_ALPHA_SIZE + 1, "alphaSize"); + checkBounds(nGroups, N_GROUPS + 1, "nGroups"); + checkBounds(nSelectors, MAX_SELECTORS + 1, "nSelectors"); + + for (int i = 0; i < nSelectors; i++) { + int j = 0; + while (bsGetBit(bin)) { + j++; + } + selectorMtf[i] = (byte) j; + } + + /* Undo the MTF values for the selectors. 
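Each selectorMtf value indexes a move-to-front list of Huffman table numbers; the table it selects is moved to the front of pos[] and recorded in selector[].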
*/ + for (int v = nGroups; --v >= 0;) { + pos[v] = (byte) v; + } + + for (int i = 0; i < nSelectors; i++) { + int v = selectorMtf[i] & 0xff; + checkBounds(v, N_GROUPS, "selectorMtf"); + final byte tmp = pos[v]; + while (v > 0) { + // nearly all times v is zero, 4 in most other cases + pos[v] = pos[v - 1]; + v--; + } + pos[0] = tmp; + selector[i] = tmp; + } + + final char[][] len = dataShadow.temp_charArray2d; + + /* Now the coding tables */ + for (int t = 0; t < nGroups; t++) { + int curr = bsR(bin, 5); + final char[] len_t = len[t]; + for (int i = 0; i < alphaSize; i++) { + while (bsGetBit(bin)) { + curr += bsGetBit(bin) ? -1 : 1; + } + len_t[i] = (char) curr; + } + } + + // finally create the Huffman tables + createHuffmanDecodingTables(alphaSize, nGroups); + } + + /** + * Called by recvDecodingTables() exclusively. + */ + private void createHuffmanDecodingTables(final int alphaSize, + final int nGroups) throws IOException { + final Data dataShadow = this.data; + final char[][] len = dataShadow.temp_charArray2d; + final int[] minLens = dataShadow.minLens; + final int[][] limit = dataShadow.limit; + final int[][] base = dataShadow.base; + final int[][] perm = dataShadow.perm; + + for (int t = 0; t < nGroups; t++) { + int minLen = 32; + int maxLen = 0; + final char[] len_t = len[t]; + for (int i = alphaSize; --i >= 0;) { + final char lent = len_t[i]; + if (lent > maxLen) { + maxLen = lent; + } + if (lent < minLen) { + minLen = lent; + } + } + hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, + maxLen, alphaSize); + minLens[t] = minLen; + } + } + + private void getAndMoveToFrontDecode() throws IOException { + final BitInputStream bin = this.bin; + this.origPtr = bsR(bin, 24); + recvDecodingTables(); + + final Data dataShadow = this.data; + final byte[] ll8 = dataShadow.ll8; + final int[] unzftab = dataShadow.unzftab; + final byte[] selector = dataShadow.selector; + final byte[] seqToUnseq = dataShadow.seqToUnseq; + final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; + final int[] minLens = dataShadow.minLens; + final int[][] limit = dataShadow.limit; + final int[][] base = dataShadow.base; + final int[][] perm = dataShadow.perm; + final int limitLast = this.blockSize100k * 100000; + + /* + * Setting up the unzftab entries here is not strictly necessary, but it + * does save having to do it later in a separate pass, and so saves a + * block's worth of cache misses. 
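+ * (unzftab[] accumulates the per-byte frequencies that setupBlock() later
+ * turns into the cumulative cftab[] table used to invert the BWT.)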
+ */ + for (int i = 256; --i >= 0;) { + yy[i] = (char) i; + unzftab[i] = 0; + } + + int groupNo = 0; + int groupPos = G_SIZE - 1; + final int eob = this.nInUse + 1; + int nextSym = getAndMoveToFrontDecode0(); + int lastShadow = -1; + int zt = selector[groupNo] & 0xff; + checkBounds(zt, N_GROUPS, "zt"); + int[] base_zt = base[zt]; + int[] limit_zt = limit[zt]; + int[] perm_zt = perm[zt]; + int minLens_zt = minLens[zt]; + + while (nextSym != eob) { + if ((nextSym == RUNA) || (nextSym == RUNB)) { + int s = -1; + + for (int n = 1; true; n <<= 1) { + if (nextSym == RUNA) { + s += n; + } else if (nextSym == RUNB) { + s += n << 1; + } else { + break; + } + + if (groupPos == 0) { + groupPos = G_SIZE - 1; + checkBounds(++groupNo, MAX_SELECTORS, "groupNo"); + zt = selector[groupNo] & 0xff; + checkBounds(zt, N_GROUPS, "zt"); + base_zt = base[zt]; + limit_zt = limit[zt]; + perm_zt = perm[zt]; + minLens_zt = minLens[zt]; + } else { + groupPos--; + } + + int zn = minLens_zt; + checkBounds(zn, MAX_ALPHA_SIZE, "zn"); + int zvec = bsR(bin, zn); + while(zvec > limit_zt[zn]) { + checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); + zvec = (zvec << 1) | bsR(bin, 1); + } + final int tmp = zvec - base_zt[zn]; + checkBounds(tmp, MAX_ALPHA_SIZE, "zvec"); + nextSym = perm_zt[tmp]; + } + + final int yy0 = yy[0]; + checkBounds(yy0, 256, "yy"); + final byte ch = seqToUnseq[yy0]; + unzftab[ch & 0xff] += s + 1; + + final int from = ++lastShadow; + lastShadow += s; + Arrays.fill(ll8, from, lastShadow + 1, ch); + + if (lastShadow >= limitLast) { + throw new IOException("block overrun while expanding RLE in MTF, " + + lastShadow + " exceeds " + limitLast); + } + } else { + if (++lastShadow >= limitLast) { + throw new IOException("block overrun in MTF, " + + lastShadow + " exceeds " + limitLast); + } + checkBounds(nextSym, 256 + 1, "nextSym"); + + final char tmp = yy[nextSym - 1]; + checkBounds(tmp, 256, "yy"); + unzftab[seqToUnseq[tmp] & 0xff]++; + ll8[lastShadow] = seqToUnseq[tmp]; + + /* + * This loop is hammered during decompression, hence avoid + * native method call overhead of System.arraycopy for very + * small ranges to copy. 
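+ * Either branch shifts yy[0 .. nextSym-2] up by one slot so that the symbol
+ * just decoded can be moved to the front (yy[0] = tmp below).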
+ */ + if (nextSym <= 16) { + for (int j = nextSym - 1; j > 0;) { + yy[j] = yy[--j]; + } + } else { + System.arraycopy(yy, 0, yy, 1, nextSym - 1); + } + + yy[0] = tmp; + + if (groupPos == 0) { + groupPos = G_SIZE - 1; + checkBounds(++groupNo, MAX_SELECTORS, "groupNo"); + zt = selector[groupNo] & 0xff; + checkBounds(zt, N_GROUPS, "zt"); + base_zt = base[zt]; + limit_zt = limit[zt]; + perm_zt = perm[zt]; + minLens_zt = minLens[zt]; + } else { + groupPos--; + } + + int zn = minLens_zt; + checkBounds(zn, MAX_ALPHA_SIZE, "zn"); + int zvec = bsR(bin, zn); + while(zvec > limit_zt[zn]) { + checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); + zvec = (zvec << 1) | bsR(bin, 1); + } + final int idx = zvec - base_zt[zn]; + checkBounds(idx, MAX_ALPHA_SIZE, "zvec"); + nextSym = perm_zt[idx]; + } + } + + this.last = lastShadow; + } + + private int getAndMoveToFrontDecode0() throws IOException { + final Data dataShadow = this.data; + final int zt = dataShadow.selector[0] & 0xff; + checkBounds(zt, N_GROUPS, "zt"); + final int[] limit_zt = dataShadow.limit[zt]; + int zn = dataShadow.minLens[zt]; + checkBounds(zn, MAX_ALPHA_SIZE, "zn"); + int zvec = bsR(bin, zn); + while (zvec > limit_zt[zn]) { + checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); + zvec = (zvec << 1) | bsR(bin, 1); + } + final int tmp = zvec - dataShadow.base[zt][zn]; + checkBounds(tmp, MAX_ALPHA_SIZE, "zvec"); + + return dataShadow.perm[zt][tmp]; + } + + private int setupBlock() throws IOException { + if (currentState == EOF || this.data == null) { + return -1; + } + + final int[] cftab = this.data.cftab; + final int ttLen = this.last + 1; + final int[] tt = this.data.initTT(ttLen); + final byte[] ll8 = this.data.ll8; + cftab[0] = 0; + System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); + + for (int i = 1, c = cftab[0]; i <= 256; i++) { + c += cftab[i]; + cftab[i] = c; + } + + for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { + final int tmp = cftab[ll8[i] & 0xff]++; + checkBounds(tmp, ttLen, "tt index"); + tt[tmp] = i; + } + + if ((this.origPtr < 0) || (this.origPtr >= tt.length)) { + throw new IOException("stream corrupted"); + } + + this.su_tPos = tt[this.origPtr]; + this.su_count = 0; + this.su_i2 = 0; + this.su_ch2 = 256; /* not a char and not EOF */ + + if (this.blockRandomised) { + this.su_rNToGo = 0; + this.su_rTPos = 0; + return setupRandPartA(); + } + return setupNoRandPartA(); + } + + private int setupRandPartA() throws IOException { + if (this.su_i2 <= this.last) { + this.su_chPrev = this.su_ch2; + int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; + checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); + this.su_tPos = this.data.tt[this.su_tPos]; + if (this.su_rNToGo == 0) { + this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; + if (++this.su_rTPos == 512) { + this.su_rTPos = 0; + } + } else { + this.su_rNToGo--; + } + this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 
1 : 0; + this.su_i2++; + this.currentState = RAND_PART_B_STATE; + this.crc.updateCRC(su_ch2Shadow); + return su_ch2Shadow; + } + endBlock(); + initBlock(); + return setupBlock(); + } + + private int setupNoRandPartA() throws IOException { + if (this.su_i2 <= this.last) { + this.su_chPrev = this.su_ch2; + final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; + this.su_ch2 = su_ch2Shadow; + checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); + this.su_tPos = this.data.tt[this.su_tPos]; + this.su_i2++; + this.currentState = NO_RAND_PART_B_STATE; + this.crc.updateCRC(su_ch2Shadow); + return su_ch2Shadow; + } + this.currentState = NO_RAND_PART_A_STATE; + endBlock(); + initBlock(); + return setupBlock(); + } + + private int setupRandPartB() throws IOException { + if (this.su_ch2 != this.su_chPrev) { + this.currentState = RAND_PART_A_STATE; + this.su_count = 1; + return setupRandPartA(); + } else if (++this.su_count >= 4) { + this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); + checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); + this.su_tPos = this.data.tt[this.su_tPos]; + if (this.su_rNToGo == 0) { + this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; + if (++this.su_rTPos == 512) { + this.su_rTPos = 0; + } + } else { + this.su_rNToGo--; + } + this.su_j2 = 0; + this.currentState = RAND_PART_C_STATE; + if (this.su_rNToGo == 1) { + this.su_z ^= 1; + } + return setupRandPartC(); + } else { + this.currentState = RAND_PART_A_STATE; + return setupRandPartA(); + } + } + + private int setupRandPartC() throws IOException { + if (this.su_j2 < this.su_z) { + this.crc.updateCRC(this.su_ch2); + this.su_j2++; + return this.su_ch2; + } + this.currentState = RAND_PART_A_STATE; + this.su_i2++; + this.su_count = 0; + return setupRandPartA(); + } + + private int setupNoRandPartB() throws IOException { + if (this.su_ch2 != this.su_chPrev) { + this.su_count = 1; + return setupNoRandPartA(); + } else if (++this.su_count >= 4) { + checkBounds(this.su_tPos, this.data.ll8.length, "su_tPos"); + this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); + this.su_tPos = this.data.tt[this.su_tPos]; + this.su_j2 = 0; + return setupNoRandPartC(); + } else { + return setupNoRandPartA(); + } + } + + private int setupNoRandPartC() throws IOException { + if (this.su_j2 < this.su_z) { + final int su_ch2Shadow = this.su_ch2; + this.crc.updateCRC(su_ch2Shadow); + this.su_j2++; + this.currentState = NO_RAND_PART_C_STATE; + return su_ch2Shadow; + } + this.su_i2++; + this.su_count = 0; + return setupNoRandPartA(); + } + + private static final class Data { + + // (with blockSize 900k) + final boolean[] inUse = new boolean[256]; // 256 byte + + final byte[] seqToUnseq = new byte[256]; // 256 byte + final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte + final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte + + /** + * Freq table collected to save a pass over the data during + * decompression. 
+ */ + final int[] unzftab = new int[256]; // 1024 byte + + final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[] minLens = new int[N_GROUPS]; // 24 byte + + final int[] cftab = new int[257]; // 1028 byte + final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte + final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 + // byte + final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte + // --------------- + // 60798 byte + + int[] tt; // 3600000 byte + byte[] ll8; // 900000 byte + + // --------------- + // 4560782 byte + // =============== + + Data(final int blockSize100k) { + this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE]; + } + + /** + * Initializes the {@link #tt} array. + * + * This method is called when the required length of the array is known. + * I don't initialize it at construction time to avoid unneccessary + * memory allocation when compressing small files. + */ + int[] initTT(final int length) { + int[] ttShadow = this.tt; + + // tt.length should always be >= length, but theoretically + // it can happen, if the compressor mixed small and large + // blocks. Normally only the last block will be smaller + // than others. + if ((ttShadow == null) || (ttShadow.length < length)) { + this.tt = ttShadow = new int[length]; + } + + return ttShadow; + } + + } + + /** + * Checks if the signature matches what is expected for a bzip2 file. + * + * @param signature + * the bytes to check + * @param length + * the number of bytes to check + * @return true, if this stream is a bzip2 compressed stream, false otherwise + * + * @since 1.1 + */ + public static boolean matches(final byte[] signature, final int length) { + return length >= 3 && signature[0] == 'B' && + signature[1] == 'Z' && signature[2] == 'h'; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java new file mode 100644 index 000000000..cccf38d2a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java @@ -0,0 +1,1337 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.bzip2; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.commons.compress.compressors.CompressorOutputStream; + +/** + * An output stream that compresses into the BZip2 format into another stream. + * + * <p> + * The compression requires large amounts of memory. 
Thus you should call the + * {@link #close() close()} method as soon as possible, to force + * {@code BZip2CompressorOutputStream} to release the allocated memory. + * </p> + * + * <p> You can shrink the amount of allocated memory and maybe raise + * the compression speed by choosing a lower blocksize, which in turn + * may cause a lower compression ratio. You can avoid unnecessary + * memory allocation by avoiding using a blocksize which is bigger + * than the size of the input. </p> + * + * <p> You can compute the memory usage for compressing by the + * following formula: </p> + * + * <pre> + * <code>400k + (9 * blocksize)</code>. + * </pre> + * + * <p> To get the memory required for decompression by {@link + * BZip2CompressorInputStream} use </p> + * + * <pre> + * <code>65k + (5 * blocksize)</code>. + * </pre> + * + * <table width="100%" border="1" summary="Memory usage by blocksize"> + * <tr> + * <th colspan="3">Memory usage by blocksize</th> + * </tr> + * <tr> + * <th align="right">Blocksize</th> <th align="right">Compression<br> + * memory usage</th> <th align="right">Decompression<br> + * memory usage</th> + * </tr> + * <tr> + * <td align="right">100k</td> + * <td align="right">1300k</td> + * <td align="right">565k</td> + * </tr> + * <tr> + * <td align="right">200k</td> + * <td align="right">2200k</td> + * <td align="right">1065k</td> + * </tr> + * <tr> + * <td align="right">300k</td> + * <td align="right">3100k</td> + * <td align="right">1565k</td> + * </tr> + * <tr> + * <td align="right">400k</td> + * <td align="right">4000k</td> + * <td align="right">2065k</td> + * </tr> + * <tr> + * <td align="right">500k</td> + * <td align="right">4900k</td> + * <td align="right">2565k</td> + * </tr> + * <tr> + * <td align="right">600k</td> + * <td align="right">5800k</td> + * <td align="right">3065k</td> + * </tr> + * <tr> + * <td align="right">700k</td> + * <td align="right">6700k</td> + * <td align="right">3565k</td> + * </tr> + * <tr> + * <td align="right">800k</td> + * <td align="right">7600k</td> + * <td align="right">4065k</td> + * </tr> + * <tr> + * <td align="right">900k</td> + * <td align="right">8500k</td> + * <td align="right">4565k</td> + * </tr> + * </table> + * + * <p> + * For decompression {@code BZip2CompressorInputStream} allocates less memory if the + * bzipped input is smaller than one block. + * </p> + * + * <p> + * Instances of this class are not threadsafe. + * </p> + * + * <p> + * TODO: Update to BZip2 1.0.1 + * </p> + * @NotThreadSafe + */ +public class BZip2CompressorOutputStream extends CompressorOutputStream + implements BZip2Constants { + + /** + * The minimum supported blocksize {@code == 1}. + */ + public static final int MIN_BLOCKSIZE = 1; + + /** + * The maximum supported blocksize {@code == 9}. + */ + public static final int MAX_BLOCKSIZE = 9; + + private static final int GREATER_ICOST = 15; + private static final int LESSER_ICOST = 0; + + private static void hbMakeCodeLengths(final byte[] len, final int[] freq, + final Data dat, final int alphaSize, + final int maxLen) { + /* + * Nodes and heap entries run from 1. Entry 0 for both the heap and + * nodes is a sentinel. + */ + final int[] heap = dat.heap; + final int[] weight = dat.weight; + final int[] parent = dat.parent; + + for (int i = alphaSize; --i >= 0;) { + weight[i + 1] = (freq[i] == 0 ? 
1 : freq[i]) << 8; + } + + for (boolean tooLong = true; tooLong;) { + tooLong = false; + + int nNodes = alphaSize; + int nHeap = 0; + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (int i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + + int zz = nHeap; + final int tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + + while (nHeap > 1) { + final int n1 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + int yy = 0; + int zz = 1; + int tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + + final int n2 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + yy = 0; + zz = 1; + tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + nNodes++; + parent[n1] = parent[n2] = nNodes; + + final int weight_n1 = weight[n1]; + final int weight_n2 = weight[n2]; + weight[nNodes] = ((weight_n1 & 0xffffff00) + + (weight_n2 & 0xffffff00)) + | (1 + (((weight_n1 & 0x000000ff) + > (weight_n2 & 0x000000ff)) + ? (weight_n1 & 0x000000ff) + : (weight_n2 & 0x000000ff))); + + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + + tmp = 0; + zz = nHeap; + tmp = heap[zz]; + final int weight_tmp = weight[tmp]; + while (weight_tmp < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + + } + + for (int i = 1; i <= alphaSize; i++) { + int j = 0; + int k = i; + + for (int parent_k; (parent_k = parent[k]) >= 0;) { + k = parent_k; + j++; + } + + len[i - 1] = (byte) j; + if (j > maxLen) { + tooLong = true; + } + } + + if (tooLong) { + for (int i = 1; i < alphaSize; i++) { + int j = weight[i] >> 8; + j = 1 + (j >> 1); + weight[i] = j << 8; + } + } + } + } + + /** + * Index of the last char in the block, so the block size == last + 1. + */ + private int last; + + /** + * Always: in the range 0 .. 9. The current block size is 100000 * this + * number. + */ + private final int blockSize100k; + + private int bsBuff; + private int bsLive; + private final CRC crc = new CRC(); + + private int nInUse; + + private int nMTF; + + private int currentChar = -1; + private int runLength = 0; + + private int blockCRC; + private int combinedCRC; + private final int allowableBlockSize; + + /** + * All memory intensive stuff. + */ + private Data data; + private BlockSort blockSorter; + + private OutputStream out; + private volatile boolean closed; + + /** + * Chooses a blocksize based on the given length of the data to compress. + * + * @return The blocksize, between {@link #MIN_BLOCKSIZE} and + * {@link #MAX_BLOCKSIZE} both inclusive. For a negative + * {@code inputLength} this method returns {@code MAX_BLOCKSIZE} + * always. + * + * @param inputLength + * The length of the data which will be compressed by + * {@code BZip2CompressorOutputStream}. + */ + public static int chooseBlockSize(final long inputLength) { + return (inputLength > 0) ? (int) Math + .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE; + } + + /** + * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k. 
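+ * <p>A minimal usage sketch (the file name, the java.nio.file calls and the
+ * {@code data} byte array are illustrative only):</p>
+ * <pre>
+ * try (OutputStream fos = Files.newOutputStream(Paths.get("data.bz2"));
+ *      BZip2CompressorOutputStream bzOut = new BZip2CompressorOutputStream(fos)) {
+ *     bzOut.write(data);
+ * }
+ * </pre>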
+ * + * @param out + * the destination stream. + * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws NullPointerException + * if <code>out == null</code>. + */ + public BZip2CompressorOutputStream(final OutputStream out) + throws IOException { + this(out, MAX_BLOCKSIZE); + } + + /** + * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize. + * + * @param out + * the destination stream. + * @param blockSize + * the blockSize as 100k units. + * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws IllegalArgumentException + * if <code>(blockSize < 1) || (blockSize > 9)</code>. + * @throws NullPointerException + * if <code>out == null</code>. + * + * @see #MIN_BLOCKSIZE + * @see #MAX_BLOCKSIZE + */ + public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException { + if (blockSize < 1) { + throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1"); + } + if (blockSize > 9) { + throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9"); + } + + this.blockSize100k = blockSize; + this.out = out; + + /* 20 is just a paranoia constant */ + this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20; + init(); + } + + @Override + public void write(final int b) throws IOException { + if (!closed) { + write0(b); + } else { + throw new IOException("closed"); + } + } + + /** + * Writes the current byte to the buffer, run-length encoding it + * if it has been repeated at least four times (the first step + * RLEs sequences of four identical bytes). + * + * <p>Flushes the current block before writing data if it is + * full.</p> + * + * <p>"write to the buffer" means adding to data.buffer starting + * two steps "after" this.last - initially starting at index 1 + * (not 0) - and updating this.last to point to the last index + * written minus 1.</p> + */ + private void writeRun() throws IOException { + final int lastShadow = this.last; + + if (lastShadow < this.allowableBlockSize) { + final int currentCharShadow = this.currentChar; + final Data dataShadow = this.data; + dataShadow.inUse[currentCharShadow] = true; + final byte ch = (byte) currentCharShadow; + + int runLengthShadow = this.runLength; + this.crc.updateCRC(currentCharShadow, runLengthShadow); + + switch (runLengthShadow) { + case 1: + dataShadow.block[lastShadow + 2] = ch; + this.last = lastShadow + 1; + break; + + case 2: + dataShadow.block[lastShadow + 2] = ch; + dataShadow.block[lastShadow + 3] = ch; + this.last = lastShadow + 2; + break; + + case 3: { + final byte[] block = dataShadow.block; + block[lastShadow + 2] = ch; + block[lastShadow + 3] = ch; + block[lastShadow + 4] = ch; + this.last = lastShadow + 3; + } + break; + + default: { + runLengthShadow -= 4; + dataShadow.inUse[runLengthShadow] = true; + final byte[] block = dataShadow.block; + block[lastShadow + 2] = ch; + block[lastShadow + 3] = ch; + block[lastShadow + 4] = ch; + block[lastShadow + 5] = ch; + block[lastShadow + 6] = (byte) runLengthShadow; + this.last = lastShadow + 5; + } + break; + + } + } else { + endBlock(); + initBlock(); + writeRun(); + } + } + + /** + * Overriden to warn about an unclosed stream. 
+ */ + @Override + protected void finalize() throws Throwable { + if (!closed) { + System.err.println("Unclosed BZip2CompressorOutputStream detected, will *not* close it"); + } + super.finalize(); + } + + + public void finish() throws IOException { + if (!closed) { + closed = true; + try { + if (this.runLength > 0) { + writeRun(); + } + this.currentChar = -1; + endBlock(); + endCompression(); + } finally { + this.out = null; + this.blockSorter = null; + this.data = null; + } + } + } + + @Override + public void close() throws IOException { + if (!closed) { + final OutputStream outShadow = this.out; + try { + finish(); + } finally { + outShadow.close(); + } + } + } + + @Override + public void flush() throws IOException { + final OutputStream outShadow = this.out; + if (outShadow != null) { + outShadow.flush(); + } + } + + /** + * Writes magic bytes like BZ on the first position of the stream + * and bytes indiciating the file-format, which is + * huffmanised, followed by a digit indicating blockSize100k. + * @throws IOException if the magic bytes could not been written + */ + private void init() throws IOException { + bsPutUByte('B'); + bsPutUByte('Z'); + + this.data = new Data(this.blockSize100k); + this.blockSorter = new BlockSort(this.data); + + // huffmanised magic bytes + bsPutUByte('h'); + bsPutUByte('0' + this.blockSize100k); + + this.combinedCRC = 0; + initBlock(); + } + + private void initBlock() { + // blockNo++; + this.crc.initialiseCRC(); + this.last = -1; + // ch = 0; + + final boolean[] inUse = this.data.inUse; + for (int i = 256; --i >= 0;) { + inUse[i] = false; + } + + } + + private void endBlock() throws IOException { + this.blockCRC = this.crc.getFinalCRC(); + this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); + this.combinedCRC ^= this.blockCRC; + + // empty block at end of file + if (this.last == -1) { + return; + } + + /* sort the block and establish posn of original string */ + blockSort(); + + /* + * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 + * :-). A 32 bit value does not really give a strong enough guarantee + * that the value will not appear by chance in the compressed + * datastream. Worst-case probability of this event, for a 900k block, + * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 + * bits. For a compressed file of size 100Gb -- about 100000 blocks -- + * only a 48-bit marker will do. NB: normal compression/ decompression + * donot rely on these statistical properties. They are only important + * when trying to recover blocks from damaged files. + */ + bsPutUByte(0x31); + bsPutUByte(0x41); + bsPutUByte(0x59); + bsPutUByte(0x26); + bsPutUByte(0x53); + bsPutUByte(0x59); + + /* Now the block's CRC, so it is in a known place. */ + bsPutInt(this.blockCRC); + + /* Now a single bit indicating no randomisation. */ + bsW(1, 0); + + /* Finally, block's contents proper. */ + moveToFrontCodeAndSend(); + } + + private void endCompression() throws IOException { + /* + * Now another magic 48-bit number, 0x177245385090, to indicate the end + * of the last block. (sqrt(pi), if you want to know. I did want to use + * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me + * to feel statistically comfortable. Call me paranoid.) + */ + bsPutUByte(0x17); + bsPutUByte(0x72); + bsPutUByte(0x45); + bsPutUByte(0x38); + bsPutUByte(0x50); + bsPutUByte(0x90); + + bsPutInt(this.combinedCRC); + bsFinishedWithStream(); + } + + /** + * Returns the blocksize parameter specified at construction time. 
+ * @return the blocksize parameter specified at construction time + */ + public final int getBlockSize() { + return this.blockSize100k; + } + + @Override + public void write(final byte[] buf, int offs, final int len) + throws IOException { + if (offs < 0) { + throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); + } + if (len < 0) { + throw new IndexOutOfBoundsException("len(" + len + ") < 0."); + } + if (offs + len > buf.length) { + throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + + len + ") > buf.length(" + + buf.length + ")."); + } + if (closed) { + throw new IOException("stream closed"); + } + + for (final int hi = offs + len; offs < hi;) { + write0(buf[offs++]); + } + } + + /** + * Keeps track of the last bytes written and implicitly performs + * run-length encoding as the first step of the bzip2 algorithm. + */ + private void write0(int b) throws IOException { + if (this.currentChar != -1) { + b &= 0xff; + if (this.currentChar == b) { + if (++this.runLength > 254) { + writeRun(); + this.currentChar = -1; + this.runLength = 0; + } + // else nothing to do + } else { + writeRun(); + this.runLength = 1; + this.currentChar = b; + } + } else { + this.currentChar = b & 0xff; + this.runLength++; + } + } + + private static void hbAssignCodes(final int[] code, final byte[] length, + final int minLen, final int maxLen, + final int alphaSize) { + int vec = 0; + for (int n = minLen; n <= maxLen; n++) { + for (int i = 0; i < alphaSize; i++) { + if ((length[i] & 0xff) == n) { + code[i] = vec; + vec++; + } + } + vec <<= 1; + } + } + + private void bsFinishedWithStream() throws IOException { + while (this.bsLive > 0) { + final int ch = this.bsBuff >> 24; + this.out.write(ch); // write 8-bit + this.bsBuff <<= 8; + this.bsLive -= 8; + } + } + + private void bsW(final int n, final int v) throws IOException { + final OutputStream outShadow = this.out; + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + + this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); + this.bsLive = bsLiveShadow + n; + } + + private void bsPutUByte(final int c) throws IOException { + bsW(8, c); + } + + private void bsPutInt(final int u) throws IOException { + bsW(8, (u >> 24) & 0xff); + bsW(8, (u >> 16) & 0xff); + bsW(8, (u >> 8) & 0xff); + bsW(8, u & 0xff); + } + + private void sendMTFValues() throws IOException { + final byte[][] len = this.data.sendMTFValues_len; + final int alphaSize = this.nInUse + 2; + + for (int t = N_GROUPS; --t >= 0;) { + final byte[] len_t = len[t]; + for (int v = alphaSize; --v >= 0;) { + len_t[v] = GREATER_ICOST; + } + } + + /* Decide how many coding tables to use */ + // assert (this.nMTF > 0) : this.nMTF; + final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 + : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6; + + /* Generate an initial set of coding tables */ + sendMTFValues0(nGroups, alphaSize); + + /* + * Iterate up to N_ITERS times to improve the tables. + */ + final int nSelectors = sendMTFValues1(nGroups, alphaSize); + + /* Compute MTF values for the selectors. */ + sendMTFValues2(nGroups, nSelectors); + + /* Assign actual codes for the tables. */ + sendMTFValues3(nGroups, alphaSize); + + /* Transmit the mapping table. */ + sendMTFValues4(); + + /* Now the selectors. */ + sendMTFValues5(nGroups, nSelectors); + + /* Now the coding tables. 
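These are the per-group Huffman code lengths, which the bzip2 format stores delta-encoded (cf. recvDecodingTables() in BZip2CompressorInputStream).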
*/ + sendMTFValues6(nGroups, alphaSize); + + /* And finally, the block data proper */ + sendMTFValues7(); + } + + private void sendMTFValues0(final int nGroups, final int alphaSize) { + final byte[][] len = this.data.sendMTFValues_len; + final int[] mtfFreq = this.data.mtfFreq; + + int remF = this.nMTF; + int gs = 0; + + for (int nPart = nGroups; nPart > 0; nPart--) { + final int tFreq = remF / nPart; + int ge = gs - 1; + int aFreq = 0; + + for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) { + aFreq += mtfFreq[++ge]; + } + + if ((ge > gs) && (nPart != nGroups) && (nPart != 1) + && (((nGroups - nPart) & 1) != 0)) { + aFreq -= mtfFreq[ge--]; + } + + final byte[] len_np = len[nPart - 1]; + for (int v = alphaSize; --v >= 0;) { + if ((v >= gs) && (v <= ge)) { + len_np[v] = LESSER_ICOST; + } else { + len_np[v] = GREATER_ICOST; + } + } + + gs = ge + 1; + remF -= aFreq; + } + } + + private int sendMTFValues1(final int nGroups, final int alphaSize) { + final Data dataShadow = this.data; + final int[][] rfreq = dataShadow.sendMTFValues_rfreq; + final int[] fave = dataShadow.sendMTFValues_fave; + final short[] cost = dataShadow.sendMTFValues_cost; + final char[] sfmap = dataShadow.sfmap; + final byte[] selector = dataShadow.selector; + final byte[][] len = dataShadow.sendMTFValues_len; + final byte[] len_0 = len[0]; + final byte[] len_1 = len[1]; + final byte[] len_2 = len[2]; + final byte[] len_3 = len[3]; + final byte[] len_4 = len[4]; + final byte[] len_5 = len[5]; + final int nMTFShadow = this.nMTF; + + int nSelectors = 0; + + for (int iter = 0; iter < N_ITERS; iter++) { + for (int t = nGroups; --t >= 0;) { + fave[t] = 0; + final int[] rfreqt = rfreq[t]; + for (int i = alphaSize; --i >= 0;) { + rfreqt[i] = 0; + } + } + + nSelectors = 0; + + for (int gs = 0; gs < this.nMTF;) { + /* Set group start & end marks. */ + + /* + * Calculate the cost of this group as coded by each of the + * coding tables. + */ + + final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); + + if (nGroups == N_GROUPS) { + // unrolled version of the else-block + + short cost0 = 0; + short cost1 = 0; + short cost2 = 0; + short cost3 = 0; + short cost4 = 0; + short cost5 = 0; + + for (int i = gs; i <= ge; i++) { + final int icv = sfmap[i]; + cost0 += len_0[icv] & 0xff; + cost1 += len_1[icv] & 0xff; + cost2 += len_2[icv] & 0xff; + cost3 += len_3[icv] & 0xff; + cost4 += len_4[icv] & 0xff; + cost5 += len_5[icv] & 0xff; + } + + cost[0] = cost0; + cost[1] = cost1; + cost[2] = cost2; + cost[3] = cost3; + cost[4] = cost4; + cost[5] = cost5; + + } else { + for (int t = nGroups; --t >= 0;) { + cost[t] = 0; + } + + for (int i = gs; i <= ge; i++) { + final int icv = sfmap[i]; + for (int t = nGroups; --t >= 0;) { + cost[t] += len[t][icv] & 0xff; + } + } + } + + /* + * Find the coding table which is best for this group, and + * record its identity in the selector table. + */ + int bt = -1; + for (int t = nGroups, bc = 999999999; --t >= 0;) { + final int cost_t = cost[t]; + if (cost_t < bc) { + bc = cost_t; + bt = t; + } + } + + fave[bt]++; + selector[nSelectors] = (byte) bt; + nSelectors++; + + /* + * Increment the symbol frequencies for the selected table. + */ + final int[] rfreq_bt = rfreq[bt]; + for (int i = gs; i <= ge; i++) { + rfreq_bt[sfmap[i]]++; + } + + gs = ge + 1; + } + + /* + * Recompute the tables based on the accumulated frequencies. 
+ */ + for (int t = 0; t < nGroups; t++) { + hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); + } + } + + return nSelectors; + } + + private void sendMTFValues2(final int nGroups, final int nSelectors) { + // assert (nGroups < 8) : nGroups; + + final Data dataShadow = this.data; + final byte[] pos = dataShadow.sendMTFValues2_pos; + + for (int i = nGroups; --i >= 0;) { + pos[i] = (byte) i; + } + + for (int i = 0; i < nSelectors; i++) { + final byte ll_i = dataShadow.selector[i]; + byte tmp = pos[0]; + int j = 0; + + while (ll_i != tmp) { + j++; + final byte tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + } + + pos[0] = tmp; + dataShadow.selectorMtf[i] = (byte) j; + } + } + + private void sendMTFValues3(final int nGroups, final int alphaSize) { + final int[][] code = this.data.sendMTFValues_code; + final byte[][] len = this.data.sendMTFValues_len; + + for (int t = 0; t < nGroups; t++) { + int minLen = 32; + int maxLen = 0; + final byte[] len_t = len[t]; + for (int i = alphaSize; --i >= 0;) { + final int l = len_t[i] & 0xff; + if (l > maxLen) { + maxLen = l; + } + if (l < minLen) { + minLen = l; + } + } + + // assert (maxLen <= 20) : maxLen; + // assert (minLen >= 1) : minLen; + + hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); + } + } + + private void sendMTFValues4() throws IOException { + final boolean[] inUse = this.data.inUse; + final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; + + for (int i = 16; --i >= 0;) { + inUse16[i] = false; + final int i16 = i * 16; + for (int j = 16; --j >= 0;) { + if (inUse[i16 + j]) { + inUse16[i] = true; + } + } + } + + for (int i = 0; i < 16; i++) { + bsW(1, inUse16[i] ? 1 : 0); + } + + final OutputStream outShadow = this.out; + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int i = 0; i < 16; i++) { + if (inUse16[i]) { + final int i16 = i * 16; + for (int j = 0; j < 16; j++) { + // inlined: bsW(1, inUse[i16 + j] ? 
1 : 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + if (inUse[i16 + j]) { + bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); + } + bsLiveShadow++; + } + } + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues5(final int nGroups, final int nSelectors) + throws IOException { + bsW(3, nGroups); + bsW(15, nSelectors); + + final OutputStream outShadow = this.out; + final byte[] selectorMtf = this.data.selectorMtf; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int i = 0; i < nSelectors; i++) { + for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { + // inlined: bsW(1, 1); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + + // inlined: bsW(1, 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues6(final int nGroups, final int alphaSize) + throws IOException { + final byte[][] len = this.data.sendMTFValues_len; + final OutputStream outShadow = this.out; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int t = 0; t < nGroups; t++) { + final byte[] len_t = len[t]; + int curr = len_t[0] & 0xff; + + // inlined: bsW(5, curr); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= curr << (32 - bsLiveShadow - 5); + bsLiveShadow += 5; + + for (int i = 0; i < alphaSize; i++) { + final int lti = len_t[i] & 0xff; + while (curr < lti) { + // inlined: bsW(2, 2); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); + bsLiveShadow += 2; + + curr++; /* 10 */ + } + + while (curr > lti) { + // inlined: bsW(2, 3); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); + bsLiveShadow += 2; + + curr--; /* 11 */ + } + + // inlined: bsW(1, 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues7() throws IOException { + final Data dataShadow = this.data; + final byte[][] len = dataShadow.sendMTFValues_len; + final int[][] code = dataShadow.sendMTFValues_code; + final OutputStream outShadow = this.out; + final byte[] selector = dataShadow.selector; + final char[] sfmap = dataShadow.sfmap; + final int nMTFShadow = this.nMTF; + + int selCtr = 0; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int gs = 0; gs < nMTFShadow;) { + final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); + final int selector_selCtr = selector[selCtr] & 0xff; + final int[] code_selCtr = code[selector_selCtr]; + final byte[] len_selCtr = len[selector_selCtr]; + + while (gs <= ge) { + final int sfmap_i = sfmap[gs]; + + // + // inlined: bsW(len_selCtr[sfmap_i] & 0xff, 
+ // code_selCtr[sfmap_i]); + // + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + final int n = len_selCtr[sfmap_i] & 0xFF; + bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); + bsLiveShadow += n; + + gs++; + } + + gs = ge + 1; + selCtr++; + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void moveToFrontCodeAndSend() throws IOException { + bsW(24, this.data.origPtr); + generateMTFValues(); + sendMTFValues(); + } + + private void blockSort() { + blockSorter.blockSort(data, last); + } + + /* + * Performs Move-To-Front on the Burrows-Wheeler transformed + * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB + * run-length-encoded form. + * + * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> + */ + private void generateMTFValues() { + final int lastShadow = this.last; + final Data dataShadow = this.data; + final boolean[] inUse = dataShadow.inUse; + final byte[] block = dataShadow.block; + final int[] fmap = dataShadow.fmap; + final char[] sfmap = dataShadow.sfmap; + final int[] mtfFreq = dataShadow.mtfFreq; + final byte[] unseqToSeq = dataShadow.unseqToSeq; + final byte[] yy = dataShadow.generateMTFValues_yy; + + // make maps + int nInUseShadow = 0; + for (int i = 0; i < 256; i++) { + if (inUse[i]) { + unseqToSeq[i] = (byte) nInUseShadow; + nInUseShadow++; + } + } + this.nInUse = nInUseShadow; + + final int eob = nInUseShadow + 1; + + for (int i = eob; i >= 0; i--) { + mtfFreq[i] = 0; + } + + for (int i = nInUseShadow; --i >= 0;) { + yy[i] = (byte) i; + } + + int wr = 0; + int zPend = 0; + + for (int i = 0; i <= lastShadow; i++) { + final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; + byte tmp = yy[0]; + int j = 0; + + while (ll_i != tmp) { + j++; + final byte tmp2 = tmp; + tmp = yy[j]; + yy[j] = tmp2; + } + yy[0] = tmp; + + if (j == 0) { + zPend++; + } else { + if (zPend > 0) { + zPend--; + while (true) { + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; + wr++; + mtfFreq[RUNA]++; + } else { + sfmap[wr] = RUNB; + wr++; + mtfFreq[RUNB]++; + } + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { + break; + } + } + zPend = 0; + } + sfmap[wr] = (char) (j + 1); + wr++; + mtfFreq[j + 1]++; + } + } + + if (zPend > 0) { + zPend--; + while (true) { + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; + wr++; + mtfFreq[RUNA]++; + } else { + sfmap[wr] = RUNB; + wr++; + mtfFreq[RUNB]++; + } + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { + break; + } + } + } + + sfmap[wr] = (char) eob; + mtfFreq[eob]++; + this.nMTF = wr + 1; + } + + static final class Data { + + // with blockSize 900k + /* maps unsigned byte => "does it occur in block" */ + final boolean[] inUse = new boolean[256]; // 256 byte + final byte[] unseqToSeq = new byte[256]; // 256 byte + final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte + final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte + final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte + + final byte[] generateMTFValues_yy = new byte[256]; // 256 byte + final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 + // byte + final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 + // byte + final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte + final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte + final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 + // byte + final byte[] sendMTFValues2_pos = new 
byte[N_GROUPS]; // 6 byte + final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte + + final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte + final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + + // ------------ + // 333408 byte + + /* holds the RLEd block of original data starting at index 1. + * After sorting the last byte added to the buffer is at index + * 0. */ + final byte[] block; // 900021 byte + /* maps index in Burrows-Wheeler transformed block => index of + * byte in original block */ + final int[] fmap; // 3600000 byte + final char[] sfmap; // 3600000 byte + // ------------ + // 8433529 byte + // ============ + + /** + * Index of original line in Burrows-Wheeler table. + * + * <p>This is the index in fmap that points to the last byte + * of the original data.</p> + */ + int origPtr; + + Data(final int blockSize100k) { + final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE; + this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; + this.fmap = new int[n]; + this.sfmap = new char[2 * n]; + } + + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2Constants.java b/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2Constants.java new file mode 100644 index 000000000..9a8b9c4c0 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2Constants.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors.bzip2; + +/** + * Constants for both the compress and decompress BZip2 classes. + */ +interface BZip2Constants { + + int BASEBLOCKSIZE = 100000; + int MAX_ALPHA_SIZE = 258; + int MAX_CODE_LEN = 23; + int RUNA = 0; + int RUNB = 1; + int N_GROUPS = 6; + int G_SIZE = 50; + int N_ITERS = 4; + int MAX_SELECTORS = (2 + (900000 / G_SIZE)); + int NUM_OVERSHOOT_BYTES = 20; + +}
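These constants cap the encoder's working set: a block holds at most blockSize100k * BASEBLOCKSIZE bytes, groups of G_SIZE MTF symbols share one of up to N_GROUPS Huffman tables, and at most MAX_SELECTORS table selectors are recorded per block. For orientation, here is a minimal usage sketch of the output stream these constants support; the file names and buffer size are placeholders, not part of the patch:

import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;

public class BZip2CompressSketch {
    public static void main(final String[] args) throws Exception {
        try (InputStream in = Files.newInputStream(Paths.get("input.txt"));
             OutputStream rawOut = Files.newOutputStream(Paths.get("input.txt.bz2"));
             // blockSize 9 selects 900k blocks, the usual bzip2 default
             BZip2CompressorOutputStream bzOut =
                     new BZip2CompressorOutputStream(rawOut, 9)) {
            final byte[] buffer = new byte[8192];
            int n;
            while ((n = in.read(buffer)) != -1) {
                bzOut.write(buffer, 0, n);
            }
            // closing the stream runs finish(), which flushes the final block
            // and writes the end-of-stream magic plus the combined CRC
        }
    }
}

As the close() implementation earlier in this file shows, the wrapped stream is only closed after finish() has emitted the last block, so try-with-resources is sufficient here.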
\ No newline at end of file diff --git a/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2Utils.java b/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2Utils.java new file mode 100644 index 000000000..5582d981a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2Utils.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.bzip2; + +import java.util.LinkedHashMap; +import java.util.Map; +import org.apache.commons.compress.compressors.FileNameUtil; + +/** + * Utility code for the BZip2 compression format. + * @ThreadSafe + * @since 1.1 + */ +public abstract class BZip2Utils { + + private static final FileNameUtil fileNameUtil; + + static { + final Map<String, String> uncompressSuffix = + new LinkedHashMap<>(); + // backwards compatibilty: BZip2Utils never created the short + // tbz form, so .tar.bz2 has to be added explicitly + uncompressSuffix.put(".tar.bz2", ".tar"); + uncompressSuffix.put(".tbz2", ".tar"); + uncompressSuffix.put(".tbz", ".tar"); + uncompressSuffix.put(".bz2", ""); + uncompressSuffix.put(".bz", ""); + fileNameUtil = new FileNameUtil(uncompressSuffix, ".bz2"); + } + + /** Private constructor to prevent instantiation of this utility class. */ + private BZip2Utils() { + } + + /** + * Detects common bzip2 suffixes in the given filename. + * + * @param filename name of a file + * @return {@code true} if the filename has a common bzip2 suffix, + * {@code false} otherwise + */ + public static boolean isCompressedFilename(final String filename) { + return fileNameUtil.isCompressedFilename(filename); + } + + /** + * Maps the given name of a bzip2-compressed file to the name that the + * file should have after uncompression. Commonly used file type specific + * suffixes like ".tbz" or ".tbz2" are automatically detected and + * correctly mapped. For example the name "package.tbz2" is mapped to + * "package.tar". And any filenames with the generic ".bz2" suffix + * (or any other generic bzip2 suffix) is mapped to a name without that + * suffix. If no bzip2 suffix is detected, then the filename is returned + * unmapped. + * + * @param filename name of a file + * @return name of the corresponding uncompressed file + */ + public static String getUncompressedFilename(final String filename) { + return fileNameUtil.getUncompressedFilename(filename); + } + + /** + * Maps the given filename to the name that the file should have after + * compression with bzip2. Currently this method simply appends the suffix + * ".bz2" to the filename based on the standard behaviour of the "bzip2" + * program, but a future version may implement a more complex mapping if + * a new widely used naming pattern emerges. 
+ * + * @param filename name of a file + * @return name of the corresponding compressed file + */ + public static String getCompressedFilename(final String filename) { + return fileNameUtil.getCompressedFilename(filename); + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java b/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java new file mode 100644 index 000000000..69819e3da --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java @@ -0,0 +1,1082 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.bzip2; + +import java.util.BitSet; + +/** + * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link + * BZip2CompressorOutputStream}. + * + * <p>This class is based on a Java port of Julian Seward's + * blocksort.c in his libbzip2.</p> + * + * <p>The Burrows-Wheeler transform is a reversible transform of the + * original data that is supposed to group similar bytes close to + * each other. The idea is to sort all permutations of the input and + * only keep the last byte of each permutation. E.g. for "Commons + * Compress" you'd get:</p> + * + * <pre> + * CompressCommons + * Commons Compress + * CompressCommons + * essCommons Compr + * mmons CompressCo + * mons CompressCom + * mpressCommons Co + * ns CompressCommo + * ommons CompressC + * ompressCommons C + * ons CompressComm + * pressCommons Com + * ressCommons Comp + * s CompressCommon + * sCommons Compres + * ssCommons Compre + * </pre> + * + * <p>Which results in a new text "ss romooCCmmpnse", in addition the + * index of the first line that contained the original text is kept - + * in this case it is 1. The idea is that in a long English text all + * permutations that start with "he" are likely suffixes of a "the" and + * thus they end in "t" leading to a larger block of "t"s that can + * better be compressed by the subsequent Move-to-Front, run-length + * and Huffman encoding steps.</p> + * + * <p>For more information see for example:</p> + * <ul> + * <li><a + * href="http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf">Burrows, + * M. and Wheeler, D.: A Block-sorting Lossless Data Compression + * Algorithm</a></li> + * <li><a href="http://webglimpse.net/pubs/suffix.pdf">Manber, U. and + * Myers, G.: Suffix arrays: A new method for on-line string + * searches</a></li> + * <li><a + * href="http://www.cs.tufts.edu/~nr/comp150fp/archive/bob-sedgewick/fast-strings.pdf">Bentley, + * J.L.
and Sedgewick, R.: Fast Algorithms for Sorting and Searching + * Strings</a></li> + * </ul> + * + * @NotThreadSafe + */ +class BlockSort { + + /* + * Some of the constructs used in the C code cannot be ported + * literally to Java - for example macros, unsigned types. Some + * code has been hand-tuned to improve performance. In order to + * avoid memory pressure some structures are reused for several + * blocks and some memory is even shared between sorting and the + * MTF stage even though either algorithm uses it for its own + * purpose. + * + * Comments preserved from the actual C code are prefixed with + * "LBZ2:". + */ + + /* + * 2012-05-20 Stefan Bodewig: + * + * This class seems to mix several revisions of libbzip2's code. + * The mainSort function and those used by it look closer to the + * 0.9.5 version but show some variations introduced later. At + * the same time the logic of Compress 1.4 to randomize the block + * on bad input has been dropped after libbzip2 0.9.0 and replaced + * by a fallback sorting algorithm. + * + * I've added the fallbackSort function of 1.0.6 and tried to + * integrate it with the existing code without touching too much. + * I've also removed the now unused randomization code. + */ + + /* + * LBZ2: If you are ever unlucky/improbable enough to get a stack + * overflow whilst sorting, increase the following constant and + * try again. In practice I have never seen the stack go above 27 + * elems, so the following limit seems very generous. + */ + private static final int QSORT_STACK_SIZE = 1000; + + private static final int FALLBACK_QSORT_STACK_SIZE = 100; + + private static final int STACK_SIZE = + QSORT_STACK_SIZE < FALLBACK_QSORT_STACK_SIZE + ? FALLBACK_QSORT_STACK_SIZE : QSORT_STACK_SIZE; + + /* + * Used when sorting. If too many long comparisons happen, we stop sorting, + * and use fallbackSort instead. + */ + private int workDone; + private int workLimit; + private boolean firstAttempt; + + private final int[] stack_ll = new int[STACK_SIZE]; // 4000 byte + private final int[] stack_hh = new int[STACK_SIZE]; // 4000 byte + private final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte + + private final int[] mainSort_runningOrder = new int[256]; // 1024 byte + private final int[] mainSort_copy = new int[256]; // 1024 byte + private final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte + + private final int[] ftab = new int[65537]; // 262148 byte + + /** + * Array instance identical to Data's sfmap, both are used only + * temporarily and indepently, so we do not need to allocate + * additional memory. + */ + private final char[] quadrant; + + BlockSort(final BZip2CompressorOutputStream.Data data) { + this.quadrant = data.sfmap; + } + + void blockSort(final BZip2CompressorOutputStream.Data data, final int last) { + this.workLimit = WORK_FACTOR * last; + this.workDone = 0; + this.firstAttempt = true; + + if (last + 1 < 10000) { + fallbackSort(data, last); + } else { + mainSort(data, last); + + if (this.firstAttempt && (this.workDone > this.workLimit)) { + fallbackSort(data, last); + } + } + + final int[] fmap = data.fmap; + data.origPtr = -1; + for (int i = 0; i <= last; i++) { + if (fmap[i] == 0) { + data.origPtr = i; + break; + } + } + + // assert (data.origPtr != -1) : data.origPtr; + } + + /** + * Adapt fallbackSort to the expected interface of the rest of the + * code, in particular deal with the fact that block starts at + * offset 1 (in libbzip2 1.0.6 it starts at 0). 
+ */ + final void fallbackSort(final BZip2CompressorOutputStream.Data data, + final int last) { + data.block[0] = data.block[last + 1]; + fallbackSort(data.fmap, data.block, last + 1); + for (int i = 0; i < last + 1; i++) { + --data.fmap[i]; + } + for (int i = 0; i < last + 1; i++) { + if (data.fmap[i] == -1) { + data.fmap[i] = last; + break; + } + } + } + +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +/*--- LBZ2: Fallback O(N log(N)^2) sorting ---*/ +/*--- algorithm, for repetitive blocks ---*/ +/*---------------------------------------------*/ + + /* + * This is the fallback sorting algorithm libbzip2 1.0.6 uses for + * repetitive or very short inputs. + * + * The idea is inspired by Manber-Myers string suffix sorting + * algorithm. First a bucket sort places each permutation of the + * block into a bucket based on its first byte. Permutations are + * represented by pointers to their first character kept in + * (partially) sorted order inside the array ftab. + * + * The next step visits all buckets in order and performs a + * quicksort on all permutations of the bucket based on the index + * of the bucket the second byte of the permutation belongs to, + * thereby forming new buckets. When arrived here the + * permutations are sorted up to the second character and we have + * buckets of permutations that are identical up to two + * characters. + * + * Repeat the step of quicksorting each bucket, now based on the + * bucket holding the sequence of the third and forth character + * leading to four byte buckets. Repeat this doubling of bucket + * sizes until all buckets only contain single permutations or the + * bucket size exceeds the block size. + * + * I.e. + * + * "abraba" form three buckets for the chars "a", "b", and "r" in + * the first step with + * + * fmap = { 'a:' 5, 3, 0, 'b:' 4, 1, 'r', 2 } + * + * when looking at the bucket of "a"s the second characters are in + * the buckets that start with fmap-index 0 (rolled over), 3 and 3 + * respectively, forming two new buckets "aa" and "ab", so we get + * + * fmap = { 'aa:' 5, 'ab:' 3, 0, 'ba:' 4, 'br': 1, 'ra:' 2 } + * + * since the last bucket only contained a single item it didn't + * have to be sorted at all. + * + * There now is just one bucket with more than one permutation + * that remains to be sorted. For the permutation that starts + * with index 3 the third and forth char are in bucket 'aa' at + * index 0 and for the one starting at block index 0 they are in + * bucket 'ra' with sort index 5. The fully sorted order then becomes. + * + * fmap = { 5, 3, 0, 4, 1, 2 } + * + */ + + /** + * @param fmap points to the index of the starting point of a + * permutation inside the block of data in the current + * partially sorted order + * @param eclass points from the index of a character inside the + * block to the first index in fmap that contains the + * bucket of its suffix that is sorted in this step. 
+ * @param lo lower boundary of the fmap-interval to be sorted + * @param hi upper boundary of the fmap-interval to be sorted + */ + private void fallbackSimpleSort(final int[] fmap, + final int[] eclass, + final int lo, + final int hi) { + if (lo == hi) { + return; + } + + int j; + if (hi - lo > 3) { + for (int i = hi - 4; i >= lo; i--) { + final int tmp = fmap[i]; + final int ec_tmp = eclass[tmp]; + for (j = i + 4; j <= hi && ec_tmp > eclass[fmap[j]]; + j += 4) { + fmap[j - 4] = fmap[j]; + } + fmap[j - 4] = tmp; + } + } + + for (int i = hi - 1; i >= lo; i--) { + final int tmp = fmap[i]; + final int ec_tmp = eclass[tmp]; + for (j = i + 1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) { + fmap[j - 1] = fmap[j]; + } + fmap[j-1] = tmp; + } + } + + private static final int FALLBACK_QSORT_SMALL_THRESH = 10; + + /** + * swaps two values in fmap + */ + private void fswap(final int[] fmap, final int zz1, final int zz2) { + final int zztmp = fmap[zz1]; + fmap[zz1] = fmap[zz2]; + fmap[zz2] = zztmp; + } + + /** + * swaps two intervals starting at yyp1 and yyp2 of length yyn inside fmap. + */ + private void fvswap(final int[] fmap, int yyp1, int yyp2, int yyn) { + while (yyn > 0) { + fswap(fmap, yyp1, yyp2); + yyp1++; yyp2++; yyn--; + } + } + + private int fmin(final int a, final int b) { + return a < b ? a : b; + } + + private void fpush(final int sp, final int lz, final int hz) { + stack_ll[sp] = lz; + stack_hh[sp] = hz; + } + + private int[] fpop(final int sp) { + return new int[] { stack_ll[sp], stack_hh[sp] }; + } + + /** + * @param fmap points to the index of the starting point of a + * permutation inside the block of data in the current + * partially sorted order + * @param eclass points from the index of a character inside the + * block to the first index in fmap that contains the + * bucket of its suffix that is sorted in this step. + * @param loSt lower boundary of the fmap-interval to be sorted + * @param hiSt upper boundary of the fmap-interval to be sorted + */ + private void fallbackQSort3(final int[] fmap, + final int[] eclass, + final int loSt, + final int hiSt) { + int lo, unLo, ltLo, hi, unHi, gtHi, n; + + long r = 0; + int sp = 0; + fpush(sp++, loSt, hiSt); + + while (sp > 0) { + final int[] s = fpop(--sp); + lo = s[0]; hi = s[1]; + + if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { + fallbackSimpleSort(fmap, eclass, lo, hi); + continue; + } + + /* LBZ2: Random partitioning. Median of 3 sometimes fails to + avoid bad cases. Median of 9 seems to help but + looks rather expensive. This too seems to work but + is cheaper. Guidance for the magic constants + 7621 and 32768 is taken from Sedgewick's algorithms + book, chapter 35. 
+ */ + r = ((r * 7621) + 1) % 32768; + final long r3 = r % 3; + long med; + if (r3 == 0) { + med = eclass[fmap[lo]]; + } else if (r3 == 1) { + med = eclass[fmap[(lo + hi) >>> 1]]; + } else { + med = eclass[fmap[hi]]; + } + + unLo = ltLo = lo; + unHi = gtHi = hi; + + // looks like the ternary partition attributed to Wegner + // in the cited Sedgewick paper + while (true) { + while (true) { + if (unLo > unHi) { + break; + } + n = eclass[fmap[unLo]] - (int) med; + if (n == 0) { + fswap(fmap, unLo, ltLo); + ltLo++; unLo++; + continue; + } + if (n > 0) { + break; + } + unLo++; + } + while (true) { + if (unLo > unHi) { + break; + } + n = eclass[fmap[unHi]] - (int) med; + if (n == 0) { + fswap(fmap, unHi, gtHi); + gtHi--; unHi--; + continue; + } + if (n < 0) { + break; + } + unHi--; + } + if (unLo > unHi) { + break; + } + fswap(fmap, unLo, unHi); unLo++; unHi--; + } + + if (gtHi < ltLo) { + continue; + } + + n = fmin(ltLo - lo, unLo - ltLo); + fvswap(fmap, lo, unLo - n, n); + int m = fmin(hi - gtHi, gtHi - unHi); + fvswap(fmap, unHi + 1, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + if (n - lo > hi - m) { + fpush(sp++, lo, n); + fpush(sp++, m, hi); + } else { + fpush(sp++, m, hi); + fpush(sp++, lo, n); + } + } + } + + +/*---------------------------------------------*/ + + private int[] eclass; + + private int[] getEclass() { + if (eclass == null) { + eclass = new int[quadrant.length / 2]; + } + return eclass; + } + + /* + * The C code uses an array of ints (each int holding 32 flags) to + * represents the bucket-start flags (bhtab). It also contains + * optimizations to skip over 32 consecutively set or + * consecutively unset bits on word boundaries at once. For now + * I've chosen to use the simpler but potentially slower code + * using BitSet - also in the hope that using the BitSet#nextXXX + * methods may be fast enough. + */ + + /** + * @param fmap points to the index of the starting point of a + * permutation inside the block of data in the current + * partially sorted order + * @param block the original data + * @param nblock size of the block + * @param off offset of first byte to sort in block + */ + final void fallbackSort(final int[] fmap, final byte[] block, final int nblock) { + final int[] ftab = new int[257]; + int H, i, j, k, l, r, cc, cc1; + int nNotDone; + int nBhtab; + final int[] eclass = getEclass(); + + for (i = 0; i < nblock; i++) { + eclass[i] = 0; + } + /*-- + LBZ2: Initial 1-char radix sort to generate + initial fmap and initial BH bits. + --*/ + for (i = 0; i < nblock; i++) { + ftab[block[i] & 0xff]++; + } + for (i = 1; i < 257; i++) { + ftab[i] += ftab[i - 1]; + } + + for (i = 0; i < nblock; i++) { + j = block[i] & 0xff; + k = ftab[j] - 1; + ftab[j] = k; + fmap[k] = i; + } + + nBhtab = 64 + nblock; + final BitSet bhtab = new BitSet(nBhtab); + for (i = 0; i < 256; i++) { + bhtab.set(ftab[i]); + } + + /*-- + LBZ2: Inductively refine the buckets. Kind-of an + "exponential radix sort" (!), inspired by the + Manber-Myers suffix array construction algorithm. 
+ --*/ + + /*-- LBZ2: set sentinel bits for block-end detection --*/ + for (i = 0; i < 32; i++) { + bhtab.set(nblock + 2 * i); + bhtab.clear(nblock + 2 * i + 1); + } + + /*-- LBZ2: the log(N) loop --*/ + H = 1; + while (true) { + + j = 0; + for (i = 0; i < nblock; i++) { + if (bhtab.get(i)) { + j = i; + } + k = fmap[i] - H; + if (k < 0) { + k += nblock; + } + eclass[k] = j; + } + + nNotDone = 0; + r = -1; + while (true) { + + /*-- LBZ2: find the next non-singleton bucket --*/ + k = r + 1; + k = bhtab.nextClearBit(k); + l = k - 1; + if (l >= nblock) { + break; + } + k = bhtab.nextSetBit(k + 1); + r = k - 1; + if (r >= nblock) { + break; + } + + /*-- LBZ2: now [l, r] bracket current bucket --*/ + if (r > l) { + nNotDone += (r - l + 1); + fallbackQSort3(fmap, eclass, l, r); + + /*-- LBZ2: scan bucket and generate header bits-- */ + cc = -1; + for (i = l; i <= r; i++) { + cc1 = eclass[fmap[i]]; + if (cc != cc1) { + bhtab.set(i); + cc = cc1; + } + } + } + } + + H *= 2; + if (H > nblock || nNotDone == 0) { + break; + } + } + } + +/*---------------------------------------------*/ + + /* + * LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here. + * Possibly because the number of elems to sort is usually small, typically + * <= 20. + */ + private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, 797161, + 2391484 }; + + /** + * This is the most hammered method of this class. + * + * <p> + * This is the version using unrolled loops. Normally I never use such ones + * in Java code. The unrolling has shown a noticable performance improvement + * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the + * JIT compiler of the vm. + * </p> + */ + private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow, + final int lo, final int hi, final int d, + final int lastShadow) { + final int bigN = hi - lo + 1; + if (bigN < 2) { + return this.firstAttempt && (this.workDone > this.workLimit); + } + + int hp = 0; + while (INCS[hp] < bigN) { + hp++; + } + + final int[] fmap = dataShadow.fmap; + final char[] quadrant = this.quadrant; + final byte[] block = dataShadow.block; + final int lastPlus1 = lastShadow + 1; + final boolean firstAttemptShadow = this.firstAttempt; + final int workLimitShadow = this.workLimit; + int workDoneShadow = this.workDone; + + // Following block contains unrolled code which could be shortened by + // coding it in additional loops. 
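+ // The INCS table above holds Knuth's shell sort increments (h = 3*h + 1).
+ // The HAMMER loop below is the unrolled, inlined equivalent of mainGtU(): it
+ // compares two rotations of the block byte by byte, falling back to the
+ // precomputed quadrant values for long matches, and counts the work done so
+ // the caller can bail out to fallbackSort() on pathological input.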
+ + HP: while (--hp >= 0) { + final int h = INCS[hp]; + final int mj = lo + h - 1; + + for (int i = lo + h; i <= hi;) { + // copy + for (int k = 3; (i <= hi) && (--k >= 0); i++) { + final int v = fmap[i]; + final int vd = v + d; + int j = i; + + // for (int a; + // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd, + // block, quadrant, lastShadow); + // j -= h) { + // fmap[j] = a; + // } + // + // unrolled version: + + // start inline mainGTU + boolean onceRunned = false; + int a = 0; + + HAMMER: while (true) { + if (onceRunned) { + fmap[j] = a; + if ((j -= h) <= mj) { //NOSONAR + break HAMMER; + } + } else { + onceRunned = true; + } + + a = fmap[j - h]; + int i1 = a + d; + int i2 = vd; + + // following could be done in a loop, but + // unrolled it for performance: + if (block[i1 + 1] == block[i2 + 1]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (block[i1 + 5] == block[i2 + 5]) { + if (block[(i1 += 6)] == block[(i2 += 6)]) { //NOSONAR + int x = lastShadow; + X: while (x > 0) { + x -= 4; + + if (block[i1 + 1] == block[i2 + 1]) { + if (quadrant[i1] == quadrant[i2]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (quadrant[i1 + 1] == quadrant[i2 + 1]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (quadrant[i1 + 2] == quadrant[i2 + 2]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (quadrant[i1 + 3] == quadrant[i2 + 3]) { + if ((i1 += 4) >= lastPlus1) { //NOSONAR + i1 -= lastPlus1; + } + if ((i2 += 4) >= lastPlus1) { //NOSONAR + i2 -= lastPlus1; + } + workDoneShadow++; + continue X; + } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1] > quadrant[i2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } + break HAMMER; + } // while x > 0 + if ((block[i1] & 0xff) > (block[i2] & 0xff)) { + continue HAMMER; + } + break HAMMER; + } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } // HAMMER + // end inline mainGTU + + fmap[j] = v; + } + + if (firstAttemptShadow && (i <= hi) + && (workDoneShadow > workLimitShadow)) { + break HP; + } + } + } + + this.workDone = workDoneShadow; + return firstAttemptShadow && (workDoneShadow > workLimitShadow); + } + +/*-- + LBZ2: The following is an implementation of + an elegant 3-way quicksort for strings, + described in a paper "Fast 
Algorithms for + Sorting and Searching Strings", by Robert + Sedgewick and Jon L. Bentley. +--*/ + + private static void vswap(final int[] fmap, int p1, int p2, int n) { + n += p1; + while (p1 < n) { + final int t = fmap[p1]; + fmap[p1++] = fmap[p2]; + fmap[p2++] = t; + } + } + + private static byte med3(final byte a, final byte b, final byte c) { + return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c + : a); + } + + private static final int SMALL_THRESH = 20; + private static final int DEPTH_THRESH = 10; + private static final int WORK_FACTOR = 30; + + /** + * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 + */ + private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow, + final int loSt, final int hiSt, final int dSt, + final int last) { + final int[] stack_ll = this.stack_ll; + final int[] stack_hh = this.stack_hh; + final int[] stack_dd = this.stack_dd; + final int[] fmap = dataShadow.fmap; + final byte[] block = dataShadow.block; + + stack_ll[0] = loSt; + stack_hh[0] = hiSt; + stack_dd[0] = dSt; + + for (int sp = 1; --sp >= 0;) { + final int lo = stack_ll[sp]; + final int hi = stack_hh[sp]; + final int d = stack_dd[sp]; + + if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) { + if (mainSimpleSort(dataShadow, lo, hi, d, last)) { + return; + } + } else { + final int d1 = d + 1; + final int med = med3(block[fmap[lo] + d1], + block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff; + + int unLo = lo; + int unHi = hi; + int ltLo = lo; + int gtHi = hi; + + while (true) { + while (unLo <= unHi) { + final int n = (block[fmap[unLo] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[ltLo]; + fmap[ltLo++] = temp; + } else if (n < 0) { + unLo++; + } else { + break; + } + } + + while (unLo <= unHi) { + final int n = (block[fmap[unHi] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unHi]; + fmap[unHi--] = fmap[gtHi]; + fmap[gtHi--] = temp; + } else if (n > 0) { + unHi--; + } else { + break; + } + } + + if (unLo <= unHi) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[unHi]; + fmap[unHi--] = temp; + } else { + break; + } + } + + if (gtHi < ltLo) { + stack_ll[sp] = lo; + stack_hh[sp] = hi; + stack_dd[sp] = d1; + sp++; + } else { + int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) + : (unLo - ltLo); + vswap(fmap, lo, unLo - n, n); + int m = ((hi - gtHi) < (gtHi - unHi)) ? 
(hi - gtHi) + : (gtHi - unHi); + vswap(fmap, unLo, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + stack_ll[sp] = lo; + stack_hh[sp] = n; + stack_dd[sp] = d; + sp++; + + stack_ll[sp] = n + 1; + stack_hh[sp] = m - 1; + stack_dd[sp] = d1; + sp++; + + stack_ll[sp] = m; + stack_hh[sp] = hi; + stack_dd[sp] = d; + sp++; + } + } + } + } + + private static final int SETMASK = (1 << 21); + private static final int CLEARMASK = (~SETMASK); + + final void mainSort(final BZip2CompressorOutputStream.Data dataShadow, + final int lastShadow) { + final int[] runningOrder = this.mainSort_runningOrder; + final int[] copy = this.mainSort_copy; + final boolean[] bigDone = this.mainSort_bigDone; + final int[] ftab = this.ftab; + final byte[] block = dataShadow.block; + final int[] fmap = dataShadow.fmap; + final char[] quadrant = this.quadrant; + final int workLimitShadow = this.workLimit; + final boolean firstAttemptShadow = this.firstAttempt; + + // LBZ2: Set up the 2-byte frequency table + for (int i = 65537; --i >= 0;) { + ftab[i] = 0; + } + + /* + * In the various block-sized structures, live data runs from 0 to + * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area + * for block. + */ + for (int i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++) { + block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1]; + } + for (int i = lastShadow + BZip2Constants.NUM_OVERSHOOT_BYTES +1; --i >= 0;) { + quadrant[i] = 0; + } + block[0] = block[lastShadow + 1]; + + // LBZ2: Complete the initial radix sort: + + int c1 = block[0] & 0xff; + for (int i = 0; i <= lastShadow; i++) { + final int c2 = block[i + 1] & 0xff; + ftab[(c1 << 8) + c2]++; + c1 = c2; + } + + for (int i = 1; i <= 65536; i++) { + ftab[i] += ftab[i - 1]; + } + + c1 = block[1] & 0xff; + for (int i = 0; i < lastShadow; i++) { + final int c2 = block[i + 2] & 0xff; + fmap[--ftab[(c1 << 8) + c2]] = i; + c1 = c2; + } + + fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow; + + /* + * LBZ2: Now ftab contains the first loc of every small bucket. Calculate the + * running order, from smallest to largest big bucket. + */ + for (int i = 256; --i >= 0;) { + bigDone[i] = false; + runningOrder[i] = i; + } + + // h = 364, 121, 40, 13, 4, 1 + for (int h = 364; h != 1;) { //NOSONAR + h /= 3; + for (int i = h; i <= 255; i++) { + final int vv = runningOrder[i]; + final int a = ftab[(vv + 1) << 8] - ftab[vv << 8]; + final int b = h - 1; + int j = i; + for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j + - h]) { + runningOrder[j] = ro; + j -= h; + if (j <= b) { + break; + } + } + runningOrder[j] = vv; + } + } + + /* + * LBZ2: The main sorting loop. + */ + for (int i = 0; i <= 255; i++) { + /* + * LBZ2: Process big buckets, starting with the least full. + */ + final int ss = runningOrder[i]; + + // Step 1: + /* + * LBZ2: Complete the big bucket [ss] by quicksorting any unsorted small + * buckets [ss, j]. Hopefully previous pointer-scanning phases have + * already completed many of the small buckets [ss, j], so we don't + * have to sort them at all. 
+ */ + for (int j = 0; j <= 255; j++) { + final int sb = (ss << 8) + j; + final int ftab_sb = ftab[sb]; + if ((ftab_sb & SETMASK) != SETMASK) { + final int lo = ftab_sb & CLEARMASK; + final int hi = (ftab[sb + 1] & CLEARMASK) - 1; + if (hi > lo) { + mainQSort3(dataShadow, lo, hi, 2, lastShadow); + if (firstAttemptShadow + && (this.workDone > workLimitShadow)) { + return; + } + } + ftab[sb] = ftab_sb | SETMASK; + } + } + + // Step 2: + // LBZ2: Now scan this big bucket so as to synthesise the + // sorted order for small buckets [t, ss] for all t != ss. + + for (int j = 0; j <= 255; j++) { + copy[j] = ftab[(j << 8) + ss] & CLEARMASK; + } + + for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) { + final int fmap_j = fmap[j]; + c1 = block[fmap_j] & 0xff; + if (!bigDone[c1]) { + fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1); + copy[c1]++; + } + } + + for (int j = 256; --j >= 0;) { + ftab[(j << 8) + ss] |= SETMASK; + } + + // Step 3: + /* + * LBZ2: The ss big bucket is now done. Record this fact, and update the + * quadrant descriptors. Remember to update quadrants in the + * overshoot area too, if necessary. The "if (i < 255)" test merely + * skips this updating for the last bucket processed, since updating + * for the last bucket is pointless. + */ + bigDone[ss] = true; + + if (i < 255) { + final int bbStart = ftab[ss << 8] & CLEARMASK; + final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; + int shifts = 0; + + while ((bbSize >> shifts) > 65534) { + shifts++; + } + + for (int j = 0; j < bbSize; j++) { + final int a2update = fmap[bbStart + j]; + final char qVal = (char) (j >> shifts); + quadrant[a2update] = qVal; + if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES) { + quadrant[a2update + lastShadow + 1] = qVal; + } + } + } + + } + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/bzip2/CRC.java b/src/main/java/org/apache/commons/compress/compressors/bzip2/CRC.java new file mode 100644 index 000000000..a20ea7063 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/bzip2/CRC.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.bzip2; + +/** + * A simple class the hold and calculate the CRC for sanity checking of the + * data. 
+ * @NotThreadSafe + */ +class CRC { + private static final int crc32Table[] = { + 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, + 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, + 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, + 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, + 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, + 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, + 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, + 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, + 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, + 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, + 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, + 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, + 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, + 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, + 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, + 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, + 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, + 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, + 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, + 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, + 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, + 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, + 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, + 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, + 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, + 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, + 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, + 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, + 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, + 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, + 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, + 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, + 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, + 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, + 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, + 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, + 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, + 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, + 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, + 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, + 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, + 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, + 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, + 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, + 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, + 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, + 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, + 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, + 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, + 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, + 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, + 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, + 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, + 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, + 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, + 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, + 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, + 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, + 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, + 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, + 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, + 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, + 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, + 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 + }; + + CRC() { + initialiseCRC(); + } + + void initialiseCRC() { + globalCrc = 0xffffffff; + } + + int getFinalCRC() { + return ~globalCrc; + } + + int getGlobalCRC() { + return globalCrc; + } + + void setGlobalCRC(final int newCrc) { + globalCrc = newCrc; + } + + void 
updateCRC(final int inCh) { + int temp = (globalCrc >> 24) ^ inCh; + if (temp < 0) { + temp = 256 + temp; + } + globalCrc = (globalCrc << 8) ^ CRC.crc32Table[temp]; + } + + void updateCRC(final int inCh, int repeat) { + int globalCrcShadow = this.globalCrc; + while (repeat-- > 0) { + final int temp = (globalCrcShadow >> 24) ^ inCh; + globalCrcShadow = (globalCrcShadow << 8) ^ crc32Table[(temp >= 0) + ? temp + : (temp + 256)]; + } + this.globalCrc = globalCrcShadow; + } + + private int globalCrc; +}
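The 256-entry table above is a most-significant-bit-first CRC-32 lookup table for the polynomial 0x04C11DB7 with no input or output reflection, which is why updateCRC feeds each byte in at the top of the 32-bit register. As an illustrative sketch (class and method names are made up, not from the patch), the entries can be regenerated like this:

final class Bzip2CrcTableSketch {
    // Rebuilds the MSB-first CRC-32 table (polynomial 0x04C11DB7) used above.
    static int[] makeTable() {
        final int[] table = new int[256];
        for (int i = 0; i < 256; i++) {
            int crc = i << 24;                    // the byte enters at the top of the register
            for (int bit = 0; bit < 8; bit++) {
                crc = (crc & 0x80000000) != 0
                    ? (crc << 1) ^ 0x04c11db7     // shifting out a 1-bit: reduce by the polynomial
                    : crc << 1;                   // shifting out a 0-bit: plain shift
            }
            table[i] = crc;                       // e.g. table[1] == 0x04c11db7, table[2] == 0x09823b6e
        }
        return table;
    }
}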
\ No newline at end of file diff --git a/src/main/java/org/apache/commons/compress/compressors/bzip2/Rand.java b/src/main/java/org/apache/commons/compress/compressors/bzip2/Rand.java new file mode 100644 index 000000000..bb6ef80ba --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/bzip2/Rand.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors.bzip2; + +/** + * Random numbers for both the compress and decompress BZip2 classes. + */ +final class Rand { + + private static final int[] RNUMS = { + 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, + 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, + 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, + 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, + 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, + 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, + 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, + 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, + 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, + 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, + 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, + 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, + 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, + 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, + 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, + 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, + 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, + 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, + 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, + 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, + 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, + 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, + 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, + 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, + 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, + 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, + 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, + 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, + 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, + 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, + 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, + 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, + 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, + 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, + 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, + 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, + 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, + 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, + 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, + 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, + 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, + 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, + 804, 378, 215, 828, 592, 281, 
565, 555, 710, 82, + 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, + 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, + 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, + 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, + 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, + 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, + 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, + 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, + 936, 638 + }; + + /** + * Return the random number at a specific index. + * + * @param i the index + * @return the random number + */ + static int rNums(final int i){ + return RNUMS[i]; + } +}
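This table is only consulted when a stream contains one of the legacy "randomised" blocks that very old bzip2 releases (up to 0.9.0) could emit; as noted in BlockSort, the compressor in this package never creates them. The following is a simplified sketch of how such a table is classically consumed during de-randomisation, assuming the usual 512-entry layout and package access to Rand; it is not the decoder's actual code:

// Assumed semantics, illustrative only: each table entry gives the distance to
// the next byte whose lowest bit gets flipped.
final class DerandomiseSketch {       // would need to live in the bzip2 package to see Rand
    private int rTPos;                // cyclic index into the 512-entry table
    private int rNToGo;               // bytes left until the next low-bit flip

    int derandomise(final int byteValue) {
        if (rNToGo == 0) {
            rNToGo = Rand.rNums(rTPos);
            if (++rTPos == 512) {
                rTPos = 0;
            }
        }
        rNToGo--;
        return rNToGo == 1 ? (byteValue ^ 1) : byteValue;
    }
}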
\ No newline at end of file diff --git a/src/main/java/org/apache/commons/compress/compressors/bzip2/package.html b/src/main/java/org/apache/commons/compress/compressors/bzip2/package.html new file mode 100644 index 000000000..fe27e6e66 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/bzip2/package.html @@ -0,0 +1,24 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream classes for compressing and decompressing + streams using the BZip2 algorithm.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorInputStream.java new file mode 100644 index 000000000..0e072844a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorInputStream.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.deflate; + +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.Inflater; +import java.util.zip.InflaterInputStream; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.CountingInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * Deflate decompressor. + * @since 1.9 + */ +public class DeflateCompressorInputStream extends CompressorInputStream + implements InputStreamStatistics { + + private static final int MAGIC_1 = 0x78; + private static final int MAGIC_2a = 0x01; + private static final int MAGIC_2b = 0x5e; + private static final int MAGIC_2c = 0x9c; + private static final int MAGIC_2d = 0xda; + + private final CountingInputStream countingStream; + private final InputStream in; + private final Inflater inflater; + + /** + * Creates a new input stream that decompresses Deflate-compressed data + * from the specified input stream. 
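(Editorial aside: a usage sketch for this input stream; the two-argument constructor and DeflateParameters.setWithZlibHeader appear further down in this change. The example class and the file name data.deflate are placeholders.)

    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream;
    import org.apache.commons.compress.compressors.deflate.DeflateParameters;

    public class DeflateReadExample {
        public static void main(String[] args) throws IOException {
            // Raw DEFLATE data without a zlib header; the one-argument constructor
            // would instead expect the default zlib-wrapped form.
            DeflateParameters params = new DeflateParameters();
            params.setWithZlibHeader(false);

            try (InputStream file = Files.newInputStream(Paths.get("data.deflate"));
                 DeflateCompressorInputStream in = new DeflateCompressorInputStream(file, params)) {
                byte[] buffer = new byte[8192];
                int n;
                while ((n = in.read(buffer)) != -1) {
                    // process buffer[0..n)
                }
            }
        }
    }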
+ * + * @param inputStream where to read the compressed data + * + */ + public DeflateCompressorInputStream(final InputStream inputStream) { + this(inputStream, new DeflateParameters()); + } + + /** + * Creates a new input stream that decompresses Deflate-compressed data + * from the specified input stream. + * + * @param inputStream where to read the compressed data + * @param parameters parameters + */ + public DeflateCompressorInputStream(final InputStream inputStream, + final DeflateParameters parameters) { + inflater = new Inflater(!parameters.withZlibHeader()); + in = new InflaterInputStream(countingStream = new CountingInputStream(inputStream), inflater); + } + + /** {@inheritDoc} */ + @Override + public int read() throws IOException { + final int ret = in.read(); + count(ret == -1 ? 0 : 1); + return ret; + } + + /** {@inheritDoc} */ + @Override + public int read(final byte[] buf, final int off, final int len) throws IOException { + final int ret = in.read(buf, off, len); + count(ret); + return ret; + } + + /** {@inheritDoc} */ + @Override + public long skip(final long n) throws IOException { + return IOUtils.skip(in, n); + } + + /** {@inheritDoc} */ + @Override + public int available() throws IOException { + return in.available(); + } + + /** {@inheritDoc} */ + @Override + public void close() throws IOException { + try { + in.close(); + } finally { + inflater.end(); + } + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return countingStream.getBytesRead(); + } + + /** + * Checks if the signature matches what is expected for a zlib / deflated file + * with the zlib header. + * + * @param signature + * the bytes to check + * @param length + * the number of bytes to check + * @return true, if this stream is zlib / deflate compressed with a header + * stream, false otherwise + * + * @since 1.10 + */ + public static boolean matches(final byte[] signature, final int length) { + return length > 3 && signature[0] == MAGIC_1 && ( + signature[1] == (byte) MAGIC_2a || + signature[1] == (byte) MAGIC_2b || + signature[1] == (byte) MAGIC_2c || + signature[1] == (byte) MAGIC_2d); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorOutputStream.java new file mode 100644 index 000000000..a31560563 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorOutputStream.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.deflate; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; + +import org.apache.commons.compress.compressors.CompressorOutputStream; + +/** + * Deflate compressor. + * @since 1.9 + */ +public class DeflateCompressorOutputStream extends CompressorOutputStream { + private final DeflaterOutputStream out; + private final Deflater deflater; + + /** + * Creates a Deflate compressed output stream with the default parameters. + * @param outputStream the stream to wrap + * @throws IOException on error + */ + public DeflateCompressorOutputStream(final OutputStream outputStream) throws IOException { + this(outputStream, new DeflateParameters()); + } + + /** + * Creates a Deflate compressed output stream with the specified parameters. + * @param outputStream the stream to wrap + * @param parameters the deflate parameters to apply + * @throws IOException on error + */ + public DeflateCompressorOutputStream(final OutputStream outputStream, + final DeflateParameters parameters) throws IOException { + this.deflater = new Deflater(parameters.getCompressionLevel(), !parameters.withZlibHeader()); + this.out = new DeflaterOutputStream(outputStream, deflater); + } + + @Override + public void write(final int b) throws IOException { + out.write(b); + } + + @Override + public void write(final byte[] buf, final int off, final int len) throws IOException { + out.write(buf, off, len); + } + + /** + * Flushes the encoder and calls <code>outputStream.flush()</code>. + * All buffered pending data will then be decompressible from + * the output stream. Calling this function very often may increase + * the compressed file size a lot. + */ + @Override + public void flush() throws IOException { + out.flush(); + } + + /** + * Finishes compression without closing the underlying stream. + * <p>No more data can be written to this stream after finishing.</p> + * @throws IOException on error + */ + public void finish() throws IOException { + out.finish(); + } + + @Override + public void close() throws IOException { + try { + out.close(); + } finally { + deflater.end(); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/deflate/DeflateParameters.java b/src/main/java/org/apache/commons/compress/compressors/deflate/DeflateParameters.java new file mode 100644 index 000000000..7679942a2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/deflate/DeflateParameters.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors.deflate; + +import java.util.zip.Deflater; + +/** + * Parameters for the Deflate compressor. 
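(Editorial aside: a usage sketch pairing DeflateCompressorOutputStream, shown above, with the DeflateParameters class whose definition continues below. The example class and the file name data.zz are placeholders.)

    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.zip.Deflater;

    import org.apache.commons.compress.compressors.deflate.DeflateCompressorOutputStream;
    import org.apache.commons.compress.compressors.deflate.DeflateParameters;

    public class DeflateWriteExample {
        public static void main(String[] args) throws IOException {
            DeflateParameters params = new DeflateParameters();
            params.setCompressionLevel(Deflater.BEST_COMPRESSION); // accepts -1 and 0..9
            // keep the default zlib header; call setWithZlibHeader(false) for raw DEFLATE

            try (OutputStream file = Files.newOutputStream(Paths.get("data.zz"));
                 DeflateCompressorOutputStream out = new DeflateCompressorOutputStream(file, params)) {
                out.write("hello deflate".getBytes(StandardCharsets.UTF_8));
            }
        }
    }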
+ * @since 1.9 + */ +public class DeflateParameters { + + private boolean zlibHeader = true; + private int compressionLevel = Deflater.DEFAULT_COMPRESSION; + + /** + * Whether or not the zlib header shall be written (when + * compressing) or expected (when decompressing). + * @return true if zlib header shall be written + */ + public boolean withZlibHeader() { + return zlibHeader; + } + + /** + * Sets the zlib header presence parameter. + * + * <p>This affects whether or not the zlib header will be written + * (when compressing) or expected (when decompressing).</p> + * + * @param zlibHeader true if zlib header shall be written + */ + public void setWithZlibHeader(final boolean zlibHeader) { + this.zlibHeader = zlibHeader; + } + + /** + * The compression level. + * @see #setCompressionLevel + * @return the compression level + */ + public int getCompressionLevel() { + return compressionLevel; + } + + /** + * Sets the compression level. + * + * @param compressionLevel the compression level (between 0 and 9) + * @see Deflater#NO_COMPRESSION + * @see Deflater#BEST_SPEED + * @see Deflater#DEFAULT_COMPRESSION + * @see Deflater#BEST_COMPRESSION + */ + public void setCompressionLevel(final int compressionLevel) { + if (compressionLevel < -1 || compressionLevel > 9) { + throw new IllegalArgumentException("Invalid Deflate compression level: " + compressionLevel); + } + this.compressionLevel = compressionLevel; + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/deflate/package.html b/src/main/java/org/apache/commons/compress/compressors/deflate/package.html new file mode 100644 index 000000000..4ddeb7487 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/deflate/package.html @@ -0,0 +1,24 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides a stream classes that allow (de)compressing streams + using the DEFLATE algorithm.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/deflate64/Deflate64CompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/deflate64/Deflate64CompressorInputStream.java new file mode 100644 index 000000000..8cbe44d4a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/deflate64/Deflate64CompressorInputStream.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.compressors.deflate64; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.InputStreamStatistics; + +import static org.apache.commons.compress.utils.IOUtils.closeQuietly; + +/** + * Deflate64 decompressor. + * + * @since 1.16 + * @NotThreadSafe + */ +public class Deflate64CompressorInputStream extends CompressorInputStream implements InputStreamStatistics { + private InputStream originalStream; + private HuffmanDecoder decoder; + private long compressedBytesRead; + private final byte[] oneByte = new byte[1]; + + /** + * Constructs a Deflate64CompressorInputStream. + * + * @param in the stream to read from + */ + public Deflate64CompressorInputStream(InputStream in) { + this(new HuffmanDecoder(in)); + originalStream = in; + } + + Deflate64CompressorInputStream(HuffmanDecoder decoder) { + this.decoder = decoder; + } + + /** + * @throws java.io.EOFException if the underlying stream is exhausted before the end of defalted data was reached. + */ + @Override + public int read() throws IOException { + while (true) { + int r = read(oneByte); + switch (r) { + case 1: + return oneByte[0] & 0xFF; + case -1: + return -1; + case 0: + continue; + default: + throw new IllegalStateException("Invalid return value from read: " + r); + } + } + } + + /** + * @throws java.io.EOFException if the underlying stream is exhausted before the end of defalted data was reached. + */ + @Override + public int read(byte[] b, int off, int len) throws IOException { + int read = -1; + if (decoder != null) { + read = decoder.decode(b, off, len); + compressedBytesRead = decoder.getBytesRead(); + count(read); + if (read == -1) { + closeDecoder(); + } + } + return read; + } + + @Override + public int available() throws IOException { + return decoder != null ? decoder.available() : 0; + } + + @Override + public void close() throws IOException { + try { + closeDecoder(); + } finally { + if (originalStream != null) { + originalStream.close(); + originalStream = null; + } + } + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return compressedBytesRead; + } + + private void closeDecoder() { + closeQuietly(decoder); + decoder = null; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/deflate64/HuffmanDecoder.java b/src/main/java/org/apache/commons/compress/compressors/deflate64/HuffmanDecoder.java new file mode 100644 index 000000000..a6afa2cbc --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/deflate64/HuffmanDecoder.java @@ -0,0 +1,541 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.compressors.deflate64; + +import org.apache.commons.compress.utils.BitInputStream; + +import java.io.Closeable; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteOrder; +import java.util.Arrays; + +import static org.apache.commons.compress.compressors.deflate64.HuffmanState.*; + +class HuffmanDecoder implements Closeable { + + /** + * <pre> + * -------------------------------------------------------------------- + * idx xtra base idx xtra base idx xtra base + * -------------------------------------------------------------------- + * 257 0 3 267 1 15,16 277 4 67-82 + * 258 0 4 268 1 17,18 278 4 83-98 + * 259 0 5 269 2 19-22 279 4 99-114 + * 260 0 6 270 2 23-26 280 4 115-130 + * 261 0 7 271 2 27-30 281 5 131-162 + * 262 0 8 272 2 31-34 282 5 163-194 + * 263 0 9 273 3 35-42 283 5 195-226 + * 264 0 10 274 3 43-50 284 5 227-257 + * 265 1 11,12 275 3 51-58 285 16 3 + * 266 1 13,14 276 3 59-66 + * -------------------------------------------------------------------- + * </pre> + * value = (base of run length) << 5 | (number of extra bits to read) + */ + private static final short[] RUN_LENGTH_TABLE = { + 96, 128, 160, 192, 224, 256, 288, 320, 353, 417, 481, 545, 610, 738, 866, + 994, 1123, 1379, 1635, 1891, 2148, 2660, 3172, 3684, 4197, 5221, 6245, 7269, 112 + }; + + /** + * <pre> + * -------------------------------------------------------------------- + * idx xtra dist idx xtra dist idx xtra dist + * -------------------------------------------------------------------- + * 0 0 1 10 4 33-48 20 9 1025-1536 + * 1 0 2 11 4 49-64 21 9 1537-2048 + * 2 0 3 12 5 65-96 22 10 2049-3072 + * 3 0 4 13 5 97-128 23 10 3073-4096 + * 4 1 5,6 14 6 129-192 24 11 4097-6144 + * 5 1 7,8 15 6 193-256 25 11 6145-8192 + * 6 2 9-12 16 7 257-384 26 12 8193-12288 + * 7 2 13-16 17 7 385-512 27 12 12289-16384 + * 8 3 17-24 18 8 513-768 28 13 16385-24576 + * 9 3 25-32 19 8 769-1024 29 13 24577-32768 + * 30 14 32769-49152 + * 31 14 49153-65536 + * -------------------------------------------------------------------- + * </pre> + * value = (base of distance) << 4 | (number of extra bits to read) + */ + private static final int[] DISTANCE_TABLE = { + 16, 32, 48, 64, 81, 113, 146, 210, 275, 403, // 0-9 + 532, 788, 1045, 1557, 2070, 3094, 4119, 6167, 8216, 12312, // 10-19 + 16409, 24601, 32794, 49178, 65563, 98331, 131100, 196636, 262173, 393245, // 20-29 + 524318, 786462 // 30-31 + }; + + /** + * When using dynamic huffman codes the order in which the values are stored + * follows the positioning below + */ + private static final int[] CODE_LENGTHS_ORDER = + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /** + * Huffman Fixed Literal / Distance tables for mode 1 + */ + private static final int[] FIXED_LITERALS; + private static final int[] FIXED_DISTANCE; + + static { + FIXED_LITERALS = new int[288]; + Arrays.fill(FIXED_LITERALS, 0, 144, 8); + Arrays.fill(FIXED_LITERALS, 144, 256, 9); + Arrays.fill(FIXED_LITERALS, 256, 280, 7); + Arrays.fill(FIXED_LITERALS, 280, 288, 8); + + FIXED_DISTANCE = 
new int[32]; + Arrays.fill(FIXED_DISTANCE, 5); + } + + private boolean finalBlock = false; + private DecoderState state; + private BitInputStream reader; + private final InputStream in; + + private final DecodingMemory memory = new DecodingMemory(); + + HuffmanDecoder(InputStream in) { + this.reader = new BitInputStream(in, ByteOrder.LITTLE_ENDIAN); + this.in = in; + state = new InitialState(); + } + + @Override + public void close() { + state = new InitialState(); + reader = null; + } + + public int decode(byte[] b) throws IOException { + return decode(b, 0, b.length); + } + + public int decode(byte[] b, int off, int len) throws IOException { + while (!finalBlock || state.hasData()) { + if (state.state() == INITIAL) { + finalBlock = readBits(1) == 1; + int mode = (int) readBits(2); + switch (mode) { + case 0: + switchToUncompressedState(); + break; + case 1: + state = new HuffmanCodes(FIXED_CODES, FIXED_LITERALS, FIXED_DISTANCE); + break; + case 2: + int[][] tables = readDynamicTables(); + state = new HuffmanCodes(DYNAMIC_CODES, tables[0], tables[1]); + break; + default: + throw new IllegalStateException("Unsupported compression: " + mode); + } + } else { + return state.read(b, off, len); + } + } + return -1; + } + + /** + * @since 1.17 + */ + long getBytesRead() { + return reader.getBytesRead(); + } + + private void switchToUncompressedState() throws IOException { + reader.alignWithByteBoundary(); + long bLen = readBits(16); + long bNLen = readBits(16); + if (((bLen ^ 0xFFFF) & 0xFFFF) != bNLen) { + //noinspection DuplicateStringLiteralInspection + throw new IllegalStateException("Illegal LEN / NLEN values"); + } + state = new UncompressedState(bLen); + } + + private int[][] readDynamicTables() throws IOException { + int[][] result = new int[2][]; + int literals = (int) (readBits(5) + 257); + result[0] = new int[literals]; + + int distances = (int) (readBits(5) + 1); + result[1] = new int[distances]; + + populateDynamicTables(reader, result[0], result[1]); + return result; + } + + int available() throws IOException { + return state.available(); + } + + private abstract static class DecoderState { + abstract HuffmanState state(); + + abstract int read(byte[] b, int off, int len) throws IOException; + + abstract boolean hasData(); + + abstract int available() throws IOException ; + } + + private class UncompressedState extends DecoderState { + private final long blockLength; + private long read; + + private UncompressedState(long blockLength) { + this.blockLength = blockLength; + } + + @Override + HuffmanState state() { + return read < blockLength ? 
STORED : INITIAL; + } + + @Override + int read(byte[] b, int off, int len) throws IOException { + // as len is an int and (blockLength - read) is >= 0 the min must fit into an int as well + int max = (int) Math.min(blockLength - read, len); + int readSoFar = 0; + while (readSoFar < max) { + int readNow; + if (reader.bitsCached() > 0) { + byte next = (byte) readBits(Byte.SIZE); + b[off + readSoFar] = memory.add(next); + readNow = 1; + } else { + readNow = in.read(b, off + readSoFar, max - readSoFar); + if (readNow == -1) { + throw new EOFException("Truncated Deflate64 Stream"); + } + memory.add(b, off + readSoFar, readNow); + } + read += readNow; + readSoFar += readNow; + } + return max; + } + + @Override + boolean hasData() { + return read < blockLength; + } + + @Override + int available() throws IOException { + return (int) Math.min(blockLength - read, reader.bitsAvailable() / Byte.SIZE); + } + } + + private class InitialState extends DecoderState { + @Override + HuffmanState state() { + return INITIAL; + } + + @Override + int read(byte[] b, int off, int len) throws IOException { + throw new IllegalStateException("Cannot read in this state"); + } + + @Override + boolean hasData() { + return false; + } + + @Override + int available() { + return 0; + } + } + + private class HuffmanCodes extends DecoderState { + private boolean endOfBlock = false; + private final HuffmanState state; + private final BinaryTreeNode lengthTree; + private final BinaryTreeNode distanceTree; + + private int runBufferPos = 0; + private byte[] runBuffer = new byte[0]; + private int runBufferLength = 0; + + HuffmanCodes(HuffmanState state, int[] lengths, int[] distance) { + this.state = state; + lengthTree = buildTree(lengths); + distanceTree = buildTree(distance); + } + + @Override + HuffmanState state() { + return endOfBlock ? 
INITIAL : state; + } + + @Override + int read(byte[] b, int off, int len) throws IOException { + return decodeNext(b, off, len); + } + + private int decodeNext(byte[] b, int off, int len) throws IOException { + if (endOfBlock) { + return -1; + } + int result = copyFromRunBuffer(b, off, len); + + while (result < len) { + int symbol = nextSymbol(reader, lengthTree); + if (symbol < 256) { + b[off + result++] = memory.add((byte) symbol); + } else if (symbol > 256) { + int runMask = RUN_LENGTH_TABLE[symbol - 257]; + int run = runMask >>> 5; + int runXtra = runMask & 0x1F; + run += readBits(runXtra); + + int distSym = nextSymbol(reader, distanceTree); + + int distMask = DISTANCE_TABLE[distSym]; + int dist = distMask >>> 4; + int distXtra = distMask & 0xF; + dist += readBits(distXtra); + + if (runBuffer.length < run) { + runBuffer = new byte[run]; + } + runBufferLength = run; + runBufferPos = 0; + memory.recordToBuffer(dist, run, runBuffer); + + result += copyFromRunBuffer(b, off + result, len - result); + } else { + endOfBlock = true; + return result; + } + } + + return result; + } + + private int copyFromRunBuffer(byte[] b, int off, int len) { + int bytesInBuffer = runBufferLength - runBufferPos; + int copiedBytes = 0; + if (bytesInBuffer > 0) { + copiedBytes = Math.min(len, bytesInBuffer); + System.arraycopy(runBuffer, runBufferPos, b, off, copiedBytes); + runBufferPos += copiedBytes; + } + return copiedBytes; + } + + @Override + boolean hasData() { + return !endOfBlock; + } + + @Override + int available() { + return runBufferLength - runBufferPos; + } + } + + private static int nextSymbol(BitInputStream reader, BinaryTreeNode tree) throws IOException { + BinaryTreeNode node = tree; + while (node != null && node.literal == -1) { + long bit = readBits(reader, 1); + node = bit == 0 ? node.leftNode : node.rightNode; + } + return node != null ? 
node.literal : -1; + } + + private static void populateDynamicTables(BitInputStream reader, int[] literals, int[] distances) throws IOException { + int codeLengths = (int) (readBits(reader, 4) + 4); + + int[] codeLengthValues = new int[19]; + for (int cLen = 0; cLen < codeLengths; cLen++) { + codeLengthValues[CODE_LENGTHS_ORDER[cLen]] = (int) readBits(reader, 3); + } + + BinaryTreeNode codeLengthTree = buildTree(codeLengthValues); + + final int[] auxBuffer = new int[literals.length + distances.length]; + + int value = -1; + int length = 0; + int off = 0; + while (off < auxBuffer.length) { + if (length > 0) { + auxBuffer[off++] = value; + length--; + } else { + int symbol = nextSymbol(reader, codeLengthTree); + if (symbol < 16) { + value = symbol; + auxBuffer[off++] = value; + } else if (symbol == 16) { + length = (int) (readBits(reader, 2) + 3); + } else if (symbol == 17) { + value = 0; + length = (int) (readBits(reader, 3) + 3); + } else if (symbol == 18) { + value = 0; + length = (int) (readBits(reader, 7) + 11); + } + } + } + + System.arraycopy(auxBuffer, 0, literals, 0, literals.length); + System.arraycopy(auxBuffer, literals.length, distances, 0, distances.length); + } + + private static class BinaryTreeNode { + private final int bits; + int literal = -1; + BinaryTreeNode leftNode; + BinaryTreeNode rightNode; + + private BinaryTreeNode(int bits) { + this.bits = bits; + } + + void leaf(int symbol) { + literal = symbol; + leftNode = null; + rightNode = null; + } + + BinaryTreeNode left() { + if (leftNode == null && literal == -1) { + leftNode = new BinaryTreeNode(bits + 1); + } + return leftNode; + } + + BinaryTreeNode right() { + if (rightNode == null && literal == -1) { + rightNode = new BinaryTreeNode(bits + 1); + } + return rightNode; + } + } + + private static BinaryTreeNode buildTree(int[] litTable) { + int[] literalCodes = getCodes(litTable); + + BinaryTreeNode root = new BinaryTreeNode(0); + + for (int i = 0; i < litTable.length; i++) { + int len = litTable[i]; + if (len != 0) { + BinaryTreeNode node = root; + int lit = literalCodes[len - 1]; + for (int p = len - 1; p >= 0; p--) { + int bit = lit & (1 << p); + node = bit == 0 ? 
node.left() : node.right(); + } + node.leaf(i); + literalCodes[len - 1]++; + } + } + return root; + } + + private static int[] getCodes(int[] litTable) { + int max = 0; + int[] blCount = new int[65]; + + for (int aLitTable : litTable) { + max = Math.max(max, aLitTable); + blCount[aLitTable]++; + } + blCount = Arrays.copyOf(blCount, max + 1); + + int code = 0; + int[] nextCode = new int[max + 1]; + for (int i = 0; i <= max; i++) { + code = (code + blCount[i]) << 1; + nextCode[i] = code; + } + + return nextCode; + } + + private static class DecodingMemory { + private final byte[] memory; + private final int mask; + private int wHead; + private boolean wrappedAround; + + private DecodingMemory() { + this(16); + } + + private DecodingMemory(int bits) { + memory = new byte[1 << bits]; + mask = memory.length - 1; + } + + byte add(byte b) { + memory[wHead] = b; + wHead = incCounter(wHead); + return b; + } + + void add(byte[] b, int off, int len) { + for (int i = off; i < off + len; i++) { + add(b[i]); + } + } + + void recordToBuffer(int distance, int length, byte[] buff) { + if (distance > memory.length) { + throw new IllegalStateException("Illegal distance parameter: " + distance); + } + int start = (wHead - distance) & mask; + if (!wrappedAround && start >= wHead) { + throw new IllegalStateException("Attempt to read beyond memory: dist=" + distance); + } + for (int i = 0, pos = start; i < length; i++, pos = incCounter(pos)) { + buff[i] = add(memory[pos]); + } + } + + private int incCounter(int counter) { + final int newCounter = (counter + 1) & mask; + if (!wrappedAround && newCounter < counter) { + wrappedAround = true; + } + return newCounter; + } + } + + private long readBits(int numBits) throws IOException { + return readBits(reader, numBits); + } + + private static long readBits(BitInputStream reader, int numBits) throws IOException { + long r = reader.readBits(numBits); + if (r == -1) { + throw new EOFException("Truncated Deflate64 Stream"); + } + return r; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/deflate64/HuffmanState.java b/src/main/java/org/apache/commons/compress/compressors/deflate64/HuffmanState.java new file mode 100644 index 000000000..b34bb7ed9 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/deflate64/HuffmanState.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.compressors.deflate64; + +enum HuffmanState { + INITIAL, + STORED, + DYNAMIC_CODES, + FIXED_CODES +} diff --git a/src/main/java/org/apache/commons/compress/compressors/deflate64/package.html b/src/main/java/org/apache/commons/compress/compressors/deflate64/package.html new file mode 100644 index 000000000..4a0cdd0ae --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/deflate64/package.html @@ -0,0 +1,25 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides a stream that allows decompressing streams using the + DEFLATE64(tm) algorithm. DEFLATE64 is a trademark of PKWARE, + Inc.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java new file mode 100644 index 000000000..9e05f8bcb --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.gzip; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.EOFException; +import java.io.InputStream; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.BufferedInputStream; +import java.util.zip.DataFormatException; +import java.util.zip.Deflater; +import java.util.zip.Inflater; +import java.util.zip.CRC32; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.ByteUtils; +import org.apache.commons.compress.utils.CharsetNames; +import org.apache.commons.compress.utils.CountingInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * Input stream that decompresses .gz files. 
+ * + * <p>This supports decompressing concatenated .gz files which is important + * when decompressing standalone .gz files.</p> + * + * <p> + * {@link java.util.zip.GZIPInputStream} doesn't decompress concatenated .gz + * files: it stops after the first member and silently ignores the rest. + * It doesn't leave the read position to point to the beginning of the next + * member, which makes it difficult workaround the lack of concatenation + * support. + * </p> + * + * <p> + * Instead of using <code>GZIPInputStream</code>, this class has its own .gz + * container format decoder. The actual decompression is done with + * {@link java.util.zip.Inflater}. + * </p> + * + * <p>If you use the constructor {@code GzipCompressorInputStream(in)} + * or {@code GzipCompressorInputStream(in, false)} with some {@code + * InputStream} {@code in} then {@link #read} will return -1 as soon + * as the first internal member has been read completely. The stream + * {@code in} will be positioned at the start of the second gzip + * member if there is one.</p> + * + * <p>If you use the constructor {@code GzipCompressorInputStream(in, + * true)} with some {@code InputStream} {@code in} then {@link #read} + * will return -1 once the stream {@code in} has been exhausted. The + * data read from a stream constructed this way will consist of the + * concatenated data of all gzip members contained inside {@code + * in}.</p> + * + * @see "https://tools.ietf.org/html/rfc1952" + */ +public class GzipCompressorInputStream extends CompressorInputStream + implements InputStreamStatistics { + + // Header flags + // private static final int FTEXT = 0x01; // Uninteresting for us + private static final int FHCRC = 0x02; + private static final int FEXTRA = 0x04; + private static final int FNAME = 0x08; + private static final int FCOMMENT = 0x10; + private static final int FRESERVED = 0xE0; + + private final CountingInputStream countingStream; + + // Compressed input stream, possibly wrapped in a + // BufferedInputStream, always wrapped in countingStream above + private final InputStream in; + + // True if decompressing multi member streams. + private final boolean decompressConcatenated; + + // Buffer to hold the input data + private final byte[] buf = new byte[8192]; + + // Amount of data in buf. + private int bufUsed; + + // Decompressor + private Inflater inf = new Inflater(true); + + // CRC32 from uncompressed data + private final CRC32 crc = new CRC32(); + + // True once everything has been decompressed + private boolean endReached = false; + + // used in no-arg read method + private final byte[] oneByte = new byte[1]; + + private final GzipParameters parameters = new GzipParameters(); + + /** + * Constructs a new input stream that decompresses gzip-compressed data + * from the specified input stream. + * <p> + * This is equivalent to + * <code>GzipCompressorInputStream(inputStream, false)</code> and thus + * will not decompress concatenated .gz files. + * + * @param inputStream the InputStream from which this object should + * be created of + * + * @throws IOException if the stream could not be created + */ + public GzipCompressorInputStream(final InputStream inputStream) + throws IOException { + this(inputStream, false); + } + + /** + * Constructs a new input stream that decompresses gzip-compressed data + * from the specified input stream. + * <p> + * If <code>decompressConcatenated</code> is {@code false}: + * This decompressor might read more input than it will actually use. 
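(Editorial aside: a usage sketch of the behaviour described in the class javadoc above, reading a file that may contain several concatenated gzip members. The example class and the file name logs.gz are placeholders.)

    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;

    public class GzipReadExample {
        public static void main(String[] args) throws IOException {
            try (InputStream file = Files.newInputStream(Paths.get("logs.gz"));
                 // true: keep decompressing across concatenated gzip members
                 GzipCompressorInputStream in = new GzipCompressorInputStream(file, true)) {
                byte[] buffer = new byte[8192];
                int n;
                while ((n = in.read(buffer)) != -1) {
                    // process buffer[0..n)
                }
                // metadata of the most recently read member; the filename may be null
                System.out.println(in.getMetaData().getFilename());
            }
        }
    }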
+ * If <code>inputStream</code> supports <code>mark</code> and + * <code>reset</code>, then the input position will be adjusted + * so that it is right after the last byte of the compressed stream. + * If <code>mark</code> isn't supported, the input position will be + * undefined. + * + * @param inputStream the InputStream from which this object should + * be created of + * @param decompressConcatenated + * if true, decompress until the end of the input; + * if false, stop after the first .gz member + * + * @throws IOException if the stream could not be created + */ + public GzipCompressorInputStream(final InputStream inputStream, + final boolean decompressConcatenated) + throws IOException { + countingStream = new CountingInputStream(inputStream); + // Mark support is strictly needed for concatenated files only, + // but it's simpler if it is always available. + if (countingStream.markSupported()) { + in = countingStream; + } else { + in = new BufferedInputStream(countingStream); + } + + this.decompressConcatenated = decompressConcatenated; + init(true); + } + + /** + * Provides the stream's meta data - may change with each stream + * when decompressing concatenated streams. + * @return the stream's meta data + * @since 1.8 + */ + public GzipParameters getMetaData() { + return parameters; + } + + private boolean init(final boolean isFirstMember) throws IOException { + assert isFirstMember || decompressConcatenated; + + // Check the magic bytes without a possibility of EOFException. + final int magic0 = in.read(); + final int magic1 = in.read(); + + // If end of input was reached after decompressing at least + // one .gz member, we have reached the end of the file successfully. + if (magic0 == -1 && !isFirstMember) { + return false; + } + + if (magic0 != 31 || magic1 != 139) { + throw new IOException(isFirstMember + ? "Input is not in the .gz format" + : "Garbage after a valid .gz stream"); + } + + // Parsing the rest of the header may throw EOFException. + final DataInput inData = new DataInputStream(in); + final int method = inData.readUnsignedByte(); + if (method != Deflater.DEFLATED) { + throw new IOException("Unsupported compression method " + + method + " in the .gz header"); + } + + final int flg = inData.readUnsignedByte(); + if ((flg & FRESERVED) != 0) { + throw new IOException( + "Reserved flags are set in the .gz header"); + } + + parameters.setModificationTime(ByteUtils.fromLittleEndian(inData, 4) * 1000); + switch (inData.readUnsignedByte()) { // extra flags + case 2: + parameters.setCompressionLevel(Deflater.BEST_COMPRESSION); + break; + case 4: + parameters.setCompressionLevel(Deflater.BEST_SPEED); + break; + default: + // ignored for now + break; + } + parameters.setOperatingSystem(inData.readUnsignedByte()); + + // Extra field, ignored + if ((flg & FEXTRA) != 0) { + int xlen = inData.readUnsignedByte(); + xlen |= inData.readUnsignedByte() << 8; + + // This isn't as efficient as calling in.skip would be, + // but it's lazier to handle unexpected end of input this way. + // Most files don't have an extra field anyway. + while (xlen-- > 0) { + inData.readUnsignedByte(); + } + } + + // Original file name + if ((flg & FNAME) != 0) { + parameters.setFilename(new String(readToNull(inData), + CharsetNames.ISO_8859_1)); + } + + // Comment + if ((flg & FCOMMENT) != 0) { + parameters.setComment(new String(readToNull(inData), + CharsetNames.ISO_8859_1)); + } + + // Header "CRC16" which is actually a truncated CRC32 (which isn't + // as good as real CRC16). 
I don't know if any encoder implementation + // sets this, so it's not worth trying to verify it. GNU gzip 1.4 + // doesn't support this field, but zlib seems to be able to at least + // skip over it. + if ((flg & FHCRC) != 0) { + inData.readShort(); + } + + // Reset + inf.reset(); + crc.reset(); + + return true; + } + + private static byte[] readToNull(final DataInput inData) throws IOException { + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int b = 0; + while ((b = inData.readUnsignedByte()) != 0x00) { // NOPMD + bos.write(b); + } + return bos.toByteArray(); + } + + @Override + public int read() throws IOException { + return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF; + } + + /** + * {@inheritDoc} + * + * @since 1.1 + */ + @Override + public int read(final byte[] b, int off, int len) throws IOException { + if (endReached) { + return -1; + } + + int size = 0; + + while (len > 0) { + if (inf.needsInput()) { + // Remember the current position because we may need to + // rewind after reading too much input. + in.mark(buf.length); + + bufUsed = in.read(buf); + if (bufUsed == -1) { + throw new EOFException(); + } + + inf.setInput(buf, 0, bufUsed); + } + + int ret; + try { + ret = inf.inflate(b, off, len); + } catch (final DataFormatException e) { + throw new IOException("Gzip-compressed data is corrupt"); + } + + crc.update(b, off, ret); + off += ret; + len -= ret; + size += ret; + count(ret); + + if (inf.finished()) { + // We may have read too many bytes. Rewind the read + // position to match the actual amount used. + // + // NOTE: The "if" is there just in case. Since we used + // in.mark earlier, it should always skip enough. + in.reset(); + + final int skipAmount = bufUsed - inf.getRemaining(); + if (IOUtils.skip(in, skipAmount) != skipAmount) { + throw new IOException(); + } + + bufUsed = 0; + + final DataInput inData = new DataInputStream(in); + + // CRC32 + final long crcStored = ByteUtils.fromLittleEndian(inData, 4); + + if (crcStored != crc.getValue()) { + throw new IOException("Gzip-compressed data is corrupt " + + "(CRC32 error)"); + } + + // Uncompressed size modulo 2^32 (ISIZE in the spec) + final long isize = ByteUtils.fromLittleEndian(inData, 4); + + if (isize != (inf.getBytesWritten() & 0xffffffffL)) { + throw new IOException("Gzip-compressed data is corrupt" + + "(uncompressed size mismatch)"); + } + + // See if this is the end of the file. + if (!decompressConcatenated || !init(false)) { + inf.end(); + inf = null; + endReached = true; + return size == 0 ? -1 : size; + } + } + } + + return size; + } + + /** + * Checks if the signature matches what is expected for a .gz file. + * + * @param signature the bytes to check + * @param length the number of bytes to check + * @return true if this is a .gz stream, false otherwise + * + * @since 1.1 + */ + public static boolean matches(final byte[] signature, final int length) { + return length >= 2 && signature[0] == 31 && signature[1] == -117; + } + + /** + * Closes the input stream (unless it is System.in). 
+ * + * @since 1.2 + */ + @Override + public void close() throws IOException { + if (inf != null) { + inf.end(); + inf = null; + } + + if (this.in != System.in) { + this.in.close(); + } + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return countingStream.getBytesRead(); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java new file mode 100644 index 000000000..bb5e1778f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.gzip; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.zip.CRC32; +import java.util.zip.Deflater; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +import org.apache.commons.compress.compressors.CompressorOutputStream; +import org.apache.commons.compress.utils.CharsetNames; + +/** + * Compressed output stream using the gzip format. This implementation improves + * over the standard {@link GZIPOutputStream} class by allowing + * the configuration of the compression level and the header metadata (filename, + * comment, modification time, operating system and extra flags). + * + * @see <a href="https://tools.ietf.org/html/rfc1952">GZIP File Format Specification</a> + */ +public class GzipCompressorOutputStream extends CompressorOutputStream { + + /** Header flag indicating a file name follows the header */ + private static final int FNAME = 1 << 3; + + /** Header flag indicating a comment follows the header */ + private static final int FCOMMENT = 1 << 4; + + /** The underlying stream */ + private final OutputStream out; + + /** Deflater used to compress the data */ + private final Deflater deflater; + + /** The buffer receiving the compressed data from the deflater */ + private final byte[] deflateBuffer = new byte[512]; + + /** Indicates if the stream has been closed */ + private boolean closed; + + /** The checksum of the uncompressed data */ + private final CRC32 crc = new CRC32(); + + /** + * Creates a gzip compressed output stream with the default parameters. + * @param out the stream to compress to + * @throws IOException if writing fails + */ + public GzipCompressorOutputStream(final OutputStream out) throws IOException { + this(out, new GzipParameters()); + } + + /** + * Creates a gzip compressed output stream with the specified parameters. 
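(Editorial aside: a usage sketch showing the header metadata and compression level configuration mentioned in the class javadoc above. The example class and the file names are placeholders.)

    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.zip.Deflater;

    import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    import org.apache.commons.compress.compressors.gzip.GzipParameters;

    public class GzipWriteExample {
        public static void main(String[] args) throws IOException {
            GzipParameters params = new GzipParameters();
            params.setCompressionLevel(Deflater.BEST_SPEED);
            params.setFilename("report.txt");                      // FNAME header field
            params.setComment("nightly export");                   // FCOMMENT header field
            params.setModificationTime(System.currentTimeMillis());

            try (OutputStream file = Files.newOutputStream(Paths.get("report.txt.gz"));
                 GzipCompressorOutputStream out = new GzipCompressorOutputStream(file, params)) {
                out.write("report body".getBytes(StandardCharsets.ISO_8859_1));
            }
        }
    }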
+ * @param out the stream to compress to + * @param parameters the parameters to use + * @throws IOException if writing fails + * + * @since 1.7 + */ + public GzipCompressorOutputStream(final OutputStream out, final GzipParameters parameters) throws IOException { + this.out = out; + this.deflater = new Deflater(parameters.getCompressionLevel(), true); + + writeHeader(parameters); + } + + private void writeHeader(final GzipParameters parameters) throws IOException { + final String filename = parameters.getFilename(); + final String comment = parameters.getComment(); + + final ByteBuffer buffer = ByteBuffer.allocate(10); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putShort((short) GZIPInputStream.GZIP_MAGIC); + buffer.put((byte) Deflater.DEFLATED); // compression method (8: deflate) + buffer.put((byte) ((filename != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags + buffer.putInt((int) (parameters.getModificationTime() / 1000)); + + // extra flags + final int compressionLevel = parameters.getCompressionLevel(); + if (compressionLevel == Deflater.BEST_COMPRESSION) { + buffer.put((byte) 2); + } else if (compressionLevel == Deflater.BEST_SPEED) { + buffer.put((byte) 4); + } else { + buffer.put((byte) 0); + } + + buffer.put((byte) parameters.getOperatingSystem()); + + out.write(buffer.array()); + + if (filename != null) { + out.write(filename.getBytes(CharsetNames.ISO_8859_1)); + out.write(0); + } + + if (comment != null) { + out.write(comment.getBytes(CharsetNames.ISO_8859_1)); + out.write(0); + } + } + + private void writeTrailer() throws IOException { + final ByteBuffer buffer = ByteBuffer.allocate(8); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt((int) crc.getValue()); + buffer.putInt(deflater.getTotalIn()); + + out.write(buffer.array()); + } + + @Override + public void write(final int b) throws IOException { + write(new byte[]{(byte) (b & 0xff)}, 0, 1); + } + + /** + * {@inheritDoc} + * + * @since 1.1 + */ + @Override + public void write(final byte[] buffer) throws IOException { + write(buffer, 0, buffer.length); + } + + /** + * {@inheritDoc} + * + * @since 1.1 + */ + @Override + public void write(final byte[] buffer, final int offset, final int length) throws IOException { + if (deflater.finished()) { + throw new IOException("Cannot write more data, the end of the compressed data stream has been reached"); + + } else if (length > 0) { + deflater.setInput(buffer, offset, length); + + while (!deflater.needsInput()) { + deflate(); + } + + crc.update(buffer, offset, length); + } + } + + private void deflate() throws IOException { + final int length = deflater.deflate(deflateBuffer, 0, deflateBuffer.length); + if (length > 0) { + out.write(deflateBuffer, 0, length); + } + } + + /** + * Finishes writing compressed data to the underlying stream without closing it. 
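(Editorial aside: one use of finish() is ending a gzip member without closing the underlying stream, so that a second member can be appended to the same stream; GzipCompressorInputStream with decompressConcatenated set to true can read the result. This is a sketch under that assumption; the example class and file name are placeholders and error handling is omitted.)

    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;

    public class MultiMemberGzipExample {
        public static void main(String[] args) throws IOException {
            OutputStream file = Files.newOutputStream(Paths.get("combined.gz"));

            // First member: finish() writes the gzip trailer but leaves `file` open.
            GzipCompressorOutputStream first = new GzipCompressorOutputStream(file);
            first.write("part one".getBytes(StandardCharsets.ISO_8859_1));
            first.finish();

            // Second member appended to the same underlying stream.
            GzipCompressorOutputStream second = new GzipCompressorOutputStream(file);
            second.write("part two".getBytes(StandardCharsets.ISO_8859_1));
            second.close(); // also closes `file`

            first.close();  // releases the first Deflater; the stream is already closed
        }
    }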
+ * + * @since 1.7 + * @throws IOException on error + */ + public void finish() throws IOException { + if (!deflater.finished()) { + deflater.finish(); + + while (!deflater.finished()) { + deflate(); + } + + writeTrailer(); + } + } + + /** + * {@inheritDoc} + * + * @since 1.7 + */ + @Override + public void flush() throws IOException { + out.flush(); + } + + @Override + public void close() throws IOException { + if (!closed) { + try { + finish(); + } finally { + deflater.end(); + out.close(); + closed = true; + } + } + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java new file mode 100644 index 000000000..3887a686e --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors.gzip; + +import java.util.zip.Deflater; + +/** + * Parameters for the GZIP compressor. + * + * @since 1.7 + */ +public class GzipParameters { + + private int compressionLevel = Deflater.DEFAULT_COMPRESSION; + private long modificationTime; + private String filename; + private String comment; + private int operatingSystem = 255; // Unknown OS by default + + public int getCompressionLevel() { + return compressionLevel; + } + + /** + * Sets the compression level. + * + * @param compressionLevel the compression level (between 0 and 9) + * @see Deflater#NO_COMPRESSION + * @see Deflater#BEST_SPEED + * @see Deflater#DEFAULT_COMPRESSION + * @see Deflater#BEST_COMPRESSION + */ + public void setCompressionLevel(final int compressionLevel) { + if (compressionLevel < -1 || compressionLevel > 9) { + throw new IllegalArgumentException("Invalid gzip compression level: " + compressionLevel); + } + this.compressionLevel = compressionLevel; + } + + public long getModificationTime() { + return modificationTime; + } + + /** + * Sets the modification time of the compressed file. + * + * @param modificationTime the modification time, in milliseconds + */ + public void setModificationTime(final long modificationTime) { + this.modificationTime = modificationTime; + } + + public String getFilename() { + return filename; + } + + /** + * Sets the name of the compressed file. + * + * @param filename the name of the file without the directory path + */ + public void setFilename(final String filename) { + this.filename = filename; + } + + public String getComment() { + return comment; + } + + public void setComment(final String comment) { + this.comment = comment; + } + + public int getOperatingSystem() { + return operatingSystem; + } + + /** + * Sets the operating system on which the compression took place. 
+ * The defined values are: + * <ul> + * <li>0: FAT filesystem (MS-DOS, OS/2, NT/Win32)</li> + * <li>1: Amiga</li> + * <li>2: VMS (or OpenVMS)</li> + * <li>3: Unix</li> + * <li>4: VM/CMS</li> + * <li>5: Atari TOS</li> + * <li>6: HPFS filesystem (OS/2, NT)</li> + * <li>7: Macintosh</li> + * <li>8: Z-System</li> + * <li>9: CP/M</li> + * <li>10: TOPS-20</li> + * <li>11: NTFS filesystem (NT)</li> + * <li>12: QDOS</li> + * <li>13: Acorn RISCOS</li> + * <li>255: Unknown</li> + * </ul> + * + * @param operatingSystem the code of the operating system + */ + public void setOperatingSystem(final int operatingSystem) { + this.operatingSystem = operatingSystem; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipUtils.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipUtils.java new file mode 100644 index 000000000..0edf65571 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipUtils.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.gzip; + +import java.util.LinkedHashMap; +import java.util.Map; +import org.apache.commons.compress.compressors.FileNameUtil; + +/** + * Utility code for the gzip compression format. + * @ThreadSafe + */ +public class GzipUtils { + + private static final FileNameUtil fileNameUtil; + + static { + // using LinkedHashMap so .tgz is preferred over .taz as + // compressed extension of .tar as FileNameUtil will use the + // first one found + final Map<String, String> uncompressSuffix = + new LinkedHashMap<>(); + uncompressSuffix.put(".tgz", ".tar"); + uncompressSuffix.put(".taz", ".tar"); + uncompressSuffix.put(".svgz", ".svg"); + uncompressSuffix.put(".cpgz", ".cpio"); + uncompressSuffix.put(".wmz", ".wmf"); + uncompressSuffix.put(".emz", ".emf"); + uncompressSuffix.put(".gz", ""); + uncompressSuffix.put(".z", ""); + uncompressSuffix.put("-gz", ""); + uncompressSuffix.put("-z", ""); + uncompressSuffix.put("_z", ""); + fileNameUtil = new FileNameUtil(uncompressSuffix, ".gz"); + } + + /** Private constructor to prevent instantiation of this utility class. */ + private GzipUtils() { + } + + /** + * Detects common gzip suffixes in the given filename. + * + * @param filename name of a file + * @return {@code true} if the filename has a common gzip suffix, + * {@code false} otherwise + */ + public static boolean isCompressedFilename(final String filename) { + return fileNameUtil.isCompressedFilename(filename); + } + + /** + * Maps the given name of a gzip-compressed file to the name that the + * file should have after uncompression. Commonly used file type specific + * suffixes like ".tgz" or ".svgz" are automatically detected and + * correctly mapped. 
For example the name "package.tgz" is mapped to + * "package.tar". And any filenames with the generic ".gz" suffix + * (or any other generic gzip suffix) is mapped to a name without that + * suffix. If no gzip suffix is detected, then the filename is returned + * unmapped. + * + * @param filename name of a file + * @return name of the corresponding uncompressed file + */ + public static String getUncompressedFilename(final String filename) { + return fileNameUtil.getUncompressedFilename(filename); + } + + /** + * Maps the given filename to the name that the file should have after + * compression with gzip. Common file types with custom suffixes for + * compressed versions are automatically detected and correctly mapped. + * For example the name "package.tar" is mapped to "package.tgz". If no + * custom mapping is applicable, then the default ".gz" suffix is appended + * to the filename. + * + * @param filename name of a file + * @return name of the corresponding compressed file + */ + public static String getCompressedFilename(final String filename) { + return fileNameUtil.getCompressedFilename(filename); + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/package.html b/src/main/java/org/apache/commons/compress/compressors/gzip/package.html new file mode 100644 index 000000000..e18b50f2f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/package.html @@ -0,0 +1,29 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream classes for compressing and decompressing + streams using the GZip algorithm.</p> + + <p>The classes in this package are wrappers around {@link + java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} and + {@link java.util.zip.GZIPOutputStream + java.util.zip.GZIPOutputStream}.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java new file mode 100644 index 000000000..a52dc6015 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.compressors.lz77support.AbstractLZ77CompressorInputStream; +import org.apache.commons.compress.utils.ByteUtils; + +/** + * CompressorInputStream for the LZ4 block format. + * + * @see <a href="http://lz4.github.io/lz4/lz4_Block_format.html">LZ4 Block Format Description</a> + * @since 1.14 + * @NotThreadSafe + */ +public class BlockLZ4CompressorInputStream extends AbstractLZ77CompressorInputStream { + + static final int WINDOW_SIZE = 1 << 16; + static final int SIZE_BITS = 4; + static final int BACK_REFERENCE_SIZE_MASK = (1 << SIZE_BITS) - 1; + static final int LITERAL_SIZE_MASK = BACK_REFERENCE_SIZE_MASK << SIZE_BITS; + + /** Back-Reference-size part of the block starting byte. */ + private int nextBackReferenceSize; + + /** Current state of the stream */ + private State state = State.NO_BLOCK; + + /** + * Creates a new LZ4 input stream. + * + * @param is + * An InputStream to read compressed data from + * + * @throws IOException if reading fails + */ + public BlockLZ4CompressorInputStream(final InputStream is) throws IOException { + super(is, WINDOW_SIZE); + } + + /** + * {@inheritDoc} + */ + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + switch (state) { + case EOF: + return -1; + case NO_BLOCK: // NOSONAR - fallthrough intended + readSizes(); + /*FALLTHROUGH*/ + case IN_LITERAL: + int litLen = readLiteral(b, off, len); + if (!hasMoreDataInBlock()) { + state = State.LOOKING_FOR_BACK_REFERENCE; + } + return litLen > 0 ? litLen : read(b, off, len); + case LOOKING_FOR_BACK_REFERENCE: // NOSONAR - fallthrough intended + if (!initializeBackReference()) { + state = State.EOF; + return -1; + } + /*FALLTHROUGH*/ + case IN_BACK_REFERENCE: + int backReferenceLen = readBackReference(b, off, len); + if (!hasMoreDataInBlock()) { + state = State.NO_BLOCK; + } + return backReferenceLen > 0 ? backReferenceLen : read(b, off, len); + default: + throw new IOException("Unknown stream state " + state); + } + } + + private void readSizes() throws IOException { + int nextBlock = readOneByte(); + if (nextBlock == -1) { + throw new IOException("Premature end of stream while looking for next block"); + } + nextBackReferenceSize = nextBlock & BACK_REFERENCE_SIZE_MASK; + long literalSizePart = (nextBlock & LITERAL_SIZE_MASK) >> SIZE_BITS; + if (literalSizePart == BACK_REFERENCE_SIZE_MASK) { + literalSizePart += readSizeBytes(); + } + startLiteral(literalSizePart); + state = State.IN_LITERAL; + } + + private long readSizeBytes() throws IOException { + long accum = 0; + int nextByte; + do { + nextByte = readOneByte(); + if (nextByte == -1) { + throw new IOException("Premature end of stream while parsing length"); + } + accum += nextByte; + } while (nextByte == 255); + return accum; + } + + /** + * @return false if there is no more back-reference - this means this is the + * last block of the stream. 
+ */ + private boolean initializeBackReference() throws IOException { + int backReferenceOffset = 0; + try { + backReferenceOffset = (int) ByteUtils.fromLittleEndian(supplier, 2); + } catch (IOException ex) { + if (nextBackReferenceSize == 0) { // the last block has no back-reference + return false; + } + throw ex; + } + long backReferenceSize = nextBackReferenceSize; + if (nextBackReferenceSize == BACK_REFERENCE_SIZE_MASK) { + backReferenceSize += readSizeBytes(); + } + // minimal match length 4 is encoded as 0 + startBackReference(backReferenceOffset, backReferenceSize + 4); + state = State.IN_BACK_REFERENCE; + return true; + } + + private enum State { + NO_BLOCK, IN_LITERAL, LOOKING_FOR_BACK_REFERENCE, IN_BACK_REFERENCE, EOF + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java new file mode 100644 index 000000000..7f243003d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java @@ -0,0 +1,508 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.Deque; +import java.util.Iterator; +import java.util.LinkedList; + +import org.apache.commons.compress.compressors.CompressorOutputStream; +import org.apache.commons.compress.compressors.lz77support.LZ77Compressor; +import org.apache.commons.compress.compressors.lz77support.Parameters; +import org.apache.commons.compress.utils.ByteUtils; + +/** + * CompressorOutputStream for the LZ4 block format. + * + * @see <a href="http://lz4.github.io/lz4/lz4_Block_format.html">LZ4 Block Format Description</a> + * @since 1.14 + * @NotThreadSafe + */ +public class BlockLZ4CompressorOutputStream extends CompressorOutputStream { + + private static final int MIN_BACK_REFERENCE_LENGTH = 4; + private static final int MIN_OFFSET_OF_LAST_BACK_REFERENCE = 12; + + /* + + The LZ4 block format has a few properties that make it less + straight-forward than one would hope: + + * literal blocks and back-references must come in pairs (except + for the very last literal block), so consecutive literal + blocks created by the compressor must be merged into a single + block. + + * the start of a literal/back-reference pair contains the length + of the back-reference (at least some part of it) so we can't + start writing the literal before we know how long the next + back-reference is going to be. 
+ + * there are special rules for the final blocks + + > There are specific parsing rules to respect in order to remain + > compatible with assumptions made by the decoder : + > + > 1. The last 5 bytes are always literals + > + > 2. The last match must start at least 12 bytes before end of + > block. Consequently, a block with less than 13 bytes cannot be + > compressed. + + which means any back-reference may need to get rewritten as a + literal block unless we know the next block is at least of + length 5 and the sum of this block's length and offset and the + next block's length is at least twelve. + + */ + + private final LZ77Compressor compressor; + private final OutputStream os; + + // used in one-arg write method + private final byte[] oneByte = new byte[1]; + + private boolean finished = false; + + private Deque<Pair> pairs = new LinkedList<>(); + // keeps track of the last window-size bytes (64k) in order to be + // able to expand back-references when needed + private Deque<byte[]> expandedBlocks = new LinkedList<>(); + + /** + * Creates a new LZ4 output stream. + * + * @param os + * An OutputStream to read compressed data from + * + * @throws IOException if reading fails + */ + public BlockLZ4CompressorOutputStream(final OutputStream os) throws IOException { + this(os, createParameterBuilder().build()); + } + + /** + * Creates a new LZ4 output stream. + * + * @param os + * An OutputStream to read compressed data from + * @param params + * The parameters to use for LZ77 compression. + * + * @throws IOException if reading fails + */ + public BlockLZ4CompressorOutputStream(final OutputStream os, Parameters params) throws IOException { + this.os = os; + compressor = new LZ77Compressor(params, + new LZ77Compressor.Callback() { + @Override + public void accept(LZ77Compressor.Block block) throws IOException { + switch (block.getType()) { + case LITERAL: + addLiteralBlock((LZ77Compressor.LiteralBlock) block); + break; + case BACK_REFERENCE: + addBackReference((LZ77Compressor.BackReference) block); + break; + case EOD: + writeFinalLiteralBlock(); + break; + } + } + }); + } + + @Override + public void write(int b) throws IOException { + oneByte[0] = (byte) (b & 0xff); + write(oneByte); + } + + @Override + public void write(byte[] data, int off, int len) throws IOException { + compressor.compress(data, off, len); + } + + @Override + public void close() throws IOException { + try { + finish(); + } finally { + os.close(); + } + } + + /** + * Compresses all remaining data and writes it to the stream, + * doesn't close the underlying stream. + * @throws IOException if an error occurs + */ + public void finish() throws IOException { + if (!finished) { + compressor.finish(); + finished = true; + } + } + + /** + * Adds some initial data to fill the window with. + * + * @param data the data to fill the window with. 
+ * @param off offset of real data into the array + * @param len amount of data + * @throws IllegalStateException if the stream has already started to write data + * @see LZ77Compressor#prefill + */ + public void prefill(byte[] data, int off, int len) { + if (len > 0) { + byte[] b = Arrays.copyOfRange(data, off, off + len); + compressor.prefill(b); + recordLiteral(b); + } + } + + private void addLiteralBlock(LZ77Compressor.LiteralBlock block) throws IOException { + Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength()); + recordLiteral(last.addLiteral(block)); + clearUnusedBlocksAndPairs(); + } + + private void addBackReference(LZ77Compressor.BackReference block) throws IOException { + Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength()); + last.setBackReference(block); + recordBackReference(block); + clearUnusedBlocksAndPairs(); + } + + private Pair writeBlocksAndReturnUnfinishedPair(int length) throws IOException { + writeWritablePairs(length); + Pair last = pairs.peekLast(); + if (last == null || last.hasBackReference()) { + last = new Pair(); + pairs.addLast(last); + } + return last; + } + + private void recordLiteral(byte[] b) { + expandedBlocks.addFirst(b); + } + + private void clearUnusedBlocksAndPairs() { + clearUnusedBlocks(); + clearUnusedPairs(); + } + + private void clearUnusedBlocks() { + int blockLengths = 0; + int blocksToKeep = 0; + for (byte[] b : expandedBlocks) { + blocksToKeep++; + blockLengths += b.length; + if (blockLengths >= BlockLZ4CompressorInputStream.WINDOW_SIZE) { + break; + } + } + final int size = expandedBlocks.size(); + for (int i = blocksToKeep; i < size; i++) { + expandedBlocks.removeLast(); + } + } + + private void recordBackReference(LZ77Compressor.BackReference block) { + expandedBlocks.addFirst(expand(block.getOffset(), block.getLength())); + } + + private byte[] expand(final int offset, final int length) { + byte[] expanded = new byte[length]; + if (offset == 1) { // surprisingly common special case + byte[] block = expandedBlocks.peekFirst(); + byte b = block[block.length - 1]; + if (b != 0) { // the fresh array contains 0s anyway + Arrays.fill(expanded, b); + } + } else { + expandFromList(expanded, offset, length); + } + return expanded; + } + + private void expandFromList(final byte[] expanded, int offset, int length) { + int offsetRemaining = offset; + int lengthRemaining = length; + int writeOffset = 0; + while (lengthRemaining > 0) { + // find block that contains offsetRemaining + byte[] block = null; + int copyLen, copyOffset; + if (offsetRemaining > 0) { + int blockOffset = 0; + for (byte[] b : expandedBlocks) { + if (b.length + blockOffset >= offsetRemaining) { + block = b; + break; + } + blockOffset += b.length; + } + if (block == null) { + // should not be possible + throw new IllegalStateException("failed to find a block containing offset " + offset); + } + copyOffset = blockOffset + block.length - offsetRemaining; + copyLen = Math.min(lengthRemaining, block.length - copyOffset); + } else { + // offsetRemaining is negative or 0 and points into the expanded bytes + block = expanded; + copyOffset = -offsetRemaining; + copyLen = Math.min(lengthRemaining, writeOffset + offsetRemaining); + } + System.arraycopy(block, copyOffset, expanded, writeOffset, copyLen); + offsetRemaining -= copyLen; + lengthRemaining -= copyLen; + writeOffset += copyLen; + } + } + + private void clearUnusedPairs() { + int pairLengths = 0; + int pairsToKeep = 0; + for (Iterator<Pair> it = pairs.descendingIterator(); it.hasNext(); ) { + Pair p = 
it.next(); + pairsToKeep++; + pairLengths += p.length(); + if (pairLengths >= BlockLZ4CompressorInputStream.WINDOW_SIZE) { + break; + } + } + final int size = pairs.size(); + for (int i = pairsToKeep; i < size; i++) { + Pair p = pairs.peekFirst(); + if (p.hasBeenWritten()) { + pairs.removeFirst(); + } else { + break; + } + } + } + + private void writeFinalLiteralBlock() throws IOException { + rewriteLastPairs(); + for (Pair p : pairs) { + if (!p.hasBeenWritten()) { + p.writeTo(os); + } + } + pairs.clear(); + } + + private void writeWritablePairs(int lengthOfBlocksAfterLastPair) throws IOException { + int unwrittenLength = lengthOfBlocksAfterLastPair; + for (Iterator<Pair> it = pairs.descendingIterator(); it.hasNext(); ) { + Pair p = it.next(); + if (p.hasBeenWritten()) { + break; + } + unwrittenLength += p.length(); + } + for (Pair p : pairs) { + if (p.hasBeenWritten()) { + continue; + } + unwrittenLength -= p.length(); + if (p.canBeWritten(unwrittenLength)) { + p.writeTo(os); + } else { + break; + } + } + } + + private void rewriteLastPairs() { + LinkedList<Pair> lastPairs = new LinkedList<>(); + LinkedList<Integer> pairLength = new LinkedList<>(); + int offset = 0; + for (Iterator<Pair> it = pairs.descendingIterator(); it.hasNext(); ) { + Pair p = it.next(); + if (p.hasBeenWritten()) { + break; + } + int len = p.length(); + pairLength.addFirst(len); + lastPairs.addFirst(p); + offset += len; + if (offset >= MIN_OFFSET_OF_LAST_BACK_REFERENCE) { + break; + } + } + for (Pair p : lastPairs) { + pairs.remove(p); + } + // lastPairs may contain between one and four Pairs: + // * the last pair may be a one byte literal + // * all other Pairs contain a back-reference which must be four bytes long at minimum + // we could merge them all into a single literal block but + // this may harm compression. For example compressing + // "bla.tar" from our tests yields a last block containing a + // back-reference of length > 2k and we'd end up with a last + // literal of that size rather than a 2k back-reference and a + // 12 byte literal at the end. + + // Instead we merge all but the first of lastPairs into a new + // literal-only Pair "replacement" and look at the + // back-reference in the first of lastPairs and see if we can + // split it. We can split it if it is longer than 16 - + // replacement.length (i.e. the minimal length of four is kept + // while making sure the last literal is at least twelve bytes + // long). If we can't split it, we expand the first of the pairs + // as well. + + // this is not optimal, we could get better compression + // results with more complex approaches as the last literal + // only needs to be five bytes long if the previous + // back-reference has an offset big enough + + final int lastPairsSize = lastPairs.size(); + int toExpand = 0; + for (int i = 1; i < lastPairsSize; i++) { + toExpand += pairLength.get(i); + } + Pair replacement = new Pair(); + if (toExpand > 0) { + replacement.prependLiteral(expand(toExpand, toExpand)); + } + Pair splitCandidate = lastPairs.get(0); + int stillNeeded = MIN_OFFSET_OF_LAST_BACK_REFERENCE - toExpand; + int brLen = splitCandidate.hasBackReference() ? 
splitCandidate.backReferenceLength() : 0; + if (splitCandidate.hasBackReference() && brLen >= MIN_BACK_REFERENCE_LENGTH + stillNeeded) { + replacement.prependLiteral(expand(toExpand + stillNeeded, stillNeeded)); + pairs.add(splitCandidate.splitWithNewBackReferenceLengthOf(brLen - stillNeeded)); + } else { + if (splitCandidate.hasBackReference()) { + replacement.prependLiteral(expand(toExpand + brLen, brLen)); + } + splitCandidate.prependTo(replacement); + } + pairs.add(replacement); + } + + /** + * Returns a builder correctly configured for the LZ4 algorithm. + * @return a builder correctly configured for the LZ4 algorithm + */ + public static Parameters.Builder createParameterBuilder() { + int maxLen = BlockLZ4CompressorInputStream.WINDOW_SIZE - 1; + return Parameters.builder(BlockLZ4CompressorInputStream.WINDOW_SIZE) + .withMinBackReferenceLength(MIN_BACK_REFERENCE_LENGTH) + .withMaxBackReferenceLength(maxLen) + .withMaxOffset(maxLen) + .withMaxLiteralLength(maxLen); + } + + final static class Pair { + private final Deque<byte[]> literals = new LinkedList<>(); + private int brOffset, brLength; + private boolean written; + + private void prependLiteral(byte[] data) { + literals.addFirst(data); + } + byte[] addLiteral(LZ77Compressor.LiteralBlock block) { + byte[] copy = Arrays.copyOfRange(block.getData(), block.getOffset(), + block.getOffset() + block.getLength()); + literals.add(copy); + return copy; + } + void setBackReference(LZ77Compressor.BackReference block) { + if (hasBackReference()) { + throw new IllegalStateException(); + } + brOffset = block.getOffset(); + brLength = block.getLength(); + } + boolean hasBackReference() { + return brOffset > 0; + } + boolean canBeWritten(int lengthOfBlocksAfterThisPair) { + return hasBackReference() + && lengthOfBlocksAfterThisPair >= MIN_OFFSET_OF_LAST_BACK_REFERENCE + MIN_BACK_REFERENCE_LENGTH; + } + int length() { + return literalLength() + brLength; + } + private boolean hasBeenWritten() { + return written; + } + void writeTo(OutputStream out) throws IOException { + int litLength = literalLength(); + out.write(lengths(litLength, brLength)); + if (litLength >= BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) { + writeLength(litLength - BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out); + } + for (byte[] b : literals) { + out.write(b); + } + if (hasBackReference()) { + ByteUtils.toLittleEndian(out, brOffset, 2); + if (brLength - MIN_BACK_REFERENCE_LENGTH >= BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) { + writeLength(brLength - MIN_BACK_REFERENCE_LENGTH + - BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out); + } + } + written = true; + } + private int literalLength() { + int length = 0; + for (byte[] b : literals) { + length += b.length; + } + return length; + } + private static int lengths(int litLength, int brLength) { + int l = litLength < 15 ? litLength : 15; + int br = brLength < 4 ? 0 : (brLength < 19 ? 
brLength - 4 : 15); + return (l << BlockLZ4CompressorInputStream.SIZE_BITS) | br; + } + private static void writeLength(int length, OutputStream out) throws IOException { + while (length >= 255) { + out.write(255); + length -= 255; + } + out.write(length); + } + private int backReferenceLength() { + return brLength; + } + private void prependTo(Pair other) { + Iterator<byte[]> listBackwards = literals.descendingIterator(); + while (listBackwards.hasNext()) { + other.prependLiteral(listBackwards.next()); + } + } + private Pair splitWithNewBackReferenceLengthOf(int newBackReferenceLength) { + Pair p = new Pair(); + p.literals.addAll(literals); + p.brOffset = brOffset; + p.brLength = newBackReferenceLength; + return p; + } + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorInputStream.java new file mode 100644 index 000000000..6cb66ec3d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorInputStream.java @@ -0,0 +1,404 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.BoundedInputStream; +import org.apache.commons.compress.utils.ByteUtils; +import org.apache.commons.compress.utils.ChecksumCalculatingInputStream; +import org.apache.commons.compress.utils.CountingInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * CompressorInputStream for the LZ4 frame format. 
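+ *
+ * <p>A minimal usage sketch; the {@code compressedStream} variable is an
+ * assumption of the example and stands for any InputStream delivering
+ * LZ4 frame data:</p>
+ * <pre>{@code
+ * try (FramedLZ4CompressorInputStream in =
+ *          new FramedLZ4CompressorInputStream(compressedStream)) {
+ *     byte[] buffer = new byte[8192];
+ *     int n;
+ *     while ((n = in.read(buffer, 0, buffer.length)) != -1) {
+ *         // consume the n uncompressed bytes in buffer
+ *     }
+ * }
+ * }</pre>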
+ * + * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p> + * + * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a> + * @since 1.14 + * @NotThreadSafe + */ +public class FramedLZ4CompressorInputStream extends CompressorInputStream + implements InputStreamStatistics { + + // used by FramedLZ4CompressorOutputStream as well + static final byte[] LZ4_SIGNATURE = new byte[] { //NOSONAR + 4, 0x22, 0x4d, 0x18 + }; + private static final byte[] SKIPPABLE_FRAME_TRAILER = new byte[] { + 0x2a, 0x4d, 0x18 + }; + private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50; + + static final int VERSION_MASK = 0xC0; + static final int SUPPORTED_VERSION = 0x40; + static final int BLOCK_INDEPENDENCE_MASK = 0x20; + static final int BLOCK_CHECKSUM_MASK = 0x10; + static final int CONTENT_SIZE_MASK = 0x08; + static final int CONTENT_CHECKSUM_MASK = 0x04; + static final int BLOCK_MAX_SIZE_MASK = 0x70; + static final int UNCOMPRESSED_FLAG_MASK = 0x80000000; + + // used in no-arg read method + private final byte[] oneByte = new byte[1]; + + private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() { + @Override + public int getAsByte() throws IOException { + return readOneByte(); + } + }; + + private final CountingInputStream in; + private final boolean decompressConcatenated; + + private boolean expectBlockChecksum; + private boolean expectBlockDependency; + private boolean expectContentSize; + private boolean expectContentChecksum; + + private InputStream currentBlock; + private boolean endReached, inUncompressed; + + // used for frame header checksum and content checksum, if present + private final XXHash32 contentHash = new XXHash32(); + + // used for block checksum, if present + private final XXHash32 blockHash = new XXHash32(); + + // only created if the frame doesn't set the block independence flag + private byte[] blockDependencyBuffer; + + /** + * Creates a new input stream that decompresses streams compressed + * using the LZ4 frame format and stops after decompressing the + * first frame. + * @param in the InputStream from which to read the compressed data + * @throws IOException if reading fails + */ + public FramedLZ4CompressorInputStream(InputStream in) throws IOException { + this(in, false); + } + + /** + * Creates a new input stream that decompresses streams compressed + * using the LZ4 frame format. + * @param in the InputStream from which to read the compressed data + * @param decompressConcatenated if true, decompress until the end + * of the input; if false, stop after the first LZ4 frame + * and leave the input position to point to the next byte + * after the frame stream + * @throws IOException if reading fails + */ + public FramedLZ4CompressorInputStream(InputStream in, boolean decompressConcatenated) throws IOException { + this.in = new CountingInputStream(in); + this.decompressConcatenated = decompressConcatenated; + init(true); + } + + /** {@inheritDoc} */ + @Override + public int read() throws IOException { + return read(oneByte, 0, 1) == -1 ? 
-1 : oneByte[0] & 0xFF; + } + + /** {@inheritDoc} */ + @Override + public void close() throws IOException { + try { + if (currentBlock != null) { + currentBlock.close(); + currentBlock = null; + } + } finally { + in.close(); + } + } + + /** {@inheritDoc} */ + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (endReached) { + return -1; + } + int r = readOnce(b, off, len); + if (r == -1) { + nextBlock(); + if (!endReached) { + r = readOnce(b, off, len); + } + } + if (r != -1) { + if (expectBlockDependency) { + appendToBlockDependencyBuffer(b, off, r); + } + if (expectContentChecksum) { + contentHash.update(b, off, r); + } + } + return r; + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return in.getBytesRead(); + } + + private void init(boolean firstFrame) throws IOException { + if (readSignature(firstFrame)) { + readFrameDescriptor(); + nextBlock(); + } + } + + private boolean readSignature(boolean firstFrame) throws IOException { + String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage"; + final byte[] b = new byte[4]; + int read = IOUtils.readFully(in, b); + count(read); + if (0 == read && !firstFrame) { + // good LZ4 frame and nothing after it + endReached = true; + return false; + } + if (4 != read) { + throw new IOException(garbageMessage); + } + + read = skipSkippableFrame(b); + if (0 == read && !firstFrame) { + // good LZ4 frame with only some skippable frames after it + endReached = true; + return false; + } + if (4 != read || !matches(b, 4)) { + throw new IOException(garbageMessage); + } + return true; + } + + private void readFrameDescriptor() throws IOException { + int flags = readOneByte(); + if (flags == -1) { + throw new IOException("Premature end of stream while reading frame flags"); + } + contentHash.update(flags); + if ((flags & VERSION_MASK) != SUPPORTED_VERSION) { + throw new IOException("Unsupported version " + (flags >> 6)); + } + expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0; + if (expectBlockDependency) { + if (blockDependencyBuffer == null) { + blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]; + } + } else { + blockDependencyBuffer = null; + } + expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0; + expectContentSize = (flags & CONTENT_SIZE_MASK) != 0; + expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0; + int bdByte = readOneByte(); + if (bdByte == -1) { // max size is irrelevant for this implementation + throw new IOException("Premature end of stream while reading frame BD byte"); + } + contentHash.update(bdByte); + if (expectContentSize) { // for now we don't care, contains the uncompressed size + byte[] contentSize = new byte[8]; + int skipped = IOUtils.readFully(in, contentSize); + count(skipped); + if (8 != skipped) { + throw new IOException("Premature end of stream while reading content size"); + } + contentHash.update(contentSize, 0, contentSize.length); + } + int headerHash = readOneByte(); + if (headerHash == -1) { // partial hash of header. 
+ throw new IOException("Premature end of stream while reading frame header checksum"); + } + int expectedHash = (int) ((contentHash.getValue() >> 8) & 0xff); + contentHash.reset(); + if (headerHash != expectedHash) { + throw new IOException("frame header checksum mismatch."); + } + } + + private void nextBlock() throws IOException { + maybeFinishCurrentBlock(); + long len = ByteUtils.fromLittleEndian(supplier, 4); + boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0; + int realLen = (int) (len & (~UNCOMPRESSED_FLAG_MASK)); + if (realLen == 0) { + verifyContentChecksum(); + if (!decompressConcatenated) { + endReached = true; + } else { + init(false); + } + return; + } + InputStream capped = new BoundedInputStream(in, realLen); + if (expectBlockChecksum) { + capped = new ChecksumCalculatingInputStream(blockHash, capped); + } + if (uncompressed) { + inUncompressed = true; + currentBlock = capped; + } else { + inUncompressed = false; + BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped); + if (expectBlockDependency) { + s.prefill(blockDependencyBuffer); + } + currentBlock = s; + } + } + + private void maybeFinishCurrentBlock() throws IOException { + if (currentBlock != null) { + currentBlock.close(); + currentBlock = null; + if (expectBlockChecksum) { + verifyChecksum(blockHash, "block"); + blockHash.reset(); + } + } + } + + private void verifyContentChecksum() throws IOException { + if (expectContentChecksum) { + verifyChecksum(contentHash, "content"); + } + contentHash.reset(); + } + + private void verifyChecksum(XXHash32 hash, String kind) throws IOException { + byte[] checksum = new byte[4]; + int read = IOUtils.readFully(in, checksum); + count(read); + if (4 != read) { + throw new IOException("Premature end of stream while reading " + kind + " checksum"); + } + long expectedHash = hash.getValue(); + if (expectedHash != ByteUtils.fromLittleEndian(checksum)) { + throw new IOException(kind + " checksum mismatch."); + } + } + + private int readOneByte() throws IOException { + final int b = in.read(); + if (b != -1) { + count(1); + return b & 0xFF; + } + return -1; + } + + private int readOnce(byte[] b, int off, int len) throws IOException { + if (inUncompressed) { + int cnt = currentBlock.read(b, off, len); + count(cnt); + return cnt; + } + BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock; + long before = l.getBytesRead(); + int cnt = currentBlock.read(b, off, len); + count(l.getBytesRead() - before); + return cnt; + } + + private static boolean isSkippableFrameSignature(byte[] b) { + if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) { + return false; + } + for (int i = 1; i < 4; i++) { + if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) { + return false; + } + } + return true; + } + + /** + * Skips over the contents of a skippable frame as well as + * skippable frames following it. 
+ * + * <p>It then tries to read four more bytes which are supposed to + * hold an LZ4 signature and returns the number of bytes read + * while storing the bytes in the given array.</p> + */ + private int skipSkippableFrame(byte[] b) throws IOException { + int read = 4; + while (read == 4 && isSkippableFrameSignature(b)) { + long len = ByteUtils.fromLittleEndian(supplier, 4); + long skipped = IOUtils.skip(in, len); + count(skipped); + if (len != skipped) { + throw new IOException("Premature end of stream while skipping frame"); + } + read = IOUtils.readFully(in, b); + count(read); + } + return read; + } + + private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) { + len = Math.min(len, blockDependencyBuffer.length); + if (len > 0) { + int keep = blockDependencyBuffer.length - len; + if (keep > 0) { + // move last keep bytes towards the start of the buffer + System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep); + } + // append new data + System.arraycopy(b, off, blockDependencyBuffer, keep, len); + } + } + + /** + * Checks if the signature matches what is expected for a .lz4 file. + * + * <p>.lz4 files start with a four byte signature.</p> + * + * @param signature the bytes to check + * @param length the number of bytes to check + * @return true if this is a .sz stream, false otherwise + */ + public static boolean matches(final byte[] signature, final int length) { + + if (length < LZ4_SIGNATURE.length) { + return false; + } + + byte[] shortenedSig = signature; + if (signature.length > LZ4_SIGNATURE.length) { + shortenedSig = new byte[LZ4_SIGNATURE.length]; + System.arraycopy(signature, 0, shortenedSig, 0, LZ4_SIGNATURE.length); + } + + return Arrays.equals(shortenedSig, LZ4_SIGNATURE); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java new file mode 100644 index 000000000..4bd00234e --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.commons.compress.compressors.CompressorOutputStream; +import org.apache.commons.compress.utils.ByteUtils; + +/** + * CompressorOutputStream for the LZ4 frame format. 
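+ *
+ * <p>A minimal usage sketch with the default parameters; {@code sink}
+ * and {@code data} are assumptions of the example (an OutputStream to
+ * write the compressed frame to and the uncompressed bytes,
+ * respectively):</p>
+ * <pre>{@code
+ * try (FramedLZ4CompressorOutputStream out =
+ *          new FramedLZ4CompressorOutputStream(sink)) {
+ *     out.write(data);
+ * }
+ * }</pre>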
+ * + * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p> + * + * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a> + * @since 1.14 + * @NotThreadSafe + */ +public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { + + private static final byte[] END_MARK = new byte[4]; + + // used in one-arg write method + private final byte[] oneByte = new byte[1]; + + private final byte[] blockData; + private final OutputStream out; + private final Parameters params; + private boolean finished = false; + private int currentIndex = 0; + + // used for frame header checksum and content checksum, if requested + private final XXHash32 contentHash = new XXHash32(); + // used for block checksum, if requested + private final XXHash32 blockHash; + + // only created if the config requires block dependency + private byte[] blockDependencyBuffer; + private int collectedBlockDependencyBytes; + + /** + * The block sizes supported by the format. + */ + public enum BlockSize { + /** Block size of 64K */ + K64(64 * 1024, 4), + /** Block size of 256K */ + K256(256 * 1024, 5), + /** Block size of 1M */ + M1(1024 * 1024, 6), + /** Block size of 4M */ + M4(4096 * 1024, 7); + + private final int size, index; + BlockSize(int size, int index) { + this.size = size; + this.index = index; + } + int getSize() { + return size; + } + int getIndex() { + return index; + } + } + + /** + * Parameters of the LZ4 frame format. + */ + public static class Parameters { + private final BlockSize blockSize; + private final boolean withContentChecksum, withBlockChecksum, withBlockDependency; + private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params; + + /** + * The default parameters of 4M block size, enabled content + * checksum, disabled block checksums and independent blocks. + * + * <p>This matches the defaults of the lz4 command line utility.</p> + */ + public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false); + + /** + * Sets up custom a custom block size for the LZ4 stream but + * otherwise uses the defaults of enabled content checksum, + * disabled block checksums and independent blocks. + * @param blockSize the size of a single block. + */ + public Parameters(BlockSize blockSize) { + this(blockSize, true, false, false); + } + /** + * Sets up custom a custom block size for the LZ4 stream but + * otherwise uses the defaults of enabled content checksum, + * disabled block checksums and independent blocks. + * @param blockSize the size of a single block. + * @param lz77params parameters used to fine-tune compression, + * in particular to balance compression ratio vs compression + * speed. + */ + public Parameters(BlockSize blockSize, + org.apache.commons.compress.compressors.lz77support.Parameters lz77params) { + this(blockSize, true, false, false, lz77params); + } + /** + * Sets up custom parameters for the LZ4 stream. + * @param blockSize the size of a single block. + * @param withContentChecksum whether to write a content checksum + * @param withBlockChecksum whether to write a block checksum. + * Note that block checksums are not supported by the lz4 + * command line utility + * @param withBlockDependency whether a block may depend on + * the content of a previous block. Enabling this may improve + * compression ratio but makes it impossible to decompress the + * output in parallel. 
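+ * For example, {@code new Parameters(BlockSize.K64, true, false, true)}
+ * enables block dependency together with a content checksum while
+ * leaving block checksums disabled.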
+ */ + public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum, + boolean withBlockDependency) { + this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency, + BlockLZ4CompressorOutputStream.createParameterBuilder().build()); + } + + /** + * Sets up custom parameters for the LZ4 stream. + * @param blockSize the size of a single block. + * @param withContentChecksum whether to write a content checksum + * @param withBlockChecksum whether to write a block checksum. + * Note that block checksums are not supported by the lz4 + * command line utility + * @param withBlockDependency whether a block may depend on + * the content of a previous block. Enabling this may improve + * compression ratio but makes it impossible to decompress the + * output in parallel. + * @param lz77params parameters used to fine-tune compression, + * in particular to balance compression ratio vs compression + * speed. + */ + public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum, + boolean withBlockDependency, + org.apache.commons.compress.compressors.lz77support.Parameters lz77params) { + this.blockSize = blockSize; + this.withContentChecksum = withContentChecksum; + this.withBlockChecksum = withBlockChecksum; + this.withBlockDependency = withBlockDependency; + this.lz77params = lz77params; + } + + @Override + public String toString() { + return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum + + ", withBlockChecksum " + withBlockChecksum + ", withBlockDependency " + withBlockDependency; + } + } + + /** + * Constructs a new output stream that compresses data using the + * LZ4 frame format using the default block size of 4MB. + * @param out the OutputStream to which to write the compressed data + * @throws IOException if writing the signature fails + */ + public FramedLZ4CompressorOutputStream(OutputStream out) throws IOException { + this(out, Parameters.DEFAULT); + } + + /** + * Constructs a new output stream that compresses data using the + * LZ4 frame format using the given block size. + * @param out the OutputStream to which to write the compressed data + * @param params the parameters to use + * @throws IOException if writing the signature fails + */ + public FramedLZ4CompressorOutputStream(OutputStream out, Parameters params) throws IOException { + this.params = params; + blockData = new byte[params.blockSize.getSize()]; + this.out = out; + blockHash = params.withBlockChecksum ? new XXHash32() : null; + out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE); + writeFrameDescriptor(); + blockDependencyBuffer = params.withBlockDependency + ? 
new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE] + : null; + } + + @Override + public void write(int b) throws IOException { + oneByte[0] = (byte) (b & 0xff); + write(oneByte); + } + + @Override + public void write(byte[] data, int off, int len) throws IOException { + if (params.withContentChecksum) { + contentHash.update(data, off, len); + } + if (currentIndex + len > blockData.length) { + flushBlock(); + while (len > blockData.length) { + System.arraycopy(data, off, blockData, 0, blockData.length); + off += blockData.length; + len -= blockData.length; + currentIndex = blockData.length; + flushBlock(); + } + } + System.arraycopy(data, off, blockData, currentIndex, len); + currentIndex += len; + } + + @Override + public void close() throws IOException { + try { + finish(); + } finally { + out.close(); + } + } + + /** + * Compresses all remaining data and writes it to the stream, + * doesn't close the underlying stream. + * @throws IOException if an error occurs + */ + public void finish() throws IOException { + if (!finished) { + if (currentIndex > 0) { + flushBlock(); + } + writeTrailer(); + finished = true; + } + } + + private void writeFrameDescriptor() throws IOException { + int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION; + if (!params.withBlockDependency) { + flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK; + } + if (params.withContentChecksum) { + flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK; + } + if (params.withBlockChecksum) { + flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK; + } + out.write(flags); + contentHash.update(flags); + int bd = (params.blockSize.getIndex() << 4) & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK; + out.write(bd); + contentHash.update(bd); + out.write((int) ((contentHash.getValue() >> 8) & 0xff)); + contentHash.reset(); + } + + private void flushBlock() throws IOException { + final boolean withBlockDependency = params.withBlockDependency; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) { + if (withBlockDependency) { + o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes, + collectedBlockDependencyBytes); + } + o.write(blockData, 0, currentIndex); + } + if (withBlockDependency) { + appendToBlockDependencyBuffer(blockData, 0, currentIndex); + } + byte[] b = baos.toByteArray(); + if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize + ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK, + 4); + out.write(blockData, 0, currentIndex); + if (params.withBlockChecksum) { + blockHash.update(blockData, 0, currentIndex); + } + } else { + ByteUtils.toLittleEndian(out, b.length, 4); + out.write(b); + if (params.withBlockChecksum) { + blockHash.update(b, 0, b.length); + } + } + if (params.withBlockChecksum) { + ByteUtils.toLittleEndian(out, blockHash.getValue(), 4); + blockHash.reset(); + } + currentIndex = 0; + } + + private void writeTrailer() throws IOException { + out.write(END_MARK); + if (params.withContentChecksum) { + ByteUtils.toLittleEndian(out, contentHash.getValue(), 4); + } + } + + private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) { + len = Math.min(len, blockDependencyBuffer.length); + if (len > 0) { + int keep = blockDependencyBuffer.length - len; + if (keep > 0) { + // move last keep bytes towards the start of the buffer + 
System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep); + } + // append new data + System.arraycopy(b, off, blockDependencyBuffer, keep, len); + collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len, + blockDependencyBuffer.length); + } + } + +} + diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java b/src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java new file mode 100644 index 000000000..23d29b5f4 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz4; + +import static java.lang.Integer.rotateLeft; + +import java.util.zip.Checksum; + +import static org.apache.commons.compress.utils.ByteUtils.fromLittleEndian; + +/** + * Implementation of the xxhash32 hash algorithm. + * + * @see <a href="http://cyan4973.github.io/xxHash/">xxHash</a> + * @NotThreadSafe + * @since 1.14 + */ +public class XXHash32 implements Checksum { + + private static final int BUF_SIZE = 16; + private static final int ROTATE_BITS = 13; + + private static final int PRIME1 = (int) 2654435761L; + private static final int PRIME2 = (int) 2246822519L; + private static final int PRIME3 = (int) 3266489917L; + private static final int PRIME4 = 668265263; + private static final int PRIME5 = 374761393; + + private final byte[] oneByte = new byte[1]; + private final int[] state = new int[4]; + // Note: the code used to use ByteBuffer but the manual method is 50% faster + // See: https://git-wip-us.apache.org/repos/asf/commons-compress/diff/2f56fb5c + private final byte[] buffer = new byte[BUF_SIZE]; + private final int seed; + + private int totalLen; + private int pos; + + /** + * Creates an XXHash32 instance with a seed of 0. + */ + public XXHash32() { + this(0); + } + + /** + * Creates an XXHash32 instance. 
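+ *
+ * <p>Like any other {@link java.util.zip.Checksum} the instance is fed
+ * with {@code update} and queried with {@code getValue}; a brief sketch
+ * in which the {@code data} array is an assumption of the example:</p>
+ * <pre>{@code
+ * XXHash32 hash = new XXHash32(0);
+ * hash.update(data, 0, data.length);
+ * long value = hash.getValue(); // unsigned 32-bit hash in the lower bits
+ * }</pre>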
+ * @param seed the seed to use + */ + public XXHash32(int seed) { + this.seed = seed; + initializeState(); + } + + @Override + public void reset() { + initializeState(); + totalLen = 0; + pos = 0; + } + + @Override + public void update(int b) { + oneByte[0] = (byte) (b & 0xff); + update(oneByte, 0, 1); + } + + @Override + public void update(byte[] b, int off, final int len) { + if (len <= 0) { + return; + } + totalLen += len; + + final int end = off + len; + + if (pos + len < BUF_SIZE) { + System.arraycopy(b, off, buffer, pos, len); + pos += len; + return; + } + + if (pos > 0) { + final int size = BUF_SIZE - pos; + System.arraycopy(b, off, buffer, pos, size); + process(buffer, 0); + off += size; + } + + final int limit = end - BUF_SIZE; + while (off <= limit) { + process(b, off); + off += BUF_SIZE; + } + + if (off < end) { + pos = end - off; + System.arraycopy(b, off, buffer, 0, pos); + } + } + + @Override + public long getValue() { + int hash; + if (totalLen > BUF_SIZE) { + hash = + rotateLeft(state[0], 1) + + rotateLeft(state[1], 7) + + rotateLeft(state[2], 12) + + rotateLeft(state[3], 18); + } else { + hash = state[2] + PRIME5; + } + hash += totalLen; + + int idx = 0; + final int limit = pos - 4; + for (; idx <= limit; idx += 4) { + hash = rotateLeft(hash + getInt(buffer, idx) * PRIME3, 17) * PRIME4; + } + while (idx < pos) { + hash = rotateLeft(hash + (buffer[idx++] & 0xff) * PRIME5, 11) * PRIME1; + } + + hash ^= hash >>> 15; + hash *= PRIME2; + hash ^= hash >>> 13; + hash *= PRIME3; + hash ^= hash >>> 16; + return hash & 0xffffffffL; + } + + private static int getInt(byte[] buffer, int idx) { + return (int) (fromLittleEndian(buffer, idx, 4) & 0xffffffffL); + } + + private void initializeState() { + state[0] = seed + PRIME1 + PRIME2; + state[1] = seed + PRIME2; + state[2] = seed; + state[3] = seed - PRIME1; + } + + private void process(byte[] b, int offset) { + // local shadows for performance + int s0 = state[0]; + int s1 = state[1]; + int s2 = state[2]; + int s3 = state[3]; + + s0 = rotateLeft(s0 + getInt(b, offset) * PRIME2, ROTATE_BITS) * PRIME1; + s1 = rotateLeft(s1 + getInt(b, offset + 4) * PRIME2, ROTATE_BITS) * PRIME1; + s2 = rotateLeft(s2 + getInt(b, offset + 8) * PRIME2, ROTATE_BITS) * PRIME1; + s3 = rotateLeft(s3 + getInt(b, offset + 12) * PRIME2, ROTATE_BITS) * PRIME1; + + state[0] = s0; + state[1] = s1; + state[2] = s2; + state[3] = s3; + + pos = 0; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/package.html b/src/main/java/org/apache/commons/compress/compressors/lz4/package.html new file mode 100644 index 000000000..54de62bc3 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/package.html @@ -0,0 +1,37 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +--> + <body> + <p>Provides stream classes for the + <a href="http://lz4.github.io/lz4/">LZ4</a> + algorithm.</p> + + <p>The block LZ4 format which only contains the compressed data is + supported by the <code>BlockLZ4Compressor*putStream</code> + classes while the frame format is implemented + by <code>FramedLZ4Compressor*putStream</code>. The + implementation in Commons Compress is based on the + specifications "Last revised: 2015-03-26" for the block format + and version "1.5.1 (31/03/2015)" for the frame format.</p> + + <p>Only the frame format can be auto-detected this means you have + to speficy the format explicitly if you want to read a block LZ4 + stream via <code>CompressorStreamFactory</code>.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java new file mode 100644 index 000000000..8a1371af9 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java @@ -0,0 +1,343 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz77support; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.ByteUtils; +import org.apache.commons.compress.utils.CountingInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * Encapsulates code common to LZ77 decompressors. + * + * <p>Assumes the stream consists of blocks of literal data and + * back-references (called copies) in any order. Of course the first + * block must be a literal block for the scheme to work - unless the + * {@link #prefill prefill} method has been used to provide initial + * data that is never returned by {@link #read read} but only used for + * back-references.</p> + * + * <p>Subclasses must override the three-arg {@link #read read} method + * as the no-arg version delegates to it and the default + * implementation delegates to the no-arg version, leading to infinite + * mutual recursion and a {@code StackOverflowError} otherwise.</p> + * + * <p>The contract for subclasses' {@code read} implementation is:</p> + * <ul> + * + * <li>keep track of the current state of the stream. 
Is it inside a + * literal block or a back-reference or in-between blocks?</li> + * + * <li>Use {@link #readOneByte} to access the underlying stream + * directly.</li> + * + * <li>If a new literal block starts, use {@link #startLiteral} to + * tell this class about it and read the literal data using {@link + * #readLiteral} until it returns {@code 0}. {@link + * #hasMoreDataInBlock} will return {@code false} before the next + * call to {@link #readLiteral} would return {@code 0}.</li> + * + * <li>If a new back-reference starts, use {@link #startBackReference} to + * tell this class about it and read the literal data using {@link + * #readBackReference} until it returns {@code 0}. {@link + * #hasMoreDataInBlock} will return {@code false} before the next + * call to {@link #readBackReference} would return {@code 0}.</li> + * + * <li>If the end of the stream has been reached, return {@code -1} + * as this class' methods will never do so themselves.</li> + * + * </ul> + * + * <p>{@link #readOneByte} and {@link #readLiteral} update the counter + * for bytes read.</p> + * + * @since 1.14 + */ +public abstract class AbstractLZ77CompressorInputStream extends CompressorInputStream + implements InputStreamStatistics { + + /** Size of the window - must be bigger than the biggest offset expected. */ + private final int windowSize; + + /** + * Buffer to write decompressed bytes to for back-references, will + * be three times windowSize big. + * + * <p>Three times so we can slide the whole buffer a windowSize to + * the left once we've read twice windowSize and still have enough + * data inside of it to satisfy back-references.</p> + */ + private final byte[] buf; + + /** One behind the index of the last byte in the buffer that was written, i.e. the next position to write to */ + private int writeIndex; + + /** Index of the next byte to be read. */ + private int readIndex; + + /** The underlying stream to read compressed data from */ + private final CountingInputStream in; + + /** Number of bytes still to be read from the current literal or back-reference. */ + private long bytesRemaining; + + /** Offset of the current back-reference. */ + private int backReferenceOffset; + + /** uncompressed size */ + private int size = 0; + + // used in no-arg read method + private final byte[] oneByte = new byte[1]; + + /** + * Supplier that delegates to {@link #readOneByte}. + */ + protected final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() { + @Override + public int getAsByte() throws IOException { + return readOneByte(); + } + }; + + /** + * Creates a new LZ77 input stream. + * + * @param is + * An InputStream to read compressed data from + * @param windowSize + * Size of the window kept for back-references, must be bigger than the biggest offset expected. + * + * @throws IOException if reading fails + */ + public AbstractLZ77CompressorInputStream(final InputStream is, int windowSize) throws IOException { + this.in = new CountingInputStream(is); + this.windowSize = windowSize; + buf = new byte[3 * windowSize]; + writeIndex = readIndex = 0; + bytesRemaining = 0; + } + + /** {@inheritDoc} */ + @Override + public int read() throws IOException { + return read(oneByte, 0, 1) == -1 ? 
-1 : oneByte[0] & 0xFF; + } + + /** {@inheritDoc} */ + @Override + public void close() throws IOException { + in.close(); + } + + /** {@inheritDoc} */ + @Override + public int available() { + return writeIndex - readIndex; + } + + /** + * Get the uncompressed size of the stream + * + * @return the uncompressed size + */ + public int getSize() { + return size; + } + + /** + * Adds some initial data to fill the window with. + * + * <p>This is used if the stream has been cut into blocks and + * back-references of one block may refer to data of the previous + * block(s). One such example is the LZ4 frame format using block + * dependency.</p> + * + * @param data the data to fill the window with. + * @throws IllegalStateException if the stream has already started to read data + */ + public void prefill(byte[] data) { + if (writeIndex != 0) { + throw new IllegalStateException("the stream has already been read from, can't prefill anymore"); + } + // we don't need more data than the big offset could refer to, so cap it + int len = Math.min(windowSize, data.length); + // we need the last data as we are dealing with *back*-references + System.arraycopy(data, data.length - len, buf, 0, len); + writeIndex += len; + readIndex += len; + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return in.getBytesRead(); + } + + /** + * Used by subclasses to signal the next block contains the given + * amount of literal data. + * @param length the length of the block + */ + protected final void startLiteral(long length) { + bytesRemaining = length; + } + + /** + * Is there still data remaining inside the current block? + * @return true if there is still data remaining inside the current block. + */ + protected final boolean hasMoreDataInBlock() { + return bytesRemaining > 0; + } + + /** + * Reads data from the current literal block. + * @param b buffer to write data to + * @param off offset to start writing to + * @param len maximum amount of data to read + * @return number of bytes read, may be 0. Will never return -1 as + * EOF-detection is the responsibility of the subclass + * @throws IOException if the underlying stream throws or signals + * an EOF before the amount of data promised for the block have + * been read + */ + protected final int readLiteral(final byte[] b, final int off, final int len) throws IOException { + final int avail = available(); + if (len > avail) { + tryToReadLiteral(len - avail); + } + return readFromBuffer(b, off, len); + } + + private void tryToReadLiteral(int bytesToRead) throws IOException { + // min of "what is still inside the literal", "what does the user want" and "how muc can fit into the buffer" + final int reallyTryToRead = Math.min((int) Math.min(bytesToRead, bytesRemaining), + buf.length - writeIndex); + final int bytesRead = reallyTryToRead > 0 + ? 
IOUtils.readFully(in, buf, writeIndex, reallyTryToRead) + : 0 /* happens for bytesRemaining == 0 */; + count(bytesRead); + if (reallyTryToRead != bytesRead) { + throw new IOException("Premature end of stream reading literal"); + } + writeIndex += reallyTryToRead; + bytesRemaining -= reallyTryToRead; + } + + private int readFromBuffer(final byte[] b, final int off, final int len) { + final int readable = Math.min(len, available()); + if (readable > 0) { + System.arraycopy(buf, readIndex, b, off, readable); + readIndex += readable; + if (readIndex > 2 * windowSize) { + slideBuffer(); + } + } + size += readable; + return readable; + } + + private void slideBuffer() { + System.arraycopy(buf, windowSize, buf, 0, windowSize * 2); + writeIndex -= windowSize; + readIndex -= windowSize; + } + + /** + * Used by subclasses to signal the next block contains a back-reference with the given coordinates. + * @param offset the offset of the back-reference + * @param length the length of the back-reference + */ + protected final void startBackReference(int offset, long length) { + backReferenceOffset = offset; + bytesRemaining = length; + } + + /** + * Reads data from the current back-reference. + * @param b buffer to write data to + * @param off offset to start writing to + * @param len maximum amount of data to read + * @return number of bytes read, may be 0. Will never return -1 as + * EOF-detection is the responsibility of the subclass + */ + protected final int readBackReference(final byte[] b, final int off, final int len) { + final int avail = available(); + if (len > avail) { + tryToCopy(len - avail); + } + return readFromBuffer(b, off, len); + } + + private void tryToCopy(int bytesToCopy) { + // this will fit into the buffer without sliding and not + // require more than is available inside the back-reference + int copy = Math.min((int) Math.min(bytesToCopy, bytesRemaining), + buf.length - writeIndex); + if (copy == 0) { + // NOP + } else if (backReferenceOffset == 1) { // pretty common special case + final byte last = buf[writeIndex - 1]; + Arrays.fill(buf, writeIndex, writeIndex + copy, last); + writeIndex += copy; + } else if (copy < backReferenceOffset) { + System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, copy); + writeIndex += copy; + } else { + // back-reference overlaps with the bytes created from it + // like go back two bytes and then copy six (by copying + // the last two bytes three time). + final int fullRots = copy / backReferenceOffset; + for (int i = 0; i < fullRots; i++) { + System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, backReferenceOffset); + writeIndex += backReferenceOffset; + } + + final int pad = copy - (backReferenceOffset * fullRots); + if (pad > 0) { + System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, pad); + writeIndex += pad; + } + } + bytesRemaining -= copy; + } + + /** + * Reads a single byte from the real input stream and ensures the data is accounted for. + * + * @return the byte read as value between 0 and 255 or -1 if EOF has been reached. 
+ * @throws IOException if the underlying stream throws + */ + protected final int readOneByte() throws IOException { + final int b = in.read(); + if (b != -1) { + count(1); + return b & 0xFF; + } + return -1; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java new file mode 100644 index 000000000..27fec8d3c --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java @@ -0,0 +1,559 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz77support; + +import java.io.IOException; +import java.util.Arrays; + +/** + * Helper class for compression algorithms that use the ideas of LZ77. + * + * <p>Most LZ77 derived algorithms split input data into blocks of + * uncompressed data (called literal blocks) and back-references + * (pairs of offsets and lengths) that state "add <code>length</code> + * bytes that are the same as those already written starting + * <code>offset</code> bytes before the current position. The details + * of how those blocks and back-references are encoded are quite + * different between the algorithms and some algorithms perform + * additional steps (Huffman encoding in the case of DEFLATE for + * example).</p> + * + * <p>This class attempts to extract the core logic - finding + * back-references - so it can be re-used. It follows the algorithm + * explained in section 4 of RFC 1951 (DEFLATE) and currently doesn't + * implement the "lazy match" optimization. The three-byte hash + * function used in this class is the same as the one used by zlib and + * InfoZIP's ZIP implementation of DEFLATE. The whole class is + * strongly inspired by InfoZIP's implementation.</p> + * + * <p>LZ77 is used vaguely here (as well as many other places that + * talk about it :-), LZSS would likely be closer to the truth but + * LZ77 has become the synonym for a whole family of algorithms.</p> + * + * <p>The API consists of a compressor that is fed <code>byte</code>s + * and emits {@link Block}s to a registered callback where the blocks + * represent either {@link LiteralBlock literal blocks}, {@link + * BackReference back-references} or {@link EOD end of data + * markers}. In order to ensure the callback receives all information, + * the {@code #finish} method must be used once all data has been fed + * into the compressor.</p> + * + * <p>Several parameters influence the outcome of the "compression":</p> + * <dl> + * + * <dt><code>windowSize</code></dt> <dd>the size of the sliding + * window, must be a power of two - this determines the maximum + * offset a back-reference can take. 
The compressor maintains a + * buffer of twice of <code>windowSize</code> - real world values are + * in the area of 32k.</dd> + * + * <dt><code>minBackReferenceLength</code></dt> + * <dd>Minimal length of a back-reference found. A true minimum of 3 is + * hard-coded inside of this implemention but bigger lengths can be + * configured.</dd> + * + * <dt><code>maxBackReferenceLength</code></dt> + * <dd>Maximal length of a back-reference found.</dd> + * + * <dt><code>maxOffset</code></dt> + * <dd>Maximal offset of a back-reference.</dd> + * + * <dt><code>maxLiteralLength</code></dt> + * <dd>Maximal length of a literal block.</dd> + * </dl> + * + * @see "https://tools.ietf.org/html/rfc1951#section-4" + * @since 1.14 + * @NotThreadSafe + */ +public class LZ77Compressor { + + /** + * Base class representing blocks the compressor may emit. + * + * <p>This class is not supposed to be subclassed by classes + * outside of Commons Compress so it is considered internal and + * changed that would break subclasses may get introduced with + * future releases.</p> + */ + public static abstract class Block { + /** Enumeration of the block types the compressor may emit. */ + public enum BlockType { + LITERAL, BACK_REFERENCE, EOD + } + public abstract BlockType getType(); + } + + /** + * Represents a literal block of data. + * + * <p>For performance reasons this encapsulates the real data, not + * a copy of it. Don't modify the data and process it inside of + * {@link Callback#accept} immediately as it will get overwritten + * sooner or later.</p> + */ + public static final class LiteralBlock extends Block { + private final byte[] data; + private final int offset, length; + public LiteralBlock(byte[] data, int offset, int length) { + this.data = data; + this.offset = offset; + this.length = length; + } + /** + * The literal data. + * + * <p>This returns a life view of the actual data in order to + * avoid copying, modify the array at your own risk.</p> + * @return the data + */ + public byte[] getData() { + return data; + } + /** + * Offset into data where the literal block starts. + * @return the offset + */ + public int getOffset() { + return offset; + } + /** + * Length of literal block. + * @return the length + */ + public int getLength() { + return length; + } + @Override + public BlockType getType() { + return BlockType.LITERAL; + } + @Override + public String toString() { + return "LiteralBlock starting at " + offset + " with length " + length; + } + } + + /** + * Represents a back-reference. + */ + public static final class BackReference extends Block { + private final int offset, length; + public BackReference(int offset, int length) { + this.offset = offset; + this.length = length; + } + /** + * Provides the offset of the back-reference. + * @return the offset + */ + public int getOffset() { + return offset; + } + /** + * Provides the length of the back-reference. + * @return the length + */ + public int getLength() { + return length; + } + @Override + public BlockType getType() { + return BlockType.BACK_REFERENCE; + } + @Override + public String toString() { + return "BackReference with offset " + offset + " and length " + length; + } + } + + /** A simple "we are done" marker. */ + public static final class EOD extends Block { + @Override + public BlockType getType() { + return BlockType.EOD; + } + } + + private static final Block THE_EOD = new EOD(); + + /** + * Callback invoked while the compressor processes data. 
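+     * <p>For example, a callback for a hypothetical output format might
+     * dispatch on the block type like this (only a sketch; the helper
+     * methods <code>writeLiteral</code>, <code>writeCopy</code> and
+     * <code>writeTrailer</code> are placeholders for format specific code):</p>
+     * <pre>
+     * Callback cb = new Callback() {
+     *     public void accept(Block block) throws IOException {
+     *         switch (block.getType()) {
+     *         case LITERAL:
+     *             writeLiteral((LiteralBlock) block);   // hypothetical helper
+     *             break;
+     *         case BACK_REFERENCE:
+     *             writeCopy((BackReference) block);     // hypothetical helper
+     *             break;
+     *         case EOD:
+     *             writeTrailer();                       // hypothetical helper
+     *             break;
+     *         }
+     *     }
+     * };
+     * </pre>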
+ * + * <p>The callback is invoked on the same thread that receives the + * bytes to compress and may be invoked multiple times during the + * execution of {@link #compress} or {@link #finish}.</p> + */ + public interface Callback { + /** + * Consumes a block. + * @param b the block to consume + * @throws IOException in case of an error + */ + void accept(Block b) throws IOException; + } + + static final int NUMBER_OF_BYTES_IN_HASH = 3; + private static final int NO_MATCH = -1; + + private final Parameters params; + private final Callback callback; + + // the sliding window, twice as big as "windowSize" parameter + private final byte[] window; + // the head of hash-chain - indexed by hash-code, points to the + // location inside of window of the latest sequence of bytes with + // the given hash. + private final int[] head; + // for each window-location points to the latest earlier location + // with the same hash. Only stores values for the latest + // "windowSize" elements, the index is "window location modulo + // windowSize". + private final int[] prev; + + // bit mask used when indexing into prev + private final int wMask; + + private boolean initialized = false; + // the position inside of window that shall be encoded right now + private int currentPosition; + // the number of bytes available to compress including the one at + // currentPosition + private int lookahead = 0; + // the hash of the three bytes stating at the current position + private int insertHash = 0; + // the position inside of the window where the current literal + // block starts (in case we are inside of a literal block). + private int blockStart = 0; + // position of the current match + private int matchStart = NO_MATCH; + // number of missed insertString calls for the up to three last + // bytes of the last match that can only be performed once more + // data has been read + private int missedInserts = 0; + + /** + * Initializes a compressor with parameters and a callback. + * @param params the parameters + * @param callback the callback + * @throws NullPointerException if either parameter is <code>null</code> + */ + public LZ77Compressor(Parameters params, Callback callback) { + if (params == null) { + throw new NullPointerException("params must not be null"); + } + if (callback == null) { + throw new NullPointerException("callback must not be null"); + } + this.params = params; + this.callback = callback; + + final int wSize = params.getWindowSize(); + window = new byte[wSize * 2]; + wMask = wSize - 1; + head = new int[HASH_SIZE]; + Arrays.fill(head, NO_MATCH); + prev = new int[wSize]; + } + + /** + * Feeds bytes into the compressor which in turn may emit zero or + * more blocks to the callback during the execution of this + * method. + * @param data the data to compress - must not be null + * @throws IOException if the callback throws an exception + */ + public void compress(byte[] data) throws IOException { + compress(data, 0, data.length); + } + + /** + * Feeds bytes into the compressor which in turn may emit zero or + * more blocks to the callback during the execution of this + * method. 
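+     * <p>Together with {@link #finish} this is the main entry point. A
+     * typical interaction might look like the following sketch, assuming
+     * <code>params</code> and <code>callback</code> have been set up and
+     * <code>readChunk</code> is a hypothetical method that returns the next
+     * chunk of input or <code>null</code> at the end:</p>
+     * <pre>
+     * LZ77Compressor compressor = new LZ77Compressor(params, callback);
+     * byte[] chunk;
+     * while ((chunk = readChunk()) != null) {
+     *     compressor.compress(chunk, 0, chunk.length);
+     * }
+     * compressor.finish(); // emits at least the EOD block
+     * </pre>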
+ * @param data the data to compress - must not be null + * @param off the start offset of the data + * @param len the number of bytes to compress + * @throws IOException if the callback throws an exception + */ + public void compress(byte[] data, int off, int len) throws IOException { + final int wSize = params.getWindowSize(); + while (len > wSize) { // chop into windowSize sized chunks + doCompress(data, off, wSize); + off += wSize; + len -= wSize; + } + if (len > 0) { + doCompress(data, off, len); + } + } + + /** + * Tells the compressor to process all remaining data and signal + * end of data to the callback. + * + * <p>The compressor will in turn emit at least one block ({@link + * EOD}) but potentially multiple blocks to the callback during + * the execution of this method.</p> + * @throws IOException if the callback throws an exception + */ + public void finish() throws IOException { + if (blockStart != currentPosition || lookahead > 0) { + currentPosition += lookahead; + flushLiteralBlock(); + } + callback.accept(THE_EOD); + } + + /** + * Adds some initial data to fill the window with. + * + * <p>This is used if the stream has been cut into blocks and + * back-references of one block may refer to data of the previous + * block(s). One such example is the LZ4 frame format using block + * dependency.</p> + * + * @param data the data to fill the window with. + * @throws IllegalStateException if the compressor has already started to accept data + */ + public void prefill(byte[] data) { + if (currentPosition != 0 || lookahead != 0) { + throw new IllegalStateException("the compressor has already started to accept data, can't prefill anymore"); + } + + // don't need more than windowSize for back-references + final int len = Math.min(params.getWindowSize(), data.length); + System.arraycopy(data, data.length - len, window, 0, len); + + if (len >= NUMBER_OF_BYTES_IN_HASH) { + initialize(); + final int stop = len - NUMBER_OF_BYTES_IN_HASH + 1; + for (int i = 0; i < stop; i++) { + insertString(i); + } + missedInserts = NUMBER_OF_BYTES_IN_HASH - 1; + } else { // not enough data to hash anything + missedInserts = len; + } + blockStart = currentPosition = len; + } + + // we use a 15 bit hashcode as calculated in updateHash + private static final int HASH_SIZE = 1 << 15; + private static final int HASH_MASK = HASH_SIZE - 1; + private static final int H_SHIFT = 5; + + /** + * Assumes we are calculating the hash for three consecutive bytes + * as a rolling hash, i.e. for bytes ABCD if H is the hash of ABC + * the new hash for BCD is nextHash(H, D). 
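+     * <p>For example, starting from a hash of 0, hashing the bytes 1, 2
+     * and 3 and then feeding the byte 4 into <code>nextHash</code> yields
+     * 2148, the same value as hashing the bytes 2, 3 and 4 directly.</p>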
+ * + * <p>The hash is shifted by five bits on each update so all + * effects of A have been swapped after the third update.</p> + */ + private int nextHash(int oldHash, byte nextByte) { + final int nextVal = nextByte & 0xFF; + return ((oldHash << H_SHIFT) ^ nextVal) & HASH_MASK; + } + + // performs the actual algorithm with the pre-condition len <= windowSize + private void doCompress(byte[] data, int off, int len) throws IOException { + int spaceLeft = window.length - currentPosition - lookahead; + if (len > spaceLeft) { + slide(); + } + System.arraycopy(data, off, window, currentPosition + lookahead, len); + lookahead += len; + if (!initialized && lookahead >= params.getMinBackReferenceLength()) { + initialize(); + } + if (initialized) { + compress(); + } + } + + private void slide() throws IOException { + final int wSize = params.getWindowSize(); + if (blockStart != currentPosition && blockStart < wSize) { + flushLiteralBlock(); + blockStart = currentPosition; + } + System.arraycopy(window, wSize, window, 0, wSize); + currentPosition -= wSize; + matchStart -= wSize; + blockStart -= wSize; + for (int i = 0; i < HASH_SIZE; i++) { + int h = head[i]; + head[i] = h >= wSize ? h - wSize : NO_MATCH; + } + for (int i = 0; i < wSize; i++) { + int p = prev[i]; + prev[i] = p >= wSize ? p - wSize : NO_MATCH; + } + } + + private void initialize() { + for (int i = 0; i < NUMBER_OF_BYTES_IN_HASH - 1; i++) { + insertHash = nextHash(insertHash, window[i]); + } + initialized = true; + } + + private void compress() throws IOException { + final int minMatch = params.getMinBackReferenceLength(); + final boolean lazy = params.getLazyMatching(); + final int lazyThreshold = params.getLazyMatchingThreshold(); + + while (lookahead >= minMatch) { + catchUpMissedInserts(); + int matchLength = 0; + int hashHead = insertString(currentPosition); + if (hashHead != NO_MATCH && hashHead - currentPosition <= params.getMaxOffset()) { + // sets matchStart as a side effect + matchLength = longestMatch(hashHead); + + if (lazy && matchLength <= lazyThreshold && lookahead > minMatch) { + // try to find a longer match using the next position + matchLength = longestMatchForNextPosition(matchLength); + } + } + if (matchLength >= minMatch) { + if (blockStart != currentPosition) { + // emit preceeding literal block + flushLiteralBlock(); + blockStart = NO_MATCH; + } + flushBackReference(matchLength); + insertStringsInMatch(matchLength); + lookahead -= matchLength; + currentPosition += matchLength; + blockStart = currentPosition; + } else { + // no match, append to current or start a new literal + lookahead--; + currentPosition++; + if (currentPosition - blockStart >= params.getMaxLiteralLength()) { + flushLiteralBlock(); + blockStart = currentPosition; + } + } + } + } + + /** + * Inserts the current three byte sequence into the dictionary and + * returns the previous head of the hash-chain. 
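+     * <p>The returned value is the window position of the most recent
+     * earlier sequence with the same hash, or <code>NO_MATCH</code> if no
+     * such sequence is known; it is the starting point for
+     * {@link #longestMatch}.</p>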
+ * + * <p>Updates <code>insertHash</code> and <code>prev</code> as a + * side effect.</p> + */ + private int insertString(int pos) { + insertHash = nextHash(insertHash, window[pos - 1 + NUMBER_OF_BYTES_IN_HASH]); + int hashHead = head[insertHash]; + prev[pos & wMask] = hashHead; + head[insertHash] = pos; + return hashHead; + } + + private int longestMatchForNextPosition(final int prevMatchLength) { + // save a bunch of values to restore them if the next match isn't better than the current one + final int prevMatchStart = matchStart; + final int prevInsertHash = insertHash; + + lookahead--; + currentPosition++; + int hashHead = insertString(currentPosition); + final int prevHashHead = prev[currentPosition & wMask]; + int matchLength = longestMatch(hashHead); + + if (matchLength <= prevMatchLength) { + // use the first match, as the next one isn't any better + matchLength = prevMatchLength; + matchStart = prevMatchStart; + + // restore modified values + head[insertHash] = prevHashHead; + insertHash = prevInsertHash; + currentPosition--; + lookahead++; + } + return matchLength; + } + + private void insertStringsInMatch(int matchLength) { + // inserts strings contained in current match + // insertString inserts the byte 2 bytes after position, which may not yet be available -> missedInserts + final int stop = Math.min(matchLength - 1, lookahead - NUMBER_OF_BYTES_IN_HASH); + // currentPosition has been inserted already + for (int i = 1; i <= stop; i++) { + insertString(currentPosition + i); + } + missedInserts = matchLength - stop - 1; + } + + private void catchUpMissedInserts() { + while (missedInserts > 0) { + insertString(currentPosition - missedInserts--); + } + } + + private void flushBackReference(int matchLength) throws IOException { + callback.accept(new BackReference(currentPosition - matchStart, matchLength)); + } + + private void flushLiteralBlock() throws IOException { + callback.accept(new LiteralBlock(window, blockStart, currentPosition - blockStart)); + } + + /** + * Searches the hash chain for real matches and returns the length + * of the longest match (0 if none were found) that isn't too far + * away (WRT maxOffset). 
+ * + * <p>Sets matchStart to the index of the start position of the + * longest match as a side effect.</p> + */ + private int longestMatch(int matchHead) { + final int minLength = params.getMinBackReferenceLength(); + int longestMatchLength = minLength - 1; + final int maxPossibleLength = Math.min(params.getMaxBackReferenceLength(), lookahead); + final int minIndex = Math.max(0, currentPosition - params.getMaxOffset()); + final int niceBackReferenceLength = Math.min(maxPossibleLength, params.getNiceBackReferenceLength()); + final int maxCandidates = params.getMaxCandidates(); + for (int candidates = 0; candidates < maxCandidates && matchHead >= minIndex; candidates++) { + int currentLength = 0; + for (int i = 0; i < maxPossibleLength; i++) { + if (window[matchHead + i] != window[currentPosition + i]) { + break; + } + currentLength++; + } + if (currentLength > longestMatchLength) { + longestMatchLength = currentLength; + matchStart = matchHead; + if (currentLength >= niceBackReferenceLength) { + // no need to search any further + break; + } + } + matchHead = prev[matchHead & wMask]; + } + return longestMatchLength; // < minLength if no matches have been found, will be ignored in compress() + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java new file mode 100644 index 000000000..fe892f37f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java @@ -0,0 +1,350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz77support; + +/** + * Parameters of the {@link LZ77Compressor compressor}. + */ +public final class Parameters { + /** + * The hard-coded absolute minimal length of a back-reference. + */ + public static final int TRUE_MIN_BACK_REFERENCE_LENGTH = LZ77Compressor.NUMBER_OF_BYTES_IN_HASH; + + /** + * Initializes the builder for the compressor's parameters with a + * <code>minBackReferenceLength</code> of 3 and <code>max*Length</code> + * equal to <code>windowSize - 1</code>. + * + * <p>It is recommended to not use this method directly but rather + * tune a pre-configured builder created by a format specific + * factory like {@link + * org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.</p> + * + * @param windowSize the size of the sliding window - this + * determines the maximum offset a back-reference can take. Must + * be a power of two. + * @throws IllegalArgumentException if windowSize is not a power of two. 
+ * @return a builder configured for the given window size + */ + public static Builder builder(int windowSize) { + return new Builder(windowSize); + } + + /** + * Builder for {@link Parameters} instances. + */ + public static class Builder { + private final int windowSize; + private int minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength; + private Integer niceBackReferenceLength, maxCandidates, lazyThreshold; + private Boolean lazyMatches; + + private Builder(int windowSize) { + if (windowSize < 2 || !isPowerOfTwo(windowSize)) { + throw new IllegalArgumentException("windowSize must be a power of two"); + } + this.windowSize = windowSize; + minBackReferenceLength = TRUE_MIN_BACK_REFERENCE_LENGTH; + maxBackReferenceLength = windowSize - 1; + maxOffset = windowSize - 1; + maxLiteralLength = windowSize; + } + + /** + * Sets the mininal length of a back-reference. + * + * <p>Ensures <code>maxBackReferenceLength</code> is not + * smaller than <code>minBackReferenceLength</code>. + * + * <p>It is recommended to not use this method directly but + * rather tune a pre-configured builder created by a format + * specific factory like {@link + * org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.</p> + * + * @param minBackReferenceLength the minimal length of a back-reference found. A + * true minimum of 3 is hard-coded inside of this implemention + * but bigger lengths can be configured. + * @throws IllegalArgumentException if <code>windowSize</code> + * is smaller than <code>minBackReferenceLength</code>. + * @return the builder + */ + public Builder withMinBackReferenceLength(int minBackReferenceLength) { + this.minBackReferenceLength = Math.max(TRUE_MIN_BACK_REFERENCE_LENGTH, minBackReferenceLength); + if (windowSize < this.minBackReferenceLength) { + throw new IllegalArgumentException("minBackReferenceLength can't be bigger than windowSize"); + } + if (maxBackReferenceLength < this.minBackReferenceLength) { + maxBackReferenceLength = this.minBackReferenceLength; + } + return this; + } + + /** + * Sets the maximal length of a back-reference. + * + * <p>It is recommended to not use this method directly but + * rather tune a pre-configured builder created by a format + * specific factory like {@link + * org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.</p> + * + * @param maxBackReferenceLength maximal length of a + * back-reference found. A value smaller than + * <code>minBackReferenceLength</code> is interpreted as + * <code>minBackReferenceLength</code>. <code>maxBackReferenceLength</code> + * is capped at <code>windowSize - 1</code>. + * @return the builder + */ + public Builder withMaxBackReferenceLength(int maxBackReferenceLength) { + this.maxBackReferenceLength = maxBackReferenceLength < minBackReferenceLength ? minBackReferenceLength + : Math.min(maxBackReferenceLength, windowSize - 1); + return this; + } + + /** + * Sets the maximal offset of a back-reference. + * + * <p>It is recommended to not use this method directly but + * rather tune a pre-configured builder created by a format + * specific factory like {@link + * org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.</p> + * + * @param maxOffset maximal offset of a back-reference. A + * non-positive value as well as values bigger than + * <code>windowSize - 1</code> are interpreted as <code>windowSize + * - 1</code>. 
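+         * <p>For example, with a window size of 65536 both
+         * <code>withMaxOffset(0)</code> and <code>withMaxOffset(100000)</code>
+         * result in a maximal offset of 65535.</p>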
+ * @return the builder + */ + public Builder withMaxOffset(int maxOffset) { + this.maxOffset = maxOffset < 1 ? windowSize - 1 : Math.min(maxOffset, windowSize - 1); + return this; + } + + /** + * Sets the maximal length of a literal block. + * + * <p>It is recommended to not use this method directly but + * rather tune a pre-configured builder created by a format + * specific factory like {@link + * org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream#createParameterBuilder}.</p> + * + * @param maxLiteralLength maximal length of a literal + * block. Negative numbers and 0 as well as values bigger than + * <code>windowSize</code> are interpreted as + * <code>windowSize</code>. + * @return the builder + */ + public Builder withMaxLiteralLength(int maxLiteralLength) { + this.maxLiteralLength = maxLiteralLength < 1 ? windowSize + : Math.min(maxLiteralLength, windowSize); + return this; + } + + /** + * Sets the "nice length" of a back-reference. + * + * <p>When a back-references if this size has been found, stop searching for longer back-references.</p> + * + * <p>This settings can be used to tune the tradeoff between compression speed and compression ratio.</p> + * @param niceLen the "nice length" of a back-reference + * @return the builder + */ + public Builder withNiceBackReferenceLength(int niceLen) { + niceBackReferenceLength = niceLen; + return this; + } + + /** + * Sets the maximum number of back-reference candidates that should be consulted. + * + * <p>This settings can be used to tune the tradeoff between compression speed and compression ratio.</p> + * @param maxCandidates maximum number of back-reference candidates + * @return the builder + */ + public Builder withMaxNumberOfCandidates(int maxCandidates) { + this.maxCandidates = maxCandidates; + return this; + } + + /** + * Sets whether lazy matching should be performed. + * + * <p>Lazy matching means that after a back-reference for a certain position has been found the compressor will + * try to find a longer match for the next position.</p> + * + * <p>Lazy matching is enabled by default and disabled when tuning for speed.</p> + * @param lazy whether lazy matching should be performed + * @return the builder + */ + public Builder withLazyMatching(boolean lazy) { + lazyMatches = lazy; + return this; + } + + /** + * Sets the threshold for lazy matching. + * + * <p>Even if lazy matching is enabled it will not be performed if the length of the back-reference found for + * the current position is longer than this value.</p> + * @param threshold the threshold for lazy matching + * @return the builder + */ + public Builder withLazyThreshold(int threshold) { + lazyThreshold = threshold; + return this; + } + + /** + * Changes the default setting for "nice back-reference length" and "maximum number of candidates" for improved + * compression speed at the cost of compression ratio. + * + * <p>Use this method after configuring "maximum back-reference length".</p> + * @return the builder + */ + public Builder tunedForSpeed() { + niceBackReferenceLength = Math.max(minBackReferenceLength, maxBackReferenceLength / 8); + maxCandidates = Math.max(32, windowSize / 1024); + lazyMatches = false; + lazyThreshold = minBackReferenceLength; + return this; + } + + /** + * Changes the default setting for "nice back-reference length" and "maximum number of candidates" for improved + * compression ratio at the cost of compression speed. 
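+         * <p>For example, a configuration tuned for ratio might be built
+         * like this (the concrete sizes are only illustrative):</p>
+         * <pre>
+         * Parameters params = Parameters.builder(32768)
+         *     .withMaxBackReferenceLength(258)
+         *     .tunedForCompressionRatio()
+         *     .build();
+         * </pre>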
+ * + * <p>Use this method after configuring "maximum back-reference length".</p> + * @return the builder + */ + public Builder tunedForCompressionRatio() { + niceBackReferenceLength = lazyThreshold = maxBackReferenceLength; + maxCandidates = Math.max(32, windowSize / 16); + lazyMatches = true; + return this; + } + + /** + * Creates the {@link Parameters} instance. + * @return the configured {@link Parameters} instance. + */ + public Parameters build() { + // default settings tuned for a compromise of good compression and acceptable speed + int niceLen = niceBackReferenceLength != null ? niceBackReferenceLength + : Math.max(minBackReferenceLength, maxBackReferenceLength / 2); + int candidates = maxCandidates != null ? maxCandidates : Math.max(256, windowSize / 128); + boolean lazy = lazyMatches == null || lazyMatches; + int threshold = lazy ? (lazyThreshold != null ? lazyThreshold : niceLen) : minBackReferenceLength; + + return new Parameters(windowSize, minBackReferenceLength, maxBackReferenceLength, + maxOffset, maxLiteralLength, niceLen, candidates, lazy, threshold); + } + } + + private final int windowSize, minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength, + niceBackReferenceLength, maxCandidates, lazyThreshold; + private final boolean lazyMatching; + + private Parameters(int windowSize, int minBackReferenceLength, int maxBackReferenceLength, int maxOffset, + int maxLiteralLength, int niceBackReferenceLength, int maxCandidates, boolean lazyMatching, + int lazyThreshold) { + this.windowSize = windowSize; + this.minBackReferenceLength = minBackReferenceLength; + this.maxBackReferenceLength = maxBackReferenceLength; + this.maxOffset = maxOffset; + this.maxLiteralLength = maxLiteralLength; + this.niceBackReferenceLength = niceBackReferenceLength; + this.maxCandidates = maxCandidates; + this.lazyMatching = lazyMatching; + this.lazyThreshold = lazyThreshold; + } + + /** + * Gets the size of the sliding window - this determines the + * maximum offset a back-reference can take. + * @return the size of the sliding window + */ + public int getWindowSize() { + return windowSize; + } + /** + * Gets the minimal length of a back-reference found. + * @return the minimal length of a back-reference found + */ + public int getMinBackReferenceLength() { + return minBackReferenceLength; + } + /** + * Gets the maximal length of a back-reference found. + * @return the maximal length of a back-reference found + */ + public int getMaxBackReferenceLength() { + return maxBackReferenceLength; + } + /** + * Gets the maximal offset of a back-reference found. + * @return the maximal offset of a back-reference found + */ + public int getMaxOffset() { + return maxOffset; + } + /** + * Gets the maximal length of a literal block. + * @return the maximal length of a literal block + */ + public int getMaxLiteralLength() { + return maxLiteralLength; + } + + /** + * Gets the length of a back-reference that is considered nice enough to stop searching for longer ones. + * @return the length of a back-reference that is considered nice enough to stop searching + */ + public int getNiceBackReferenceLength() { + return niceBackReferenceLength; + } + + /** + * Gets the maximum number of back-reference candidates to consider. + * @return the maximum number of back-reference candidates to consider + */ + public int getMaxCandidates() { + return maxCandidates; + } + + /** + * Gets whether to perform lazy matching. 
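+     * <p>Unless changed via {@link Builder#withLazyMatching} or
+     * {@link Builder#tunedForSpeed}, lazy matching is performed.</p>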
+ * @return whether to perform lazy matching + */ + public boolean getLazyMatching() { + return lazyMatching; + } + + /** + * Gets the threshold for lazy matching. + * @return the threshold for lazy matching + */ + public int getLazyMatchingThreshold() { + return lazyThreshold; + } + + private static final boolean isPowerOfTwo(int x) { + // pre-condition: x > 0 + return (x & (x - 1)) == 0; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/package.html b/src/main/java/org/apache/commons/compress/compressors/lz77support/package.html new file mode 100644 index 000000000..951b1460a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/package.html @@ -0,0 +1,28 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides utility classes for LZ77 based algorithms.</p> + + <p>The classes in this package are currently used by the LZ4 and + Snappy implementations but might also help implementing other + algorithms that derive from LZ77 and LZSS.</p> + + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/lzma/LZMACompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/lzma/LZMACompressorInputStream.java new file mode 100644 index 000000000..794e3a540 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lzma/LZMACompressorInputStream.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lzma; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.MemoryLimitException; +import org.tukaani.xz.LZMAInputStream; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.CountingInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * LZMA decompressor. 
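+ * <p>A minimal usage sketch, where <code>"archive.tar.lzma"</code> is just
+ * an example file name:</p>
+ * <pre>
+ * InputStream fin = new FileInputStream("archive.tar.lzma");
+ * LZMACompressorInputStream in = new LZMACompressorInputStream(fin);
+ * byte[] buffer = new byte[8192];
+ * int n;
+ * while ((n = in.read(buffer)) != -1) {
+ *     // consume the n decompressed bytes in buffer
+ * }
+ * in.close();
+ * </pre>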
+ * @since 1.6 + */ +public class LZMACompressorInputStream extends CompressorInputStream + implements InputStreamStatistics { + + private final CountingInputStream countingStream; + private final InputStream in; + + /** + * Creates a new input stream that decompresses LZMA-compressed data + * from the specified input stream. + * + * @param inputStream where to read the compressed data + * + * @throws IOException if the input is not in the .lzma format, + * the input is corrupt or truncated, the .lzma + * headers specify sizes that are not supported + * by this implementation, or the underlying + * <code>inputStream</code> throws an exception + */ + public LZMACompressorInputStream(final InputStream inputStream) + throws IOException { + in = new LZMAInputStream(countingStream = new CountingInputStream(inputStream), -1); + } + + /** + * Creates a new input stream that decompresses LZMA-compressed data + * from the specified input stream. + * + * @param inputStream where to read the compressed data + * + * @param memoryLimitInKb calculated memory use threshold. Throws MemoryLimitException + * if calculate memory use is above this threshold + * + * @throws IOException if the input is not in the .lzma format, + * the input is corrupt or truncated, the .lzma + * headers specify sizes that are not supported + * by this implementation, or the underlying + * <code>inputStream</code> throws an exception + * + * @since 1.14 + */ + public LZMACompressorInputStream(final InputStream inputStream, int memoryLimitInKb) + throws IOException { + try { + in = new LZMAInputStream(countingStream = new CountingInputStream(inputStream), memoryLimitInKb); + } catch (org.tukaani.xz.MemoryLimitException e) { + //convert to commons-compress exception + throw new MemoryLimitException(e.getMemoryNeeded(), e.getMemoryLimit(), e); + } + } + + /** {@inheritDoc} */ + @Override + public int read() throws IOException { + final int ret = in.read(); + count(ret == -1 ? 0 : 1); + return ret; + } + + /** {@inheritDoc} */ + @Override + public int read(final byte[] buf, final int off, final int len) throws IOException { + final int ret = in.read(buf, off, len); + count(ret); + return ret; + } + + /** {@inheritDoc} */ + @Override + public long skip(final long n) throws IOException { + return IOUtils.skip(in, n); + } + + /** {@inheritDoc} */ + @Override + public int available() throws IOException { + return in.available(); + } + + /** {@inheritDoc} */ + @Override + public void close() throws IOException { + in.close(); + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return countingStream.getBytesRead(); + } + + /** + * Checks if the signature matches what is expected for an lzma file. 
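+     * <p>For example, a caller probing a stream might check the first
+     * bytes like this (a sketch, where <code>bufferedStream</code> stands
+     * for the caller's own stream):</p>
+     * <pre>
+     * byte[] signature = new byte[3];
+     * int read = bufferedStream.read(signature);
+     * boolean looksLikeLzma = LZMACompressorInputStream.matches(signature, read);
+     * </pre>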
+ * + * @param signature + * the bytes to check + * @param length + * the number of bytes to check + * @return true, if this stream is an lzma compressed stream, false otherwise + * + * @since 1.10 + */ + public static boolean matches(final byte[] signature, final int length) { + return signature != null && length >= 3 && + signature[0] == 0x5d && signature[1] == 0 && + signature[2] == 0; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/lzma/LZMACompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lzma/LZMACompressorOutputStream.java new file mode 100644 index 000000000..e6bdfa455 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lzma/LZMACompressorOutputStream.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lzma; + +import java.io.IOException; +import java.io.OutputStream; +import org.tukaani.xz.LZMA2Options; +import org.tukaani.xz.LZMAOutputStream; + +import org.apache.commons.compress.compressors.CompressorOutputStream; + +/** + * LZMA compressor. + * @since 1.13 + */ +public class LZMACompressorOutputStream extends CompressorOutputStream { + private final LZMAOutputStream out; + + /** + * Creates a LZMA compressor. + * + * @param outputStream the stream to wrap + * @throws IOException on error + */ + public LZMACompressorOutputStream(final OutputStream outputStream) + throws IOException { + out = new LZMAOutputStream(outputStream, new LZMA2Options(), -1); + } + + /** {@inheritDoc} */ + @Override + public void write(final int b) throws IOException { + out.write(b); + } + + /** {@inheritDoc} */ + @Override + public void write(final byte[] buf, final int off, final int len) throws IOException { + out.write(buf, off, len); + } + + /** + * Doesn't do anything as {@link LZMAOutputStream} doesn't support flushing. + */ + @Override + public void flush() throws IOException { + } + + /** + * Finishes compression without closing the underlying stream. + * No more data can be written to this stream after finishing. + * @throws IOException on error + */ + public void finish() throws IOException { + out.finish(); + } + + /** {@inheritDoc} */ + @Override + public void close() throws IOException { + out.close(); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/lzma/LZMAUtils.java b/src/main/java/org/apache/commons/compress/compressors/lzma/LZMAUtils.java new file mode 100644 index 000000000..8722e6d89 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lzma/LZMAUtils.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lzma; + +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.compressors.FileNameUtil; + +/** + * Utility code for the lzma compression format. + * @ThreadSafe + * @since 1.10 + */ +public class LZMAUtils { + + private static final FileNameUtil fileNameUtil; + + /** + * LZMA Header Magic Bytes begin a LZMA file. + */ + private static final byte[] HEADER_MAGIC = { + (byte) 0x5D, 0, 0 + }; + + enum CachedAvailability { + DONT_CACHE, CACHED_AVAILABLE, CACHED_UNAVAILABLE + } + + private static volatile CachedAvailability cachedLZMAAvailability; + + static { + final Map<String, String> uncompressSuffix = new HashMap<>(); + uncompressSuffix.put(".lzma", ""); + uncompressSuffix.put("-lzma", ""); + fileNameUtil = new FileNameUtil(uncompressSuffix, ".lzma"); + cachedLZMAAvailability = CachedAvailability.DONT_CACHE; + try { + Class.forName("org.osgi.framework.BundleEvent"); + } catch (final Exception ex) { + setCacheLZMAAvailablity(true); + } + } + + /** Private constructor to prevent instantiation of this utility class. */ + private LZMAUtils() { + } + + /** + * Checks if the signature matches what is expected for a .lzma file. + * + * @param signature the bytes to check + * @param length the number of bytes to check + * @return true if signature matches the .lzma magic bytes, false otherwise + */ + public static boolean matches(final byte[] signature, final int length) { + if (length < HEADER_MAGIC.length) { + return false; + } + + for (int i = 0; i < HEADER_MAGIC.length; ++i) { + if (signature[i] != HEADER_MAGIC[i]) { + return false; + } + } + + return true; + } + + /** + * Are the classes required to support LZMA compression available? + * @return true if the classes required to support LZMA + * compression are available + */ + public static boolean isLZMACompressionAvailable() { + final CachedAvailability cachedResult = cachedLZMAAvailability; + if (cachedResult != CachedAvailability.DONT_CACHE) { + return cachedResult == CachedAvailability.CACHED_AVAILABLE; + } + return internalIsLZMACompressionAvailable(); + } + + private static boolean internalIsLZMACompressionAvailable() { + try { + LZMACompressorInputStream.matches(null, 0); + return true; + } catch (final NoClassDefFoundError error) { + return false; + } + } + + /** + * Detects common lzma suffixes in the given filename. + * + * @param filename name of a file + * @return {@code true} if the filename has a common lzma suffix, + * {@code false} otherwise + */ + public static boolean isCompressedFilename(final String filename) { + return fileNameUtil.isCompressedFilename(filename); + } + + /** + * Maps the given name of a lzma-compressed file to the name that + * the file should have after uncompression. 
Any filenames with + * the generic ".lzma" suffix (or any other generic lzma suffix) + * is mapped to a name without that suffix. If no lzma suffix is + * detected, then the filename is returned unmapped. + * + * @param filename name of a file + * @return name of the corresponding uncompressed file + */ + public static String getUncompressedFilename(final String filename) { + return fileNameUtil.getUncompressedFilename(filename); + } + + /** + * Maps the given filename to the name that the file should have after + * compression with lzma. + * + * @param filename name of a file + * @return name of the corresponding compressed file + */ + public static String getCompressedFilename(final String filename) { + return fileNameUtil.getCompressedFilename(filename); + } + + /** + * Whether to cache the result of the LZMA check. + * + * <p>This defaults to {@code false} in an OSGi environment and {@code true} otherwise.</p> + * @param doCache whether to cache the result + */ + public static void setCacheLZMAAvailablity(final boolean doCache) { + if (!doCache) { + cachedLZMAAvailability = CachedAvailability.DONT_CACHE; + } else if (cachedLZMAAvailability == CachedAvailability.DONT_CACHE) { + final boolean hasLzma = internalIsLZMACompressionAvailable(); + cachedLZMAAvailability = hasLzma ? CachedAvailability.CACHED_AVAILABLE // NOSONAR + : CachedAvailability.CACHED_UNAVAILABLE; + } + } + + // only exists to support unit tests + static CachedAvailability getCachedLZMAAvailability() { + return cachedLZMAAvailability; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/lzma/package.html b/src/main/java/org/apache/commons/compress/compressors/lzma/package.html new file mode 100644 index 000000000..f3b54730f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lzma/package.html @@ -0,0 +1,32 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream classes using the "stand-alone" LZMA + algorithm.</p> + + <p>The classes in this package are wrappers around stream classes + provided by the public + domain <a href="https://tukaani.org/xz/java.html">XZ for Java</a> + library.</p> + + <p>In general you should prefer the more modern and robust XZ + format over stand-alone LZMA compression.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java b/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java new file mode 100644 index 000000000..a5e512c05 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lzw; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteOrder; + +import org.apache.commons.compress.MemoryLimitException; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.BitInputStream; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * <p>Generic LZW implementation. It is used internally for + * the Z decompressor and the Unshrinking Zip file compression method, + * but may be useful for third-party projects in implementing their own LZW variations.</p> + * + * @NotThreadSafe + * @since 1.10 + */ +public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics { + protected static final int DEFAULT_CODE_SIZE = 9; + protected static final int UNUSED_PREFIX = -1; + + private final byte[] oneByte = new byte[1]; + + protected final BitInputStream in; + private int clearCode = -1; + private int codeSize = DEFAULT_CODE_SIZE; + private byte previousCodeFirstChar; + private int previousCode = UNUSED_PREFIX; + private int tableSize; + private int[] prefixes; + private byte[] characters; + private byte[] outputStack; + private int outputStackLocation; + + protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) { + this.in = new BitInputStream(inputStream, byteOrder); + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public int read() throws IOException { + final int ret = read(oneByte); + if (ret < 0) { + return ret; + } + return 0xff & oneByte[0]; + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + int bytesRead = readFromStack(b, off, len); + while (len - bytesRead > 0) { + final int result = decompressNextSymbol(); + if (result < 0) { + if (bytesRead > 0) { + count(bytesRead); + return bytesRead; + } + return result; + } + bytesRead += readFromStack(b, off + bytesRead, len - bytesRead); + } + count(bytesRead); + return bytesRead; + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return in.getBytesRead(); + } + + /** + * Read the next code and expand it. + * @return the expanded next code + * @throws IOException on error + */ + protected abstract int decompressNextSymbol() throws IOException; + + /** + * Add a new entry to the dictionary. + * @param previousCode the previous code + * @param character the next character to append + * @return the new code + * @throws IOException on error + */ + protected abstract int addEntry(int previousCode, byte character) + throws IOException; + + /** + * Sets the clear code based on the code size. 
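+ * <p>The clear code is {@code 1 << (codeSize - 1)}; with the default code size of 9 this yields 256.</p>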
+ * @param codeSize code size + */ + protected void setClearCode(final int codeSize) { + clearCode = (1 << (codeSize - 1)); + } + + /** + * Initializes the arrays based on the maximum code size. + * First checks that the estimated memory usage is below memoryLimitInKb + * + * @param maxCodeSize maximum code size + * @param memoryLimitInKb maximum allowed estimated memory usage in Kb + * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitInKb + */ + protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb) + throws MemoryLimitException { + + if (memoryLimitInKb > -1) { + final int maxTableSize = 1 << maxCodeSize; + //account for potential overflow + long memoryUsageInBytes = (long) maxTableSize * 6;//(4 (prefixes) + 1 (characters) +1 (outputStack)) + long memoryUsageInKb = memoryUsageInBytes >> 10; + + if (memoryUsageInKb > memoryLimitInKb) { + throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb); + } + } + initializeTables(maxCodeSize); + } + + /** + * Initializes the arrays based on the maximum code size. + * @param maxCodeSize maximum code size + */ + protected void initializeTables(final int maxCodeSize) { + final int maxTableSize = 1 << maxCodeSize; + prefixes = new int[maxTableSize]; + characters = new byte[maxTableSize]; + outputStack = new byte[maxTableSize]; + outputStackLocation = maxTableSize; + final int max = 1 << 8; + for (int i = 0; i < max; i++) { + prefixes[i] = -1; + characters[i] = (byte) i; + } + } + + /** + * Reads the next code from the stream. + * @return the next code + * @throws IOException on error + */ + protected int readNextCode() throws IOException { + if (codeSize > 31) { + throw new IllegalArgumentException("code size must not be bigger than 31"); + } + return (int) in.readBits(codeSize); + } + + /** + * Adds a new entry if the maximum table size hasn't been exceeded + * and returns the new index. + * @param previousCode the previous code + * @param character the character to append + * @param maxTableSize the maximum table size + * @return the new code + */ + protected int addEntry(final int previousCode, final byte character, final int maxTableSize) { + if (tableSize < maxTableSize) { + prefixes[tableSize] = previousCode; + characters[tableSize] = character; + return tableSize++; + } + return -1; + } + + /** + * Add entry for repeat of previousCode we haven't added, yet. 
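+ * <p>This covers the classic LZW decoder corner case: when a code arrives that is not yet in the decoder's table, the only entry the encoder can have just created is the previous expansion followed by its own first character (the "KwKwK" pattern).</p>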
+ * @return new code for a repeat of the previous code + * @throws IOException on error + */ + protected int addRepeatOfPreviousCode() throws IOException { + if (previousCode == -1) { + // can't have a repeat for the very first code + throw new IOException("The first code can't be a reference to its preceding code"); + } + return addEntry(previousCode, previousCodeFirstChar); + } + + /** + * Expands the entry with index code to the output stack and may + * create a new entry + * @param code the code + * @param addedUnfinishedEntry whether unfinished entries have been added + * @return the new location of the output stack + * @throws IOException on error + */ + protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) + throws IOException { + for (int entry = code; entry >= 0; entry = prefixes[entry]) { + outputStack[--outputStackLocation] = characters[entry]; + } + if (previousCode != -1 && !addedUnfinishedEntry) { + addEntry(previousCode, outputStack[outputStackLocation]); + } + previousCode = code; + previousCodeFirstChar = outputStack[outputStackLocation]; + return outputStackLocation; + } + + private int readFromStack(final byte[] b, final int off, final int len) { + final int remainingInStack = outputStack.length - outputStackLocation; + if (remainingInStack > 0) { + final int maxLength = Math.min(remainingInStack, len); + System.arraycopy(outputStack, outputStackLocation, b, off, maxLength); + outputStackLocation += maxLength; + return maxLength; + } + return 0; + } + + protected int getCodeSize() { + return codeSize; + } + + protected void resetCodeSize() { + setCodeSize(DEFAULT_CODE_SIZE); + } + + protected void setCodeSize(final int cs) { + this.codeSize = cs; + } + + protected void incrementCodeSize() { + codeSize++; + } + + protected void resetPreviousCode() { + this.previousCode = -1; + } + + protected int getPrefix(final int offset) { + return prefixes[offset]; + } + + protected void setPrefix(final int offset, final int value) { + prefixes[offset] = value; + } + + protected int getPrefixesLength() { + return prefixes.length; + } + + protected int getClearCode() { + return clearCode; + } + + protected int getTableSize() { + return tableSize; + } + + protected void setTableSize(final int newSize) { + tableSize = newSize; + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/lzw/package.html b/src/main/java/org/apache/commons/compress/compressors/lzw/package.html new file mode 100644 index 000000000..3c45ca62a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lzw/package.html @@ -0,0 +1,23 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +--> + <body> + <p>Generic LZW implementation.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/pack200/InMemoryCachingStreamBridge.java b/src/main/java/org/apache/commons/compress/compressors/pack200/InMemoryCachingStreamBridge.java new file mode 100644 index 000000000..e1fdc2cba --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/pack200/InMemoryCachingStreamBridge.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors.pack200; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * StreamSwitcher that caches all data written to the output side in + * memory. + * @since 1.3 + */ +class InMemoryCachingStreamBridge extends StreamBridge { + InMemoryCachingStreamBridge() { + super(new ByteArrayOutputStream()); + } + + @Override + InputStream getInputView() throws IOException { + return new ByteArrayInputStream(((ByteArrayOutputStream) out) + .toByteArray()); + } +}
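A rough sketch of how such a bridge is meant to be used (the calls are package-internal; packedBytes is a hypothetical byte array and exception handling is omitted):

    // Everything written to the bridge is cached, here in a ByteArrayOutputStream.
    StreamBridge bridge = new InMemoryCachingStreamBridge();
    bridge.write(packedBytes);
    // The cached bytes can later be replayed through the input view.
    try (InputStream replay = bridge.getInput()) {
        // hand the replayed bytes to a consumer, e.g. Pack200's unpacker
    }
    bridge.stop(); // closes the output side and the cached input view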
\ No newline at end of file diff --git a/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200CompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200CompressorInputStream.java new file mode 100644 index 000000000..04fdc2bb6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200CompressorInputStream.java @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors.pack200; + +import java.io.File; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Map; +import java.util.jar.JarOutputStream; +import java.util.jar.Pack200; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; + +/** + * An input stream that decompresses from the Pack200 format to be read + * as any other stream. + * + * <p>The {@link CompressorInputStream#getCount getCount} and {@link + * CompressorInputStream#getBytesRead getBytesRead} methods always + * return 0.</p> + * + * @NotThreadSafe + * @since 1.3 + */ +public class Pack200CompressorInputStream extends CompressorInputStream { + private final InputStream originalInput; + private final StreamBridge streamBridge; + + /** + * Decompresses the given stream, caching the decompressed data in + * memory. + * + * <p>When reading from a file the File-arg constructor may + * provide better performance.</p> + * + * @param in the InputStream from which this object should be created + * @throws IOException if reading fails + */ + public Pack200CompressorInputStream(final InputStream in) + throws IOException { + this(in, Pack200Strategy.IN_MEMORY); + } + + /** + * Decompresses the given stream using the given strategy to cache + * the results. + * + * <p>When reading from a file the File-arg constructor may + * provide better performance.</p> + * + * @param in the InputStream from which this object should be created + * @param mode the strategy to use + * @throws IOException if reading fails + */ + public Pack200CompressorInputStream(final InputStream in, + final Pack200Strategy mode) + throws IOException { + this(in, null, mode, null); + } + + /** + * Decompresses the given stream, caching the decompressed data in + * memory and using the given properties. 
+ * + * <p>When reading from a file the File-arg constructor may + * provide better performance.</p> + * + * @param in the InputStream from which this object should be created + * @param props Pack200 properties to use + * @throws IOException if reading fails + */ + public Pack200CompressorInputStream(final InputStream in, + final Map<String, String> props) + throws IOException { + this(in, Pack200Strategy.IN_MEMORY, props); + } + + /** + * Decompresses the given stream using the given strategy to cache + * the results and the given properties. + * + * <p>When reading from a file the File-arg constructor may + * provide better performance.</p> + * + * @param in the InputStream from which this object should be created + * @param mode the strategy to use + * @param props Pack200 properties to use + * @throws IOException if reading fails + */ + public Pack200CompressorInputStream(final InputStream in, + final Pack200Strategy mode, + final Map<String, String> props) + throws IOException { + this(in, null, mode, props); + } + + /** + * Decompresses the given file, caching the decompressed data in + * memory. + * + * @param f the file to decompress + * @throws IOException if reading fails + */ + public Pack200CompressorInputStream(final File f) throws IOException { + this(f, Pack200Strategy.IN_MEMORY); + } + + /** + * Decompresses the given file using the given strategy to cache + * the results. + * + * @param f the file to decompress + * @param mode the strategy to use + * @throws IOException if reading fails + */ + public Pack200CompressorInputStream(final File f, final Pack200Strategy mode) + throws IOException { + this(null, f, mode, null); + } + + /** + * Decompresses the given file, caching the decompressed data in + * memory and using the given properties. + * + * @param f the file to decompress + * @param props Pack200 properties to use + * @throws IOException if reading fails + */ + public Pack200CompressorInputStream(final File f, + final Map<String, String> props) + throws IOException { + this(f, Pack200Strategy.IN_MEMORY, props); + } + + /** + * Decompresses the given file using the given strategy to cache + * the results and the given properties. 
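+ * <p>(Illustrative: {@code props} could, for instance, map {@code Pack200.Unpacker.DEFLATE_HINT} to {@code "true"} to force the entries of the rebuilt JAR to be deflated.)</p>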
+ * + * @param f the file to decompress + * @param mode the strategy to use + * @param props Pack200 properties to use + * @throws IOException if reading fails + */ + public Pack200CompressorInputStream(final File f, final Pack200Strategy mode, + final Map<String, String> props) + throws IOException { + this(null, f, mode, props); + } + + private Pack200CompressorInputStream(final InputStream in, final File f, + final Pack200Strategy mode, + final Map<String, String> props) + throws IOException { + originalInput = in; + streamBridge = mode.newStreamBridge(); + try (final JarOutputStream jarOut = new JarOutputStream(streamBridge)) { + final Pack200.Unpacker u = Pack200.newUnpacker(); + if (props != null) { + u.properties().putAll(props); + } + if (f == null) { + u.unpack(new FilterInputStream(in) { + @Override + public void close() { + // unpack would close this stream but we + // want to give the user code more control + } + }, jarOut); + } else { + u.unpack(f, jarOut); + } + } + } + + @Override + public int read() throws IOException { + return streamBridge.getInput().read(); + } + + @Override + public int read(final byte[] b) throws IOException { + return streamBridge.getInput().read(b); + } + + @Override + public int read(final byte[] b, final int off, final int count) throws IOException { + return streamBridge.getInput().read(b, off, count); + } + + @Override + public int available() throws IOException { + return streamBridge.getInput().available(); + } + + @Override + public boolean markSupported() { + try { + return streamBridge.getInput().markSupported(); + } catch (final IOException ex) { + return false; + } + } + + @Override + public void mark(final int limit) { + try { + streamBridge.getInput().mark(limit); + } catch (final IOException ex) { + throw new RuntimeException(ex); //NOSONAR + } + } + + @Override + public void reset() throws IOException { + streamBridge.getInput().reset(); + } + + @Override + public long skip(final long count) throws IOException { + return IOUtils.skip(streamBridge.getInput(), count); + } + + @Override + public void close() throws IOException { + try { + streamBridge.stop(); + } finally { + if (originalInput != null) { + originalInput.close(); + } + } + } + + private static final byte[] CAFE_DOOD = new byte[] { + (byte) 0xCA, (byte) 0xFE, (byte) 0xD0, (byte) 0x0D + }; + private static final int SIG_LENGTH = CAFE_DOOD.length; + + /** + * Checks if the signature matches what is expected for a pack200 + * file (0xCAFED00D). + * + * @param signature + * the bytes to check + * @param length + * the number of bytes to check + * @return true, if this stream is a pack200 compressed stream, + * false otherwise + */ + public static boolean matches(final byte[] signature, final int length) { + if (length < SIG_LENGTH) { + return false; + } + + for (int i = 0; i < SIG_LENGTH; i++) { + if (signature[i] != CAFE_DOOD[i]) { + return false; + } + } + + return true; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200CompressorOutputStream.java new file mode 100644 index 000000000..e5be87b44 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200CompressorOutputStream.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors.pack200; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Map; +import java.util.jar.JarInputStream; +import java.util.jar.Pack200; + +import org.apache.commons.compress.compressors.CompressorOutputStream; + +/** + * An output stream that compresses using the Pack200 format. + * + * @NotThreadSafe + * @since 1.3 + */ +public class Pack200CompressorOutputStream extends CompressorOutputStream { + private boolean finished = false; + private final OutputStream originalOutput; + private final StreamBridge streamBridge; + private final Map<String, String> properties; + + /** + * Compresses the given stream, caching the compressed data in + * memory. + * + * @param out the stream to write to + * @throws IOException if writing fails + */ + public Pack200CompressorOutputStream(final OutputStream out) + throws IOException { + this(out, Pack200Strategy.IN_MEMORY); + } + + /** + * Compresses the given stream using the given strategy to cache + * the results. + * + * @param out the stream to write to + * @param mode the strategy to use + * @throws IOException if writing fails + */ + public Pack200CompressorOutputStream(final OutputStream out, + final Pack200Strategy mode) + throws IOException { + this(out, mode, null); + } + + /** + * Compresses the given stream, caching the compressed data in + * memory and using the given properties. + * + * @param out the stream to write to + * @param props Pack200 properties to use + * @throws IOException if writing fails + */ + public Pack200CompressorOutputStream(final OutputStream out, + final Map<String, String> props) + throws IOException { + this(out, Pack200Strategy.IN_MEMORY, props); + } + + /** + * Compresses the given stream using the given strategy to cache + * the results and the given properties. 
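+ * <p>(Illustrative: {@code props} could, for instance, map {@code Pack200.Packer.EFFORT} to {@code "9"} to trade packing speed for a smaller result.)</p>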
+ * + * @param out the stream to write to + * @param mode the strategy to use + * @param props Pack200 properties to use + * @throws IOException if writing fails + */ + public Pack200CompressorOutputStream(final OutputStream out, + final Pack200Strategy mode, + final Map<String, String> props) + throws IOException { + originalOutput = out; + streamBridge = mode.newStreamBridge(); + properties = props; + } + + @Override + public void write(final int b) throws IOException { + streamBridge.write(b); + } + + @Override + public void write(final byte[] b) throws IOException { + streamBridge.write(b); + } + + @Override + public void write(final byte[] b, final int from, final int length) throws IOException { + streamBridge.write(b, from, length); + } + + @Override + public void close() throws IOException { + try { + finish(); + } finally { + try { + streamBridge.stop(); + } finally { + originalOutput.close(); + } + } + } + + public void finish() throws IOException { + if (!finished) { + finished = true; + final Pack200.Packer p = Pack200.newPacker(); + if (properties != null) { + p.properties().putAll(properties); + } + try (JarInputStream ji = new JarInputStream(streamBridge.getInput())) { + p.pack(ji, originalOutput); + } + } + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200Strategy.java b/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200Strategy.java new file mode 100644 index 000000000..dba199296 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200Strategy.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors.pack200; + +import java.io.IOException; + +/** + * The different modes the Pack200 streams can use to wrap input and + * output. + * @since 1.3 + */ +public enum Pack200Strategy { + /** Cache output in memory */ + IN_MEMORY() { + @Override + StreamBridge newStreamBridge() { + return new InMemoryCachingStreamBridge(); + } + }, + /** Cache output in a temporary file */ + TEMP_FILE() { + @Override + StreamBridge newStreamBridge() throws IOException { + return new TempFileCachingStreamBridge(); + } + }; + + abstract StreamBridge newStreamBridge() throws IOException; +}
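Taken together, a hedged end-to-end sketch of the Pack200 stream classes and strategies above (file names are illustrative, imports omitted, IOUtils is the copy helper from org.apache.commons.compress.utils):

    // Pack a JAR: bytes written to the stream are cached (here in a temporary file)
    // and only run through the packer once finish()/close() is called.
    try (InputStream jar = Files.newInputStream(Paths.get("app.jar"));
         Pack200CompressorOutputStream packed = new Pack200CompressorOutputStream(
                 Files.newOutputStream(Paths.get("app.pack")), Pack200Strategy.TEMP_FILE)) {
        IOUtils.copy(jar, packed);
    }

    // Unpack it again: the constructor unpacks eagerly and then serves the rebuilt JAR.
    try (Pack200CompressorInputStream unpacked =
             new Pack200CompressorInputStream(new File("app.pack"))) {
        Files.copy(unpacked, Paths.get("rebuilt.jar"), StandardCopyOption.REPLACE_EXISTING);
    }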
\ No newline at end of file diff --git a/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200Utils.java b/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200Utils.java new file mode 100644 index 000000000..91a54ca5f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/pack200/Pack200Utils.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors.pack200; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.jar.JarFile; +import java.util.jar.JarOutputStream; +import java.util.jar.Pack200; + +/** + * Utility methods for Pack200. + * + * @ThreadSafe + * @since 1.3 + */ +public class Pack200Utils { + private Pack200Utils() { } + + /** + * Normalizes a JAR archive in-place so it can be safely signed + * and packed. + * + * <p>As stated in <a + * href="https://download.oracle.com/javase/1.5.0/docs/api/java/util/jar/Pack200.Packer.html">Pack200.Packer's</a> + * javadocs applying a Pack200 compression to a JAR archive will + * in general make its sigantures invalid. In order to prepare a + * JAR for signing it should be "normalized" by packing and + * unpacking it. This is what this method does.</p> + * + * <p>Note this methods implicitly sets the segment length to + * -1.</p> + * + * @param jar the JAR archive to normalize + * @throws IOException if reading or writing fails + */ + public static void normalize(final File jar) + throws IOException { + normalize(jar, jar, null); + } + + /** + * Normalizes a JAR archive in-place so it can be safely signed + * and packed. + * + * <p>As stated in <a + * href="https://download.oracle.com/javase/1.5.0/docs/api/java/util/jar/Pack200.Packer.html">Pack200.Packer's</a> + * javadocs applying a Pack200 compression to a JAR archive will + * in general make its sigantures invalid. In order to prepare a + * JAR for signing it should be "normalized" by packing and + * unpacking it. This is what this method does.</p> + * + * @param jar the JAR archive to normalize + * @param props properties to set for the pack operation. This + * method will implicitly set the segment limit to -1. + * @throws IOException if reading or writing fails + */ + public static void normalize(final File jar, final Map<String, String> props) + throws IOException { + normalize(jar, jar, props); + } + + /** + * Normalizes a JAR archive so it can be safely signed and packed. 
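+ * <p>(Typical use, with illustrative file names: {@code Pack200Utils.normalize(new File("in.jar"), new File("in-normalized.jar"))}, then sign {@code in-normalized.jar}.)</p>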
+ * + * <p>As stated in <a + * href="https://download.oracle.com/javase/1.5.0/docs/api/java/util/jar/Pack200.Packer.html">Pack200.Packer's</a> + * javadocs applying a Pack200 compression to a JAR archive will + * in general make its sigantures invalid. In order to prepare a + * JAR for signing it should be "normalized" by packing and + * unpacking it. This is what this method does.</p> + * + * <p>This method does not replace the existing archive but creates + * a new one.</p> + * + * <p>Note this methods implicitly sets the segment length to + * -1.</p> + * + * @param from the JAR archive to normalize + * @param to the normalized archive + * @throws IOException if reading or writing fails + */ + public static void normalize(final File from, final File to) + throws IOException { + normalize(from, to, null); + } + + /** + * Normalizes a JAR archive so it can be safely signed and packed. + * + * <p>As stated in <a + * href="https://download.oracle.com/javase/1.5.0/docs/api/java/util/jar/Pack200.Packer.html">Pack200.Packer's</a> + * javadocs applying a Pack200 compression to a JAR archive will + * in general make its sigantures invalid. In order to prepare a + * JAR for signing it should be "normalized" by packing and + * unpacking it. This is what this method does.</p> + * + * <p>This method does not replace the existing archive but creates + * a new one.</p> + * + * @param from the JAR archive to normalize + * @param to the normalized archive + * @param props properties to set for the pack operation. This + * method will implicitly set the segment limit to -1. + * @throws IOException if reading or writing fails + */ + public static void normalize(final File from, final File to, Map<String, String> props) + throws IOException { + if (props == null) { + props = new HashMap<>(); + } + props.put(Pack200.Packer.SEGMENT_LIMIT, "-1"); + final File tempFile = File.createTempFile("commons-compress", "pack200normalize"); + try { + try (FileOutputStream fos = new FileOutputStream(tempFile); + JarFile jarFile = new JarFile(from)) { + final Pack200.Packer packer = Pack200.newPacker(); + packer.properties().putAll(props); + packer.pack(jarFile, fos); + } + final Pack200.Unpacker unpacker = Pack200.newUnpacker(); + try (JarOutputStream jos = new JarOutputStream(new FileOutputStream(to))) { + unpacker.unpack(tempFile, jos); + } + } finally { + if (!tempFile.delete()) { + tempFile.deleteOnExit(); + } + } + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/pack200/StreamBridge.java b/src/main/java/org/apache/commons/compress/compressors/pack200/StreamBridge.java new file mode 100644 index 000000000..9de3567ce --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/pack200/StreamBridge.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors.pack200; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * Provides an InputStream to read all data written to this + * OutputStream. + * + * @ThreadSafe + * @since 1.3 + */ +abstract class StreamBridge extends FilterOutputStream { + private InputStream input; + private final Object inputLock = new Object(); + + protected StreamBridge(final OutputStream out) { + super(out); + } + + protected StreamBridge() { + this(null); + } + + /** + * Provides the input view. + */ + InputStream getInput() throws IOException { + synchronized (inputLock) { + if (input == null) { + input = getInputView(); + } + } + return input; + } + + /** + * Creates the input view. + */ + abstract InputStream getInputView() throws IOException; + + /** + * Closes input and output and releases all associated resources. + */ + void stop() throws IOException { + close(); + synchronized (inputLock) { + if (input != null) { + input.close(); + input = null; + } + } + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/pack200/TempFileCachingStreamBridge.java b/src/main/java/org/apache/commons/compress/compressors/pack200/TempFileCachingStreamBridge.java new file mode 100644 index 000000000..27ed2112e --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/pack200/TempFileCachingStreamBridge.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors.pack200; + +import java.io.File; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; + +/** + * StreamSwitcher that caches all data written to the output side in + * a temporary file. 
+ * @since 1.3 + */ +class TempFileCachingStreamBridge extends StreamBridge { + private final File f; + + TempFileCachingStreamBridge() throws IOException { + f = File.createTempFile("commons-compress", "packtemp"); + f.deleteOnExit(); + out = Files.newOutputStream(f.toPath()); + } + + @Override + InputStream getInputView() throws IOException { + out.close(); + return new FilterInputStream(Files.newInputStream(f.toPath())) { + @Override + public void close() throws IOException { + try { + super.close(); + } finally { + f.delete(); + } + } + }; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/pack200/package.html b/src/main/java/org/apache/commons/compress/compressors/pack200/package.html new file mode 100644 index 000000000..9dbf2a065 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/pack200/package.html @@ -0,0 +1,82 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream classes for compressing and decompressing + streams using the Pack200 algorithm used to compress Java + archives.</p> + + <p>The streams of this package only work on JAR archives, i.e. a + {@link + org.apache.commons.compress.compressors.pack200.Pack200CompressorOutputStream + Pack200CompressorOutputStream} expects to be wrapped around a + stream that a valid JAR archive will be written to and a {@link + org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream + Pack200CompressorInputStream} provides a stream to read from a + JAR archive.</p> + + <p>JAR archives compressed with Pack200 will in general be + different from the original archive when decompressed again. + For details see + the <a href="https://download.oracle.com/javase/1.5.0/docs/api/java/util/jar/Pack200.html">API + documentation of Pack200</a>.</p> + + <p>The streams of this package work on non-deflated streams, + i.e. archives like those created with the <code>--no-gzip</code> + option of the JDK's <code>pack200</code> command line tool. If + you want to work on deflated streams you must use an additional + stream layer - for example by using Apache Commons Compress' + gzip package.</p> + + <p>The Pack200 API provided by the Java class library doesn't lend + itself to real stream + processing. <code>Pack200CompressorInputStream</code> will + uncompress its input immediately and then provide + an <code>InputStream</code> to a cached result. + Likewise <code>Pack200CompressorOutputStream</code> will not + write anything to the given OutputStream + until <code>finish</code> or <code>close</code> is called - at + which point the cached output written so far gets + compressed.</p> + + <p>Two different caching modes are available - "in memory", which + is the default, and "temporary file". 
By default data is cached + in memory but you should switch to the temporary file option if + your archives are really big.</p> + + <p>Given there always is an intermediate result + the <code>getBytesRead</code> and <code>getCount</code> methods + of <code>Pack200CompressorInputStream</code> are meaningless + (read from the real stream or from the intermediate result?) + and always return 0.</p> + + <p>During development of the initial version several attempts have + been made to use a real streaming API based for example + on <code>Piped(In|Out)putStream</code> or explicit stream + pumping like Commons Exec's <code>InputStreamPumper</code> but + they have all failed because they rely on the output end to be + consumed completely or else the <code>(un)pack</code> will block + forever. Especially for <code>Pack200InputStream</code> it is + very likely that it will be wrapped in + a <code>ZipArchiveInputStream</code> which will never read the + archive completely as it is not interested in the ZIP central + directory data at the end of the JAR archive.</p> + + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/package.html b/src/main/java/org/apache/commons/compress/compressors/package.html new file mode 100644 index 000000000..7b7d504b9 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/package.html @@ -0,0 +1,24 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides a unified API and factories for dealing with + compressed streams.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java new file mode 100644 index 000000000..f6dc30c0a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java @@ -0,0 +1,350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.snappy; + +import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; +import java.util.Arrays; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.BoundedInputStream; +import org.apache.commons.compress.utils.ByteUtils; +import org.apache.commons.compress.utils.CountingInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * CompressorInputStream for the framing Snappy format. + * + * <p>Based on the "spec" in the version "Last revised: 2013-10-25"</p> + * + * @see <a href="https://github.com/google/snappy/blob/master/framing_format.txt">Snappy framing format description</a> + * @since 1.7 + */ +public class FramedSnappyCompressorInputStream extends CompressorInputStream + implements InputStreamStatistics { + + /** + * package private for tests only. + */ + static final long MASK_OFFSET = 0xa282ead8L; + + private static final int STREAM_IDENTIFIER_TYPE = 0xff; + static final int COMPRESSED_CHUNK_TYPE = 0; + private static final int UNCOMPRESSED_CHUNK_TYPE = 1; + private static final int PADDING_CHUNK_TYPE = 0xfe; + private static final int MIN_UNSKIPPABLE_TYPE = 2; + private static final int MAX_UNSKIPPABLE_TYPE = 0x7f; + private static final int MAX_SKIPPABLE_TYPE = 0xfd; + + // used by FramedSnappyCompressorOutputStream as well + static final byte[] SZ_SIGNATURE = new byte[] { //NOSONAR + (byte) STREAM_IDENTIFIER_TYPE, // tag + 6, 0, 0, // length + 's', 'N', 'a', 'P', 'p', 'Y' + }; + + private long unreadBytes; + private final CountingInputStream countingStream; + + /** The underlying stream to read compressed data from */ + private final PushbackInputStream in; + + /** The dialect to expect */ + private final FramedSnappyDialect dialect; + + private SnappyCompressorInputStream currentCompressedChunk; + + // used in no-arg read method + private final byte[] oneByte = new byte[1]; + + private boolean endReached, inUncompressedChunk; + + private int uncompressedBytesRemaining; + private long expectedChecksum = -1; + private final int blockSize; + private final PureJavaCrc32C checksum = new PureJavaCrc32C(); + + private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() { + @Override + public int getAsByte() throws IOException { + return readOneByte(); + } + }; + + /** + * Constructs a new input stream that decompresses + * snappy-framed-compressed data from the specified input stream + * using the {@link FramedSnappyDialect#STANDARD} dialect. + * @param in the InputStream from which to read the compressed data + * @throws IOException if reading fails + */ + public FramedSnappyCompressorInputStream(final InputStream in) throws IOException { + this(in, FramedSnappyDialect.STANDARD); + } + + /** + * Constructs a new input stream that decompresses snappy-framed-compressed data + * from the specified input stream. + * @param in the InputStream from which to read the compressed data + * @param dialect the dialect used by the compressed stream + * @throws IOException if reading fails + */ + public FramedSnappyCompressorInputStream(final InputStream in, + final FramedSnappyDialect dialect) + throws IOException { + this(in, SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE, dialect); + } + + /** + * Constructs a new input stream that decompresses snappy-framed-compressed data + * from the specified input stream. 
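+ * <p>A minimal usage sketch (illustrative; {@code data.sz} is a hypothetical file and imports are omitted):</p>
+ * <pre>{@code
+ * try (InputStream raw = Files.newInputStream(Paths.get("data.sz"));
+ *      FramedSnappyCompressorInputStream sz = new FramedSnappyCompressorInputStream(
+ *          raw, SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE, FramedSnappyDialect.STANDARD)) {
+ *     IOUtils.copy(sz, Files.newOutputStream(Paths.get("data")));
+ * }
+ * }</pre>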
+ * @param in the InputStream from which to read the compressed data + * @param blockSize the block size to use for the compressed stream + * @param dialect the dialect used by the compressed stream + * @throws IOException if reading fails + * @since 1.14 + */ + public FramedSnappyCompressorInputStream(final InputStream in, + final int blockSize, + final FramedSnappyDialect dialect) + throws IOException { + countingStream = new CountingInputStream(in); + this.in = new PushbackInputStream(countingStream, 1); + this.blockSize = blockSize; + this.dialect = dialect; + if (dialect.hasStreamIdentifier()) { + readStreamIdentifier(); + } + } + + /** {@inheritDoc} */ + @Override + public int read() throws IOException { + return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF; + } + + /** {@inheritDoc} */ + @Override + public void close() throws IOException { + try { + if (currentCompressedChunk != null) { + currentCompressedChunk.close(); + currentCompressedChunk = null; + } + } finally { + in.close(); + } + } + + /** {@inheritDoc} */ + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + int read = readOnce(b, off, len); + if (read == -1) { + readNextBlock(); + if (endReached) { + return -1; + } + read = readOnce(b, off, len); + } + return read; + } + + /** {@inheritDoc} */ + @Override + public int available() throws IOException { + if (inUncompressedChunk) { + return Math.min(uncompressedBytesRemaining, + in.available()); + } else if (currentCompressedChunk != null) { + return currentCompressedChunk.available(); + } + return 0; + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return countingStream.getBytesRead() - unreadBytes; + } + + /** + * Read from the current chunk into the given array. + * + * @return -1 if there is no current chunk or the number of bytes + * read from the current chunk (which may be -1 if the end of the + * chunk is reached). 
+ */ + private int readOnce(final byte[] b, final int off, final int len) throws IOException { + int read = -1; + if (inUncompressedChunk) { + final int amount = Math.min(uncompressedBytesRemaining, len); + if (amount == 0) { + return -1; + } + read = in.read(b, off, amount); + if (read != -1) { + uncompressedBytesRemaining -= read; + count(read); + } + } else if (currentCompressedChunk != null) { + final long before = currentCompressedChunk.getBytesRead(); + read = currentCompressedChunk.read(b, off, len); + if (read == -1) { + currentCompressedChunk.close(); + currentCompressedChunk = null; + } else { + count(currentCompressedChunk.getBytesRead() - before); + } + } + if (read > 0) { + checksum.update(b, off, read); + } + return read; + } + + private void readNextBlock() throws IOException { + verifyLastChecksumAndReset(); + inUncompressedChunk = false; + final int type = readOneByte(); + if (type == -1) { + endReached = true; + } else if (type == STREAM_IDENTIFIER_TYPE) { + in.unread(type); + unreadBytes++; + pushedBackBytes(1); + readStreamIdentifier(); + readNextBlock(); + } else if (type == PADDING_CHUNK_TYPE + || (type > MAX_UNSKIPPABLE_TYPE && type <= MAX_SKIPPABLE_TYPE)) { + skipBlock(); + readNextBlock(); + } else if (type >= MIN_UNSKIPPABLE_TYPE && type <= MAX_UNSKIPPABLE_TYPE) { + throw new IOException("unskippable chunk with type " + type + + " (hex " + Integer.toHexString(type) + ")" + + " detected."); + } else if (type == UNCOMPRESSED_CHUNK_TYPE) { + inUncompressedChunk = true; + uncompressedBytesRemaining = readSize() - 4 /* CRC */; + expectedChecksum = unmask(readCrc()); + } else if (type == COMPRESSED_CHUNK_TYPE) { + final boolean expectChecksum = dialect.usesChecksumWithCompressedChunks(); + final long size = readSize() - (expectChecksum ? 
4L : 0L); + if (expectChecksum) { + expectedChecksum = unmask(readCrc()); + } else { + expectedChecksum = -1; + } + currentCompressedChunk = + new SnappyCompressorInputStream(new BoundedInputStream(in, size), blockSize); + // constructor reads uncompressed size + count(currentCompressedChunk.getBytesRead()); + } else { + // impossible as all potential byte values have been covered + throw new IOException("unknown chunk type " + type + + " detected."); + } + } + + private long readCrc() throws IOException { + final byte[] b = new byte[4]; + final int read = IOUtils.readFully(in, b); + count(read); + if (read != 4) { + throw new IOException("premature end of stream"); + } + return ByteUtils.fromLittleEndian(b); + } + + static long unmask(long x) { + // ugly, maybe we should just have used ints and deal with the + // overflow + x -= MASK_OFFSET; + x &= 0xffffFFFFL; + return ((x >> 17) | (x << 15)) & 0xffffFFFFL; + } + + private int readSize() throws IOException { + return (int) ByteUtils.fromLittleEndian(supplier, 3); + } + + private void skipBlock() throws IOException { + final int size = readSize(); + final long read = IOUtils.skip(in, size); + count(read); + if (read != size) { + throw new IOException("premature end of stream"); + } + } + + private void readStreamIdentifier() throws IOException { + final byte[] b = new byte[10]; + final int read = IOUtils.readFully(in, b); + count(read); + if (10 != read || !matches(b, 10)) { + throw new IOException("Not a framed Snappy stream"); + } + } + + private int readOneByte() throws IOException { + final int b = in.read(); + if (b != -1) { + count(1); + return b & 0xFF; + } + return -1; + } + + private void verifyLastChecksumAndReset() throws IOException { + if (expectedChecksum >= 0 && expectedChecksum != checksum.getValue()) { + throw new IOException("Checksum verification failed"); + } + expectedChecksum = -1; + checksum.reset(); + } + + /** + * Checks if the signature matches what is expected for a .sz file. + * + * <p>.sz files start with a chunk with tag 0xff and content sNaPpY.</p> + * + * @param signature the bytes to check + * @param length the number of bytes to check + * @return true if this is a .sz stream, false otherwise + */ + public static boolean matches(final byte[] signature, final int length) { + + if (length < SZ_SIGNATURE.length) { + return false; + } + + byte[] shortenedSig = signature; + if (signature.length > SZ_SIGNATURE.length) { + shortenedSig = new byte[SZ_SIGNATURE.length]; + System.arraycopy(signature, 0, shortenedSig, 0, SZ_SIGNATURE.length); + } + + return Arrays.equals(shortenedSig, SZ_SIGNATURE); + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorOutputStream.java new file mode 100644 index 000000000..4449b28b2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorOutputStream.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.snappy; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.commons.compress.compressors.CompressorOutputStream; +import org.apache.commons.compress.compressors.lz77support.Parameters; +import org.apache.commons.compress.utils.ByteUtils; + +/** + * CompressorOutputStream for the framing Snappy format. + * + * <p>Based on the "spec" in the version "Last revised: 2013-10-25"</p> + * + * @see <a href="https://github.com/google/snappy/blob/master/framing_format.txt">Snappy framing format description</a> + * @since 1.14 + * @NotThreadSafe + */ +public class FramedSnappyCompressorOutputStream extends CompressorOutputStream { + // see spec: + // > However, we place an additional restriction that the uncompressed data + // > in a chunk must be no longer than 65536 bytes. This allows consumers to + // > easily use small fixed-size buffers. + private static final int MAX_COMPRESSED_BUFFER_SIZE = 1 << 16; + + private final OutputStream out; + private final Parameters params; + private final PureJavaCrc32C checksum = new PureJavaCrc32C(); + // used in one-arg write method + private final byte[] oneByte = new byte[1]; + private final byte[] buffer = new byte[MAX_COMPRESSED_BUFFER_SIZE]; + private int currentIndex = 0; + + private final ByteUtils.ByteConsumer consumer; + + /** + * Constructs a new output stream that compresses + * snappy-framed-compressed data to the specified output stream. + * @param out the OutputStream to which to write the compressed data + * @throws IOException if writing the signature fails + */ + public FramedSnappyCompressorOutputStream(final OutputStream out) throws IOException { + this(out, SnappyCompressorOutputStream.createParameterBuilder(SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE) + .build()); + } + + /** + * Constructs a new output stream that compresses + * snappy-framed-compressed data to the specified output stream. + * @param out the OutputStream to which to write the compressed data + * @param params parameters used to fine-tune compression, in + * particular to balance compression ratio vs compression speed. 
+ * @throws IOException if writing the signature fails + */ + public FramedSnappyCompressorOutputStream(final OutputStream out, Parameters params) throws IOException { + this.out = out; + this.params = params; + consumer = new ByteUtils.OutputStreamByteConsumer(out); + out.write(FramedSnappyCompressorInputStream.SZ_SIGNATURE); + } + + @Override + public void write(int b) throws IOException { + oneByte[0] = (byte) (b & 0xff); + write(oneByte); + } + + @Override + public void write(byte[] data, int off, int len) throws IOException { + if (currentIndex + len > MAX_COMPRESSED_BUFFER_SIZE) { + flushBuffer(); + while (len > MAX_COMPRESSED_BUFFER_SIZE) { + System.arraycopy(data, off, buffer, 0, MAX_COMPRESSED_BUFFER_SIZE); + off += MAX_COMPRESSED_BUFFER_SIZE; + len -= MAX_COMPRESSED_BUFFER_SIZE; + currentIndex = MAX_COMPRESSED_BUFFER_SIZE; + flushBuffer(); + } + } + System.arraycopy(data, off, buffer, currentIndex, len); + currentIndex += len; + } + + @Override + public void close() throws IOException { + try { + finish(); + } finally { + out.close(); + } + } + + /** + * Compresses all remaining data and writes it to the stream, + * doesn't close the underlying stream. + * @throws IOException if an error occurs + */ + public void finish() throws IOException { + if (currentIndex > 0) { + flushBuffer(); + } + } + + private void flushBuffer() throws IOException { + out.write(FramedSnappyCompressorInputStream.COMPRESSED_CHUNK_TYPE); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (OutputStream o = new SnappyCompressorOutputStream(baos, currentIndex, params)) { + o.write(buffer, 0, currentIndex); + } + byte[] b = baos.toByteArray(); + writeLittleEndian(3, b.length + 4L /* CRC */); + writeCrc(); + out.write(b); + currentIndex = 0; + } + + private void writeLittleEndian(final int numBytes, long num) throws IOException { + ByteUtils.toLittleEndian(consumer, num, numBytes); + } + + private void writeCrc() throws IOException { + checksum.update(buffer, 0, currentIndex); + writeLittleEndian(4, mask(checksum.getValue())); + checksum.reset(); + } + + static long mask(long x) { + // ugly, maybe we should just have used ints and deal with the + // overflow + x = ((x >> 15) | (x << 17)); + x += FramedSnappyCompressorInputStream.MASK_OFFSET; + x &= 0xffffFFFFL; + return x; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java new file mode 100644 index 000000000..b83b7a509 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.snappy; + +/** + * Dialects of the framing format that {@link FramedSnappyCompressorInputStream} can deal with. + * @since 1.12 + */ +public enum FramedSnappyDialect { + /** + * The standard as defined by the <a + * href="https://github.com/google/snappy/blob/master/framing_format.txt">Snappy + * framing format description</a> + */ + STANDARD(true, true), + /** + * The format used by Apple's iWork Archives (.iwa files). + */ + IWORK_ARCHIVE(false, false); + + private final boolean streamIdentifier, checksumWithCompressedChunks; + + FramedSnappyDialect(final boolean hasStreamIdentifier, + final boolean usesChecksumWithCompressedChunks) { + this.streamIdentifier = hasStreamIdentifier; + this.checksumWithCompressedChunks = usesChecksumWithCompressedChunks; + } + + boolean hasStreamIdentifier() { + return streamIdentifier; + } + + boolean usesChecksumWithCompressedChunks() { + return checksumWithCompressedChunks; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/PureJavaCrc32C.java b/src/main/java/org/apache/commons/compress/compressors/snappy/PureJavaCrc32C.java new file mode 100644 index 000000000..4c9738b91 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/snappy/PureJavaCrc32C.java @@ -0,0 +1,638 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Some portions of this file Copyright (c) 2004-2006 Intel Corporation + * and licensed under the BSD license. + */ +package org.apache.commons.compress.compressors.snappy; + +import java.util.zip.Checksum; + +/** + * A pure-java implementation of the CRC32 checksum that uses + * the CRC32-C polynomial, the same polynomial used by iSCSI + * and implemented on many Intel chipsets supporting SSE4.2. + * + * <p>This file is a copy of the implementation at the Apache Hadoop project.</p> + * @see "https://svn.apache.org/repos/asf/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java" + * @NotThreadSafe + * @since 1.7 + */ +final class PureJavaCrc32C implements Checksum { + + /** the current CRC value, bit-flipped */ + private int crc; + + /** Create a new PureJavaCrc32C object.
*/ + public PureJavaCrc32C() { + reset(); // non-private, but the class is now final + } + + @Override + public long getValue() { + final long ret = crc; + return (~ret) & 0xffffffffL; + } + + @Override + // called by ctor but the class is final so this is safe + public void reset() { + crc = 0xffffffff; + } + + @Override + public void update(final byte[] b, int off, int len) { + int localCrc = crc; + + while(len > 7) { + final int c0 =(b[off+0] ^ localCrc) & 0xff; + final int c1 =(b[off+1] ^ (localCrc >>>= 8)) & 0xff; //NOSONAR + final int c2 =(b[off+2] ^ (localCrc >>>= 8)) & 0xff; //NOSONAR + final int c3 =(b[off+3] ^ (localCrc >>>= 8)) & 0xff; //NOSONAR + localCrc = (T[T8_7_START + c0] ^ T[T8_6_START + c1]) + ^ (T[T8_5_START + c2] ^ T[T8_4_START + c3]); + + final int c4 = b[off+4] & 0xff; + final int c5 = b[off+5] & 0xff; + final int c6 = b[off+6] & 0xff; + final int c7 = b[off+7] & 0xff; + + localCrc ^= (T[T8_3_START + c4] ^ T[T8_2_START + c5]) + ^ (T[T8_1_START + c6] ^ T[T8_0_START + c7]); + + off += 8; + len -= 8; + } + + /* loop unroll - duff's device style */ + switch(len) { + case 7: localCrc = (localCrc >>> 8) ^ T[T8_0_START + ((localCrc ^ b[off++]) & 0xff)]; + case 6: localCrc = (localCrc >>> 8) ^ T[T8_0_START + ((localCrc ^ b[off++]) & 0xff)]; + case 5: localCrc = (localCrc >>> 8) ^ T[T8_0_START + ((localCrc ^ b[off++]) & 0xff)]; + case 4: localCrc = (localCrc >>> 8) ^ T[T8_0_START + ((localCrc ^ b[off++]) & 0xff)]; + case 3: localCrc = (localCrc >>> 8) ^ T[T8_0_START + ((localCrc ^ b[off++]) & 0xff)]; + case 2: localCrc = (localCrc >>> 8) ^ T[T8_0_START + ((localCrc ^ b[off++]) & 0xff)]; + case 1: localCrc = (localCrc >>> 8) ^ T[T8_0_START + ((localCrc ^ b[off++]) & 0xff)]; + default: + /* nothing */ + } + + // Publish crc out to object + crc = localCrc; + } + + @Override + final public void update(final int b) { + crc = (crc >>> 8) ^ T[T8_0_START + ((crc ^ b) & 0xff)]; + } + + // CRC polynomial tables generated by: + // java -cp build/test/classes/:build/classes/ \ + // org.apache.hadoop.util.TestPureJavaCrc32\$Table 82F63B78 + + private static final int T8_0_START = 0*256; + private static final int T8_1_START = 1*256; + private static final int T8_2_START = 2*256; + private static final int T8_3_START = 3*256; + private static final int T8_4_START = 4*256; + private static final int T8_5_START = 5*256; + private static final int T8_6_START = 6*256; + private static final int T8_7_START = 7*256; + + private static final int[] T = new int[] { + /* T8_0 */ + 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, + 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, + 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, + 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, + 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, + 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, + 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, + 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, + 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, + 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, + 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, + 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, + 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, + 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A, + 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, + 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, + 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, + 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, + 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, + 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, + 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 
0x42752927, + 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, + 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, + 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, + 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, + 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, + 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, + 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, + 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, + 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, + 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, + 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, + 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, + 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, + 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, + 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, + 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, + 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC, + 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, + 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, + 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, + 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, + 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, + 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, + 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, + 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, + 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, + 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, + 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, + 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, + 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, + 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, + 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, + 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, + 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, + 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, + 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, + 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, + 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, + 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, + 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, + 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, + 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, + 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351, + /* T8_1 */ + 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, + 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945, + 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, + 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, + 0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, + 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4, + 0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, + 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C, + 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, + 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47, + 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, + 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF, + 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, + 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6, + 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, + 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E, + 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, + 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41, + 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, + 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9, + 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, + 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0, + 0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, + 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78, + 0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, + 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43, + 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, + 0x53028762, 0x40A01F15, 
0x7447B78C, 0x67E52FFB, + 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, + 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2, + 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, + 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A, + 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, + 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC, + 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, + 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004, + 0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, + 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D, + 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, + 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185, + 0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, + 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE, + 0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, + 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306, + 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, + 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F, + 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, + 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287, + 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, + 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8, + 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, + 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600, + 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, + 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439, + 0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, + 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781, + 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, + 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA, + 0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, + 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502, + 0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, + 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B, + 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, + 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483, + /* T8_2 */ + 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, + 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469, + 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, + 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC, + 0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, + 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3, + 0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, + 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726, + 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, + 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D, + 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, + 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8, + 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, + 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7, + 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, + 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32, + 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, + 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0, + 0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, + 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75, + 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, + 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A, + 0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, + 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF, + 0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, + 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4, + 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, + 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161, + 0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, + 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E, + 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, + 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB, + 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, + 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A, + 0xB3764986, 
0x1637DBF8, 0xFC191B8B, 0x595889F5, + 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF, + 0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, + 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0, + 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, + 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065, + 0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, + 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E, + 0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, + 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB, + 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, + 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4, + 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, + 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71, + 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, + 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3, + 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, + 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36, + 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, + 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79, + 0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, + 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC, + 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, + 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7, + 0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, + 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622, + 0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, + 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D, + 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, + 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8, + /* T8_3 */ + 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, + 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA, + 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, + 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C, + 0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, + 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7, + 0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, + 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11, + 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, + 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41, + 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, + 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7, + 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, + 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C, + 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, + 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A, + 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, + 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D, + 0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, + 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB, + 0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, + 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610, + 0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, + 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6, + 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, + 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6, + 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, + 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040, + 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, + 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B, + 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, + 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D, + 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, + 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5, + 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, + 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213, + 0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, + 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8, + 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, + 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E, + 0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, + 
0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E, + 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, + 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698, + 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, + 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443, + 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, + 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5, + 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, + 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12, + 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, + 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4, + 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, + 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F, + 0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, + 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9, + 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, + 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99, + 0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, + 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F, + 0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, + 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4, + 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, + 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842, + /* T8_4 */ + 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, + 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44, + 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, + 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5, + 0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, + 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97, + 0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, + 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406, + 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, + 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13, + 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, + 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082, + 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, + 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0, + 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, + 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151, + 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, + 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA, + 0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, + 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B, + 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, + 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539, + 0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, + 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8, + 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, + 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD, + 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, + 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C, + 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, + 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E, + 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, + 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF, + 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, + 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18, + 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, + 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089, + 0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, + 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB, + 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, + 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A, + 0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, + 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F, + 0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, + 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE, + 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, + 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C, + 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, + 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 
0x9739040D, + 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, + 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6, + 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, + 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27, + 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, + 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065, + 0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, + 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4, + 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, + 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1, + 0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, + 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70, + 0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, + 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532, + 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, + 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3, + /* T8_5 */ + 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, + 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD, + 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, + 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2, + 0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, + 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93, + 0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, + 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C, + 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, + 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20, + 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, + 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F, + 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, + 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E, + 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, + 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201, + 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, + 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746, + 0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, + 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59, + 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, + 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778, + 0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, + 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67, + 0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, + 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB, + 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, + 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4, + 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, + 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5, + 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, + 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA, + 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, + 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B, + 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, + 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364, + 0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, + 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45, + 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, + 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A, + 0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, + 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6, + 0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, + 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9, + 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, + 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8, + 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, + 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7, + 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, + 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090, + 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, + 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F, + 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, + 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE, + 0xB206C41C, 0x5D36AF05, 
0x698A64DF, 0x86BA0FC6, + 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1, + 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, + 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D, + 0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, + 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02, + 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, + 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623, + 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, + 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C, + /* T8_6 */ + 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, + 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089, + 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, + 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA, + 0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, + 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F, + 0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, + 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C, + 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, + 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334, + 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, + 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67, + 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, + 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992, + 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, + 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1, + 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, + 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3, + 0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, + 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0, + 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, + 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55, + 0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, + 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006, + 0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, + 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E, + 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, + 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D, + 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, + 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8, + 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, + 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB, + 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, + 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D, + 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, + 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E, + 0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, + 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB, + 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, + 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988, + 0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, + 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0, + 0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, + 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093, + 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, + 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766, + 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, + 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35, + 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, + 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907, + 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, + 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454, + 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, + 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1, + 0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, + 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2, + 0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, + 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA, + 0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, + 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9, + 0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, + 0x8B39B544, 
0xE33A998C, 0x5B3FECD4, 0x333CC01C, + 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, + 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F, + /* T8_7 */ + 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, + 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504, + 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, + 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE, + 0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, + 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0, + 0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, + 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A, + 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, + 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D, + 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, + 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447, + 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, + 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929, + 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, + 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3, + 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, + 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36, + 0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, + 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC, + 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, + 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782, + 0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, + 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358, + 0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, + 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF, + 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, + 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75, + 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, + 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B, + 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, + 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1, + 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, + 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360, + 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, + 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA, + 0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, + 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4, + 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, + 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E, + 0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, + 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9, + 0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, + 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223, + 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, + 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D, + 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, + 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97, + 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, + 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852, + 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, + 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88, + 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, + 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6, + 0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, + 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C, + 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, + 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB, + 0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, + 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911, + 0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, + 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F, + 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, + 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5 + }; +} diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/SnappyCompressorInputStream.java 
b/src/main/java/org/apache/commons/compress/compressors/snappy/SnappyCompressorInputStream.java new file mode 100644 index 000000000..4650cb865 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/snappy/SnappyCompressorInputStream.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.snappy; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.compressors.lz77support.AbstractLZ77CompressorInputStream; +import org.apache.commons.compress.utils.ByteUtils; + +/** + * CompressorInputStream for the raw Snappy format. + * + * <p>This implementation uses an internal buffer in order to handle + * the back-references that are at the heart of the LZ77 algorithm. + * The size of the buffer must be at least as big as the biggest + * offset used in the compressed stream. The current version of the + * Snappy algorithm as defined by Google works on 32k blocks and + * doesn't contain offsets bigger than 32k which is the default block + * size used by this class.</p> + * + * @see <a href="https://github.com/google/snappy/blob/master/format_description.txt">Snappy compressed format description</a> + * @since 1.7 + */ +public class SnappyCompressorInputStream extends AbstractLZ77CompressorInputStream { + + /** Mask used to determine the type of "tag" is being processed */ + private static final int TAG_MASK = 0x03; + + /** Default block size */ + public static final int DEFAULT_BLOCK_SIZE = 32768; + + /** The size of the uncompressed data */ + private final int size; + + /** Number of uncompressed bytes still to be read. */ + private int uncompressedBytesRemaining; + + /** Current state of the stream */ + private State state = State.NO_BLOCK; + + private boolean endReached = false; + + /** + * Constructor using the default buffer size of 32k. + * + * @param is + * An InputStream to read compressed data from + * + * @throws IOException if reading fails + */ + public SnappyCompressorInputStream(final InputStream is) throws IOException { + this(is, DEFAULT_BLOCK_SIZE); + } + + /** + * Constructor using a configurable buffer size. 
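To make the raw format concrete before the read logic that follows: a tiny, hand-assembled raw Snappy stream (varint size, then one literal element) decoded with the class above. The bytes are constructed purely for illustration:

    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;

    public class RawSnappyReadSketch {
        public static void main(String[] args) throws Exception {
            // varint size 3, literal tag ((3 - 1) << 2 == 0x08), then the three literal bytes "abc"
            byte[] raw = {0x03, 0x08, 'a', 'b', 'c'};
            try (SnappyCompressorInputStream in =
                     new SnappyCompressorInputStream(new ByteArrayInputStream(raw))) {
                byte[] out = new byte[in.getSize()]; // getSize() reports the varint read by the constructor
                int done = 0;
                int n;
                while (done < out.length && (n = in.read(out, done, out.length - done)) != -1) {
                    done += n;
                }
                System.out.println(new String(out, 0, done, StandardCharsets.US_ASCII)); // prints "abc"
            }
        }
    }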
+ * + * @param is + * An InputStream to read compressed data from + * @param blockSize + * The block size used in compression + * + * @throws IOException if reading fails + */ + public SnappyCompressorInputStream(final InputStream is, final int blockSize) + throws IOException { + super(is, blockSize); + uncompressedBytesRemaining = size = (int) readSize(); + } + + /** + * {@inheritDoc} + */ + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (endReached) { + return -1; + } + switch (state) { + case NO_BLOCK: + fill(); + return read(b, off, len); + case IN_LITERAL: + int litLen = readLiteral(b, off, len); + if (!hasMoreDataInBlock()) { + state = State.NO_BLOCK; + } + return litLen > 0 ? litLen : read(b, off, len); + case IN_BACK_REFERENCE: + int backReferenceLen = readBackReference(b, off, len); + if (!hasMoreDataInBlock()) { + state = State.NO_BLOCK; + } + return backReferenceLen > 0 ? backReferenceLen : read(b, off, len); + default: + throw new IOException("Unknown stream state " + state); + } + } + + /** + * Try to fill the buffer with the next block of data. + */ + private void fill() throws IOException { + if (uncompressedBytesRemaining == 0) { + endReached = true; + return; + } + + int b = readOneByte(); + if (b == -1) { + throw new IOException("Premature end of stream reading block start"); + } + int length = 0; + int offset = 0; + + switch (b & TAG_MASK) { + + case 0x00: + + length = readLiteralLength(b); + uncompressedBytesRemaining -= length; + startLiteral(length); + state = State.IN_LITERAL; + break; + + case 0x01: + + /* + * These elements can encode lengths between [4..11] bytes and + * offsets between [0..2047] bytes. (len-4) occupies three bits + * and is stored in bits [2..4] of the tag byte. The offset + * occupies 11 bits, of which the upper three are stored in the + * upper three bits ([5..7]) of the tag byte, and the lower + * eight are stored in a byte following the tag byte. + */ + + length = 4 + ((b >> 2) & 0x07); + uncompressedBytesRemaining -= length; + offset = (b & 0xE0) << 3; + b = readOneByte(); + if (b == -1) { + throw new IOException("Premature end of stream reading back-reference length"); + } + offset |= b; + + startBackReference(offset, length); + state = State.IN_BACK_REFERENCE; + break; + + case 0x02: + + /* + * These elements can encode lengths between [1..64] and offsets + * from [0..65535]. (len-1) occupies six bits and is stored in + * the upper six bits ([2..7]) of the tag byte. The offset is + * stored as a little-endian 16-bit integer in the two bytes + * following the tag byte. + */ + + length = (b >> 2) + 1; + uncompressedBytesRemaining -= length; + + offset = (int) ByteUtils.fromLittleEndian(supplier, 2); + + startBackReference(offset, length); + state = State.IN_BACK_REFERENCE; + break; + + case 0x03: + + /* + * These are like the copies with 2-byte offsets (see previous + * subsection), except that the offset is stored as a 32-bit + * integer instead of a 16-bit integer (and thus will occupy + * four bytes). + */ + + length = (b >> 2) + 1; + uncompressedBytesRemaining -= length; + + offset = (int) ByteUtils.fromLittleEndian(supplier, 4) & 0x7fffffff; + + startBackReference(offset, length); + state = State.IN_BACK_REFERENCE; + break; + default: + // impossible as TAG_MASK is two bits and all four possible cases have been covered + break; + } + } + + /* + * For literals up to and including 60 bytes in length, the + * upper six bits of the tag byte contain (len-1). 
The literal + * follows immediately thereafter in the bytestream. - For + * longer literals, the (len-1) value is stored after the tag + * byte, little-endian. The upper six bits of the tag byte + * describe how many bytes are used for the length; 60, 61, 62 + * or 63 for 1-4 bytes, respectively. The literal itself follows + * after the length. + */ + private int readLiteralLength(final int b) throws IOException { + int length; + switch (b >> 2) { + case 60: + length = readOneByte(); + if (length == -1) { + throw new IOException("Premature end of stream reading literal length"); + } + break; + case 61: + length = (int) ByteUtils.fromLittleEndian(supplier, 2); + break; + case 62: + length = (int) ByteUtils.fromLittleEndian(supplier, 3); + break; + case 63: + length = (int) ByteUtils.fromLittleEndian(supplier, 4); + break; + default: + length = b >> 2; + break; + } + + return length + 1; + } + + /** + * The stream starts with the uncompressed length (up to a maximum of 2^32 - + * 1), stored as a little-endian varint. Varints consist of a series of + * bytes, where the lower 7 bits are data and the upper bit is set iff there + * are more bytes to be read. In other words, an uncompressed length of 64 + * would be stored as 0x40, and an uncompressed length of 2097150 (0x1FFFFE) + * would be stored as 0xFE 0xFF 0x7F. + * + * @return The size of the uncompressed data + * + * @throws IOException + * Could not read a byte + */ + private long readSize() throws IOException { + int index = 0; + long sz = 0; + int b = 0; + + do { + b = readOneByte(); + if (b == -1) { + throw new IOException("Premature end of stream reading size"); + } + sz |= (b & 0x7f) << (index++ * 7); + } while (0 != (b & 0x80)); + return sz; + } + + /** + * Get the uncompressed size of the stream + * + * @return the uncompressed size + */ + @Override + public int getSize() { + return size; + } + + private enum State { + NO_BLOCK, IN_LITERAL, IN_BACK_REFERENCE + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/SnappyCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/snappy/SnappyCompressorOutputStream.java new file mode 100644 index 000000000..0c7b3b1ce --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/snappy/SnappyCompressorOutputStream.java @@ -0,0 +1,288 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
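The case 0x01 branch of fill() above packs a back-reference into a tag byte plus one offset byte; a worked decoding of a made-up element (offset 100, length 7) follows as a sketch:

    public class SnappyCopyTagSketch {
        public static void main(String[] args) {
            // made-up "copy with 1-byte offset" element: tag 0x0D (binary 00001101), offset byte 0x64
            int tag = 0x0D;
            int offsetByte = 0x64;

            int type = tag & 0x03;                         // 1 -> back-reference, 1-byte offset form
            int length = 4 + ((tag >> 2) & 0x07);          // (len - 4) sits in bits 2..4 -> 7
            int offset = ((tag & 0xE0) << 3) | offsetByte; // upper 3 offset bits from the tag, low 8 follow

            System.out.println(type + " " + length + " " + offset); // prints "1 7 100"
        }
    }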
+ */ +package org.apache.commons.compress.compressors.snappy; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.commons.compress.compressors.CompressorOutputStream; +import org.apache.commons.compress.compressors.lz77support.LZ77Compressor; +import org.apache.commons.compress.compressors.lz77support.Parameters; +import org.apache.commons.compress.utils.ByteUtils; + +/** + * CompressorOutputStream for the raw Snappy format. + * + * <p>This implementation uses an internal buffer in order to handle + * the back-references that are at the heart of the LZ77 algorithm. + * The size of the buffer must be at least as big as the biggest + * offset used in the compressed stream. The current version of the + * Snappy algorithm as defined by Google works on 32k blocks and + * doesn't contain offsets bigger than 32k, which is the default block + * size used by this class.</p> + * + * <p>The raw Snappy format requires the uncompressed size to be + * written at the beginning of the stream using a varint + * representation, i.e., the number of bytes needed to write the + * information is not known before the uncompressed size is + * known. We've chosen to make the uncompressedSize a parameter of the + * constructor rather than buffering the whole output until the size + * is known. When using the {@link FramedSnappyCompressorOutputStream} + * this limitation is taken care of by the wrapping framing + * format.</p> + * + * @see <a href="https://github.com/google/snappy/blob/master/format_description.txt">Snappy compressed format description</a> + * @since 1.14 + * @NotThreadSafe + */ +public class SnappyCompressorOutputStream extends CompressorOutputStream { + private final LZ77Compressor compressor; + private final OutputStream os; + private final ByteUtils.ByteConsumer consumer; + + // used in one-arg write method + private final byte[] oneByte = new byte[1]; + + private boolean finished = false; + + /** + * Constructor using the default block size of 32k. + * + * @param os the outputstream to write compressed data to + * @param uncompressedSize the uncompressed size of data + * @throws IOException if writing of the size fails + */ + public SnappyCompressorOutputStream(final OutputStream os, final long uncompressedSize) throws IOException { + this(os, uncompressedSize, SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE); + } + + /** + * Constructor using a configurable block size. + * + * @param os the outputstream to write compressed data to + * @param uncompressedSize the uncompressed size of data + * @param blockSize the block size used - must be a power of two + * @throws IOException if writing of the size fails + */ + public SnappyCompressorOutputStream(final OutputStream os, final long uncompressedSize, final int blockSize) + throws IOException { + this(os, uncompressedSize, createParameterBuilder(blockSize).build()); + } + + /** + * Constructor providing full control over the underlying LZ77 compressor.
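A round-trip sketch using the constructors above together with SnappyCompressorInputStream; the payload is made up, and, as the class comment explains, the raw format needs the uncompressed size up front:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;
    import org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream;

    public class RawSnappyRoundTripSketch {
        public static void main(String[] args) throws Exception {
            byte[] payload = "to be or not to be, to be or not to be".getBytes(StandardCharsets.US_ASCII);
            ByteArrayOutputStream compressed = new ByteArrayOutputStream();
            try (SnappyCompressorOutputStream out =
                     new SnappyCompressorOutputStream(compressed, payload.length)) {
                out.write(payload);
            }
            ByteArrayOutputStream restored = new ByteArrayOutputStream();
            try (SnappyCompressorInputStream in = new SnappyCompressorInputStream(
                    new ByteArrayInputStream(compressed.toByteArray()))) {
                byte[] buf = new byte[1024];
                int n;
                while ((n = in.read(buf)) != -1) {
                    restored.write(buf, 0, n);
                }
            }
            System.out.println(new String(restored.toByteArray(), StandardCharsets.US_ASCII));
        }
    }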
+ * + * @param os the outputstream to write compressed data to + * @param uncompressedSize the uncompressed size of data + * @param params the parameters to use by the compressor - note + * that the format itself imposes some limits like a maximum match + * length of 64 bytes + * @throws IOException if writing of the size fails + */ + public SnappyCompressorOutputStream(final OutputStream os, final long uncompressedSize, Parameters params) + throws IOException { + this.os = os; + consumer = new ByteUtils.OutputStreamByteConsumer(os); + compressor = new LZ77Compressor(params, new LZ77Compressor.Callback() { + @Override + public void accept(LZ77Compressor.Block block) throws IOException { + switch (block.getType()) { + case LITERAL: + writeLiteralBlock((LZ77Compressor.LiteralBlock) block); + break; + case BACK_REFERENCE: + writeBackReference((LZ77Compressor.BackReference) block); + break; + case EOD: + break; + } + } + }); + writeUncompressedSize(uncompressedSize); + } + + @Override + public void write(int b) throws IOException { + oneByte[0] = (byte) (b & 0xff); + write(oneByte); + } + + @Override + public void write(byte[] data, int off, int len) throws IOException { + compressor.compress(data, off, len); + } + + @Override + public void close() throws IOException { + try { + finish(); + } finally { + os.close(); + } + } + + /** + * Compresses all remaining data and writes it to the stream, + * doesn't close the underlying stream. + * @throws IOException if an error occurs + */ + public void finish() throws IOException { + if (!finished) { + compressor.finish(); + finished = true; + } + } + + private void writeUncompressedSize(long uncompressedSize) throws IOException { + boolean more = false; + do { + int currentByte = (int) (uncompressedSize & 0x7F); + more = uncompressedSize > currentByte; + if (more) { + currentByte |= 0x80; + } + os.write(currentByte); + uncompressedSize >>= 7; + } while (more); + } + + // literal length is stored as (len - 1) either inside the tag + // (six bits minus four flags) or in 1 to 4 bytes after the tag + private static final int MAX_LITERAL_SIZE_WITHOUT_SIZE_BYTES = 60; + private static final int MAX_LITERAL_SIZE_WITH_ONE_SIZE_BYTE = 1 << 8; + private static final int MAX_LITERAL_SIZE_WITH_TWO_SIZE_BYTES = 1 << 16; + private static final int MAX_LITERAL_SIZE_WITH_THREE_SIZE_BYTES = 1 << 24; + + private static final int ONE_SIZE_BYTE_MARKER = 60 << 2; + private static final int TWO_SIZE_BYTE_MARKER = 61 << 2; + private static final int THREE_SIZE_BYTE_MARKER = 62 << 2; + private static final int FOUR_SIZE_BYTE_MARKER = 63 << 2; + + private void writeLiteralBlock(LZ77Compressor.LiteralBlock block) throws IOException { + int len = block.getLength(); + if (len <= MAX_LITERAL_SIZE_WITHOUT_SIZE_BYTES) { + writeLiteralBlockNoSizeBytes(block, len); + } else if (len <= MAX_LITERAL_SIZE_WITH_ONE_SIZE_BYTE) { + writeLiteralBlockOneSizeByte(block, len); + } else if (len <= MAX_LITERAL_SIZE_WITH_TWO_SIZE_BYTES) { + writeLiteralBlockTwoSizeBytes(block, len); + } else if (len <= MAX_LITERAL_SIZE_WITH_THREE_SIZE_BYTES) { + writeLiteralBlockThreeSizeBytes(block, len); + } else { + writeLiteralBlockFourSizeBytes(block, len); + } + } + + private void writeLiteralBlockNoSizeBytes(LZ77Compressor.LiteralBlock block, int len) throws IOException { + writeLiteralBlockWithSize(len - 1 << 2, 0, len, block); + } + + private void writeLiteralBlockOneSizeByte(LZ77Compressor.LiteralBlock block, int len) throws IOException { + writeLiteralBlockWithSize(ONE_SIZE_BYTE_MARKER, 1, len, block); 
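    // Illustrative note, not part of the committed file: for a literal of, say, 300 bytes the markers
    // above select the two-size-byte form, because 300 exceeds both 60 and 256. The emitted bytes are
    // the tag TWO_SIZE_BYTE_MARKER (61 << 2 == 0xF4), then len - 1 == 299 as little-endian 0x2B 0x01,
    // and finally the 300 literal bytes themselves.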
+ } + + private void writeLiteralBlockTwoSizeBytes(LZ77Compressor.LiteralBlock block, int len) throws IOException { + writeLiteralBlockWithSize(TWO_SIZE_BYTE_MARKER, 2, len, block); + } + + private void writeLiteralBlockThreeSizeBytes(LZ77Compressor.LiteralBlock block, int len) throws IOException { + writeLiteralBlockWithSize(THREE_SIZE_BYTE_MARKER, 3, len, block); + } + + private void writeLiteralBlockFourSizeBytes(LZ77Compressor.LiteralBlock block, int len) throws IOException { + writeLiteralBlockWithSize(FOUR_SIZE_BYTE_MARKER, 4, len, block); + } + + private void writeLiteralBlockWithSize(int tagByte, int sizeBytes, int len, LZ77Compressor.LiteralBlock block) + throws IOException { + os.write(tagByte); + writeLittleEndian(sizeBytes, len - 1); + os.write(block.getData(), block.getOffset(), len); + } + + private void writeLittleEndian(final int numBytes, int num) throws IOException { + ByteUtils.toLittleEndian(consumer, num, numBytes); + } + + // Back-references ("copies") have their offset/size information + // in two, three or five bytes. + private static final int MIN_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE = 4; + private static final int MAX_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE = 11; + private static final int MAX_OFFSET_WITH_ONE_OFFSET_BYTE = 1 << 11 - 1; + private static final int MAX_OFFSET_WITH_TWO_OFFSET_BYTES = 1 << 16 - 1; + + private static final int ONE_BYTE_COPY_TAG = 1; + private static final int TWO_BYTE_COPY_TAG = 2; + private static final int FOUR_BYTE_COPY_TAG = 3; + + private void writeBackReference(LZ77Compressor.BackReference block) throws IOException { + final int len = block.getLength(); + final int offset = block.getOffset(); + if (len >= MIN_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE && len <= MAX_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE + && offset <= MAX_OFFSET_WITH_ONE_OFFSET_BYTE) { + writeBackReferenceWithOneOffsetByte(len, offset); + } else if (offset < MAX_OFFSET_WITH_TWO_OFFSET_BYTES) { + writeBackReferenceWithTwoOffsetBytes(len, offset); + } else { + writeBackReferenceWithFourOffsetBytes(len, offset); + } + } + + private void writeBackReferenceWithOneOffsetByte(int len, int offset) throws IOException { + os.write(ONE_BYTE_COPY_TAG | ((len - 4) << 2) | ((offset & 0x700) >> 3)); + os.write(offset & 0xff); + } + + private void writeBackReferenceWithTwoOffsetBytes(int len, int offset) throws IOException { + writeBackReferenceWithLittleEndianOffset(TWO_BYTE_COPY_TAG, 2, len, offset); + } + + private void writeBackReferenceWithFourOffsetBytes(int len, int offset) throws IOException { + writeBackReferenceWithLittleEndianOffset(FOUR_BYTE_COPY_TAG, 4, len, offset); + } + + private void writeBackReferenceWithLittleEndianOffset(int tag, int offsetBytes, int len, int offset) + throws IOException { + os.write(tag | ((len - 1) << 2)); + writeLittleEndian(offsetBytes, offset); + } + + // technically the format could use shorter matches but with a + // length of three the offset would be encoded as at least two + // bytes in addition to the tag, so yield no compression at all + private static final int MIN_MATCH_LENGTH = 4; + // Snappy stores the match length in six bits of the tag + private static final int MAX_MATCH_LENGTH = 64; + + /** + * Returns a builder correctly configured for the Snappy algorithm using the gven block size. + * @param blockSize the block size. 
+ * @return a builder correctly configured for the Snappy algorithm using the given block size + */ + public static Parameters.Builder createParameterBuilder(int blockSize) { + // the max offset and max literal length defined by the format + // are 2^32 - 1 and 2^32 respectively - with blockSize being + // an integer we will never exceed that + return Parameters.builder(blockSize) + .withMinBackReferenceLength(MIN_MATCH_LENGTH) + .withMaxBackReferenceLength(MAX_MATCH_LENGTH) + .withMaxOffset(blockSize) + .withMaxLiteralLength(blockSize); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/package.html b/src/main/java/org/apache/commons/compress/compressors/snappy/package.html new file mode 100644 index 000000000..efef07180 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/snappy/package.html @@ -0,0 +1,38 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream classes for the + <a href="https://github.com/google/snappy">Snappy</a> + algorithm.</p> + + <p>The raw Snappy format, which only contains the compressed data, + is supported by the <code>SnappyCompressor*putStream</code> + classes, while the so-called "framing format" is implemented + by <code>FramedSnappyCompressor*putStream</code>. Note that there + have been different versions of the framing format specification; + the implementation in Commons Compress is based on the + specification "Last revised: 2013-10-25".</p> + + <p>Only the "framing format" can be auto-detected. This means you + have to specify the format explicitly if you want to read a + "raw" Snappy stream + via <code>CompressorStreamFactory</code>.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/xz/XZCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/xz/XZCompressorInputStream.java new file mode 100644 index 000000000..20d67a19a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/xz/XZCompressorInputStream.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.xz; + +import java.io.IOException; +import java.io.InputStream; + +import org.tukaani.xz.XZ; +import org.tukaani.xz.SingleXZInputStream; +import org.tukaani.xz.XZInputStream; + +import org.apache.commons.compress.MemoryLimitException; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.CountingInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * XZ decompressor. + * @since 1.4 + */ +public class XZCompressorInputStream extends CompressorInputStream + implements InputStreamStatistics { + + private final CountingInputStream countingStream; + private final InputStream in; + + /** + * Checks if the signature matches what is expected for a .xz file. + * + * @param signature the bytes to check + * @param length the number of bytes to check + * @return true if signature matches the .xz magic bytes, false otherwise + */ + public static boolean matches(final byte[] signature, final int length) { + if (length < XZ.HEADER_MAGIC.length) { + return false; + } + + for (int i = 0; i < XZ.HEADER_MAGIC.length; ++i) { + if (signature[i] != XZ.HEADER_MAGIC[i]) { + return false; + } + } + + return true; + } + + /** + * Creates a new input stream that decompresses XZ-compressed data + * from the specified input stream. This doesn't support + * concatenated .xz files. + * + * @param inputStream where to read the compressed data + * + * @throws IOException if the input is not in the .xz format, + * the input is corrupt or truncated, the .xz + * headers specify options that are not supported + * by this implementation, or the underlying + * <code>inputStream</code> throws an exception + */ + public XZCompressorInputStream(final InputStream inputStream) + throws IOException { + this(inputStream, false); + } + + /** + * Creates a new input stream that decompresses XZ-compressed data + * from the specified input stream. + * + * @param inputStream where to read the compressed data + * @param decompressConcatenated + * if true, decompress until the end of the + * input; if false, stop after the first .xz + * stream and leave the input position to point + * to the next byte after the .xz stream + * + * @throws IOException if the input is not in the .xz format, + * the input is corrupt or truncated, the .xz + * headers specify options that are not supported + * by this implementation, or the underlying + * <code>inputStream</code> throws an exception + */ + public XZCompressorInputStream(final InputStream inputStream, + final boolean decompressConcatenated) + throws IOException { + this(inputStream, decompressConcatenated, -1); + } + + /** + * Creates a new input stream that decompresses XZ-compressed data + * from the specified input stream. + * + * @param inputStream where to read the compressed data + * @param decompressConcatenated + * if true, decompress until the end of the + * input; if false, stop after the first .xz + * stream and leave the input position to point + * to the next byte after the .xz stream + * @param memoryLimitInKb memory limit used when reading blocks. If + * the estimated memory limit is exceeded on {@link #read()}, + * a {@link MemoryLimitException} is thrown. 
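A read-side sketch using the memory-limited constructor documented here; the file name and the 64 MiB limit are made-up choices, and since this MemoryLimitException is an IOException the catch below is only there to show the conversion in action:

    import java.io.BufferedInputStream;
    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.apache.commons.compress.MemoryLimitException;
    import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;

    public class XZReadWithLimitSketch {
        public static void main(String[] args) throws Exception {
            try (InputStream fileIn = new BufferedInputStream(
                         Files.newInputStream(Paths.get("archive.xz")));         // made-up file name
                 XZCompressorInputStream xzIn =
                         new XZCompressorInputStream(fileIn, true, 64 * 1024)) { // limit given in KiB
                byte[] buf = new byte[8192];
                while (xzIn.read(buf) != -1) {
                    // process the decompressed bytes here
                }
            } catch (MemoryLimitException e) {
                System.err.println("not decompressing: " + e.getMessage());
            }
        }
    }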
+ * + * @throws IOException if the input is not in the .xz format, + * the input is corrupt or truncated, the .xz + * headers specify options that are not supported + * by this implementation, + * or the underlying <code>inputStream</code> throws an exception + * + * @since 1.14 + */ + public XZCompressorInputStream(InputStream inputStream, + boolean decompressConcatenated, final int memoryLimitInKb) + throws IOException { + countingStream = new CountingInputStream(inputStream); + if (decompressConcatenated) { + in = new XZInputStream(countingStream, memoryLimitInKb); + } else { + in = new SingleXZInputStream(countingStream, memoryLimitInKb); + } + } + + @Override + public int read() throws IOException { + try { + final int ret = in.read(); + count(ret == -1 ? -1 : 1); + return ret; + } catch (org.tukaani.xz.MemoryLimitException e) { + throw new MemoryLimitException(e.getMemoryNeeded(), e.getMemoryLimit(), e); + } + } + + @Override + public int read(final byte[] buf, final int off, final int len) throws IOException { + try { + final int ret = in.read(buf, off, len); + count(ret); + return ret; + } catch (org.tukaani.xz.MemoryLimitException e) { + //convert to commons-compress MemoryLimtException + throw new MemoryLimitException(e.getMemoryNeeded(), e.getMemoryLimit(), e); + } + } + + @Override + public long skip(final long n) throws IOException { + try { + return IOUtils.skip(in, n); + } catch (org.tukaani.xz.MemoryLimitException e) { + //convert to commons-compress MemoryLimtException + throw new MemoryLimitException(e.getMemoryNeeded(), e.getMemoryLimit(), e); + } + } + + @Override + public int available() throws IOException { + return in.available(); + } + + @Override + public void close() throws IOException { + in.close(); + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return countingStream.getBytesRead(); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/xz/XZCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/xz/XZCompressorOutputStream.java new file mode 100644 index 000000000..6e9b70e05 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/xz/XZCompressorOutputStream.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.xz; + +import java.io.IOException; +import java.io.OutputStream; +import org.tukaani.xz.LZMA2Options; +import org.tukaani.xz.XZOutputStream; + +import org.apache.commons.compress.compressors.CompressorOutputStream; + +/** + * XZ compressor. 
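A compression sketch for this class, assuming a hypothetical output file; preset 6 mirrors the default described below:

    try (OutputStream fileOut = Files.newOutputStream(Paths.get("data.xz"));
         XZCompressorOutputStream xzOut = new XZCompressorOutputStream(fileOut, 6)) {
        xzOut.write("hello xz".getBytes(StandardCharsets.UTF_8));
    } // close() finishes the XZ stream and then closes the underlying file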
+ * @since 1.4 + */ +public class XZCompressorOutputStream extends CompressorOutputStream { + private final XZOutputStream out; + + /** + * Creates a new XZ compressor using the default LZMA2 options. + * This is equivalent to <code>XZCompressorOutputStream(outputStream, 6)</code>. + * @param outputStream the stream to wrap + * @throws IOException on error + */ + public XZCompressorOutputStream(final OutputStream outputStream) + throws IOException { + out = new XZOutputStream(outputStream, new LZMA2Options()); + } + + /** + * Creates a new XZ compressor using the specified LZMA2 preset level. + * <p> + * The presets 0-3 are fast presets with medium compression. + * The presets 4-6 are fairly slow presets with high compression. + * The default preset is 6. + * <p> + * The presets 7-9 are like the preset 6 but use bigger dictionaries + * and have higher compressor and decompressor memory requirements. + * Unless the uncompressed size of the file exceeds 8 MiB, + * 16 MiB, or 32 MiB, it is waste of memory to use the + * presets 7, 8, or 9, respectively. + * @param outputStream the stream to wrap + * @param preset the preset + * @throws IOException on error + */ + public XZCompressorOutputStream(final OutputStream outputStream, final int preset) + throws IOException { + out = new XZOutputStream(outputStream, new LZMA2Options(preset)); + } + + @Override + public void write(final int b) throws IOException { + out.write(b); + } + + @Override + public void write(final byte[] buf, final int off, final int len) throws IOException { + out.write(buf, off, len); + } + + /** + * Flushes the encoder and calls <code>outputStream.flush()</code>. + * All buffered pending data will then be decompressible from + * the output stream. Calling this function very often may increase + * the compressed file size a lot. + */ + @Override + public void flush() throws IOException { + out.flush(); + } + + /** + * Finishes compression without closing the underlying stream. + * No more data can be written to this stream after finishing. + * @throws IOException on error + */ + public void finish() throws IOException { + out.finish(); + } + + @Override + public void close() throws IOException { + out.close(); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/xz/XZUtils.java b/src/main/java/org/apache/commons/compress/compressors/xz/XZUtils.java new file mode 100644 index 000000000..be4625e5d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/xz/XZUtils.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.xz; + +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.compressors.FileNameUtil; + +/** + * Utility code for the xz compression format. + * @ThreadSafe + * @since 1.4 + */ +public class XZUtils { + + private static final FileNameUtil fileNameUtil; + + /** + * XZ Header Magic Bytes begin a XZ file. + * + * <p>This is a copy of {@code org.tukaani.xz.XZ.HEADER_MAGIC} in + * XZ for Java version 1.5.</p> + */ + private static final byte[] HEADER_MAGIC = { + (byte) 0xFD, '7', 'z', 'X', 'Z', '\0' + }; + + enum CachedAvailability { + DONT_CACHE, CACHED_AVAILABLE, CACHED_UNAVAILABLE + } + + private static volatile CachedAvailability cachedXZAvailability; + + static { + final Map<String, String> uncompressSuffix = new HashMap<>(); + uncompressSuffix.put(".txz", ".tar"); + uncompressSuffix.put(".xz", ""); + uncompressSuffix.put("-xz", ""); + fileNameUtil = new FileNameUtil(uncompressSuffix, ".xz"); + cachedXZAvailability = CachedAvailability.DONT_CACHE; + try { + Class.forName("org.osgi.framework.BundleEvent"); + } catch (final Exception ex) { + setCacheXZAvailablity(true); + } + } + + /** Private constructor to prevent instantiation of this utility class. */ + private XZUtils() { + } + + /** + * Checks if the signature matches what is expected for a .xz file. + * + * <p>This is more or less a copy of the version found in {@link + * XZCompressorInputStream} but doesn't depend on the presence of + * XZ for Java.</p> + * + * @param signature the bytes to check + * @param length the number of bytes to check + * @return true if signature matches the .xz magic bytes, false otherwise + * @since 1.9 + */ + public static boolean matches(final byte[] signature, final int length) { + if (length < HEADER_MAGIC.length) { + return false; + } + + for (int i = 0; i < HEADER_MAGIC.length; ++i) { + if (signature[i] != HEADER_MAGIC[i]) { + return false; + } + } + + return true; + } + + /** + * Are the classes required to support XZ compression available? + * @since 1.5 + * @return true if the classes required to support XZ compression are available + */ + public static boolean isXZCompressionAvailable() { + final CachedAvailability cachedResult = cachedXZAvailability; + if (cachedResult != CachedAvailability.DONT_CACHE) { + return cachedResult == CachedAvailability.CACHED_AVAILABLE; + } + return internalIsXZCompressionAvailable(); + } + + private static boolean internalIsXZCompressionAvailable() { + try { + XZCompressorInputStream.matches(null, 0); + return true; + } catch (final NoClassDefFoundError error) { + return false; + } + } + + /** + * Detects common xz suffixes in the given filename. + * + * @param filename name of a file + * @return {@code true} if the filename has a common xz suffix, + * {@code false} otherwise + */ + public static boolean isCompressedFilename(final String filename) { + return fileNameUtil.isCompressedFilename(filename); + } + + /** + * Maps the given name of a xz-compressed file to the name that the + * file should have after uncompression. Commonly used file type specific + * suffixes like ".txz" are automatically detected and + * correctly mapped. For example the name "package.txz" is mapped to + * "package.tar". And any filenames with the generic ".xz" suffix + * (or any other generic xz suffix) is mapped to a name without that + * suffix. If no xz suffix is detected, then the filename is returned + * unmapped. 
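For example, the mapping described above behaves roughly like this (illustrative file names, not taken from the patch):

    XZUtils.getUncompressedFilename("package.txz"); // -> "package.tar"
    XZUtils.getUncompressedFilename("logs.xz");     // -> "logs"
    XZUtils.getUncompressedFilename("README");      // -> "README" (no xz suffix detected)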
+ * + * @param filename name of a file + * @return name of the corresponding uncompressed file + */ + public static String getUncompressedFilename(final String filename) { + return fileNameUtil.getUncompressedFilename(filename); + } + + /** + * Maps the given filename to the name that the file should have after + * compression with xz. Common file types with custom suffixes for + * compressed versions are automatically detected and correctly mapped. + * For example the name "package.tar" is mapped to "package.txz". If no + * custom mapping is applicable, then the default ".xz" suffix is appended + * to the filename. + * + * @param filename name of a file + * @return name of the corresponding compressed file + */ + public static String getCompressedFilename(final String filename) { + return fileNameUtil.getCompressedFilename(filename); + } + + /** + * Whether to cache the result of the XZ for Java check. + * + * <p>This defaults to {@code false} in an OSGi environment and {@code true} otherwise.</p> + * @param doCache whether to cache the result + * @since 1.9 + */ + public static void setCacheXZAvailablity(final boolean doCache) { + if (!doCache) { + cachedXZAvailability = CachedAvailability.DONT_CACHE; + } else if (cachedXZAvailability == CachedAvailability.DONT_CACHE) { + final boolean hasXz = internalIsXZCompressionAvailable(); + cachedXZAvailability = hasXz ? CachedAvailability.CACHED_AVAILABLE // NOSONAR + : CachedAvailability.CACHED_UNAVAILABLE; + } + } + + // only exists to support unit tests + static CachedAvailability getCachedXZAvailability() { + return cachedXZAvailability; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/xz/package.html b/src/main/java/org/apache/commons/compress/compressors/xz/package.html new file mode 100644 index 000000000..48eca2518 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/xz/package.html @@ -0,0 +1,31 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +--> + <body> + <p>Provides stream classes for compressing and decompressing + streams using the XZ algorithm.</p> + + <p>The classes in this package are wrappers around {@link + org.tukaani.xz.XZInputStream org.tukaani.xz.XZInputStream} and + {@link org.tukaani.xz.XZOutputStream + org.tukaani.xz.XZOutputStream} provided by the public + domain <a href="https://tukaani.org/xz/java.html">XZ for Java</a> + library.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java new file mode 100644 index 000000000..b7ce16f33 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.z; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteOrder; + +import org.apache.commons.compress.compressors.lzw.LZWInputStream; + +/** + * Input stream that decompresses .Z files. + * @NotThreadSafe + * @since 1.7 + */ +public class ZCompressorInputStream extends LZWInputStream { + private static final int MAGIC_1 = 0x1f; + private static final int MAGIC_2 = 0x9d; + private static final int BLOCK_MODE_MASK = 0x80; + private static final int MAX_CODE_SIZE_MASK = 0x1f; + private final boolean blockMode; + private final int maxCodeSize; + private long totalCodesRead = 0; + + public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKb) + throws IOException { + super(inputStream, ByteOrder.LITTLE_ENDIAN); + final int firstByte = (int) in.readBits(8); + final int secondByte = (int) in.readBits(8); + final int thirdByte = (int) in.readBits(8); + if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) { + throw new IOException("Input is not in .Z format"); + } + blockMode = (thirdByte & BLOCK_MODE_MASK) != 0; + maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK; + if (blockMode) { + setClearCode(DEFAULT_CODE_SIZE); + } + initializeTables(maxCodeSize, memoryLimitInKb); + clearEntries(); + } + + public ZCompressorInputStream(final InputStream inputStream) throws IOException { + this(inputStream, -1); + } + + private void clearEntries() { + setTableSize((1 << 8) + (blockMode ? 1 : 0)); + } + + /** + * {@inheritDoc} + * <p><strong>This method is only protected for technical reasons + * and is not part of Commons Compress' published API. 
It may + * change or disappear without warning.</strong></p> + */ + @Override + protected int readNextCode() throws IOException { + final int code = super.readNextCode(); + if (code >= 0) { + ++totalCodesRead; + } + return code; + } + + private void reAlignReading() throws IOException { + // "compress" works in multiples of 8 symbols, each codeBits bits long. + // When codeBits changes, the remaining unused symbols in the current + // group of 8 are still written out, in the old codeSize, + // as garbage values (usually zeroes) that need to be skipped. + long codeReadsToThrowAway = 8 - (totalCodesRead % 8); + if (codeReadsToThrowAway == 8) { + codeReadsToThrowAway = 0; + } + for (long i = 0; i < codeReadsToThrowAway; i++) { + readNextCode(); + } + in.clearBitCache(); + } + + /** + * {@inheritDoc} + * <p><strong>This method is only protected for technical reasons + * and is not part of Commons Compress' published API. It may + * change or disappear without warning.</strong></p> + */ + @Override + protected int addEntry(final int previousCode, final byte character) throws IOException { + final int maxTableSize = 1 << getCodeSize(); + final int r = addEntry(previousCode, character, maxTableSize); + if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) { + reAlignReading(); + incrementCodeSize(); + } + return r; + } + + /** + * {@inheritDoc} + * <p><strong>This method is only protected for technical reasons + * and is not part of Commons Compress' published API. It may + * change or disappear without warning.</strong></p> + */ + @Override + protected int decompressNextSymbol() throws IOException { + // + // table entry table entry + // _____________ _____ + // table entry / \ / \ + // ____________/ \ \ + // / / \ / \ \ + // +---+---+---+---+---+---+---+---+---+---+ + // | . | . | . | . | . | . | . | . | . | . | + // +---+---+---+---+---+---+---+---+---+---+ + // |<--------->|<------------->|<----->|<->| + // symbol symbol symbol symbol + // + final int code = readNextCode(); + if (code < 0) { + return -1; + } else if (blockMode && code == getClearCode()) { + clearEntries(); + reAlignReading(); + resetCodeSize(); + resetPreviousCode(); + return 0; + } else { + boolean addedUnfinishedEntry = false; + if (code == getTableSize()) { + addRepeatOfPreviousCode(); + addedUnfinishedEntry = true; + } else if (code > getTableSize()) { + throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code)); + } + return expandCodeToOutputStack(code, addedUnfinishedEntry); + } + } + + /** + * Checks if the signature matches what is expected for a Unix compress file. + * + * @param signature + * the bytes to check + * @param length + * the number of bytes to check + * @return true, if this stream is a Unix compress compressed + * stream, false otherwise + * + * @since 1.9 + */ + public static boolean matches(final byte[] signature, final int length) { + return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2; + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/z/package.html b/src/main/java/org/apache/commons/compress/compressors/z/package.html new file mode 100644 index 000000000..ca9924b78 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/z/package.html @@ -0,0 +1,24 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream classes for decompressing + streams using the "compress" algorithm used to write .Z files.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java new file mode 100644 index 000000000..7a47f1000 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.compress.compressors.zstandard; + + +import java.io.IOException; +import java.io.InputStream; + +import com.github.luben.zstd.ZstdInputStream; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.CountingInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * {@link CompressorInputStream} implementation to decode Zstandard encoded stream. + * Library relies on <a href="https://github.com/luben/zstd-jni/">Zstandard JNI</a> + * + * @since 1.16 + */ +public class ZstdCompressorInputStream extends CompressorInputStream + implements InputStreamStatistics { + + private final CountingInputStream countingStream; + private final ZstdInputStream decIS; + + public ZstdCompressorInputStream(final InputStream in) throws IOException { + this.decIS = new ZstdInputStream(countingStream = new CountingInputStream(in)); + } + + @Override + public int available() throws IOException { + return decIS.available(); + } + + @Override + public void close() throws IOException { + decIS.close(); + } + + @Override + public int read(final byte[] b) throws IOException { + return decIS.read(b); + } + + @Override + public long skip(final long n) throws IOException { + return IOUtils.skip(decIS, n); + } + + @Override + public void mark(final int readlimit) { + decIS.mark(readlimit); + } + + @Override + public boolean markSupported() { + return decIS.markSupported(); + } + + @Override + public int read() throws IOException { + final int ret = decIS.read(); + count(ret == -1 ? 
0 : 1); + return ret; + } + + @Override + public int read(final byte[] buf, final int off, final int len) throws IOException { + final int ret = decIS.read(buf, off, len); + count(ret); + return ret; + } + + @Override + public String toString() { + return decIS.toString(); + } + + @Override + public void reset() throws IOException { + decIS.reset(); + } + + /** + * @since 1.17 + */ + @Override + public long getCompressedCount() { + return countingStream.getBytesRead(); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorOutputStream.java new file mode 100644 index 000000000..0deda7d0b --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorOutputStream.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.compress.compressors.zstandard; + + +import java.io.IOException; +import java.io.OutputStream; + +import com.github.luben.zstd.ZstdOutputStream; +import org.apache.commons.compress.compressors.CompressorOutputStream; + +/** + * {@link CompressorOutputStream} implementation to create Zstandard encoded stream. + * Library relies on <a href="https://github.com/luben/zstd-jni/">Zstandard JNI</a> + * + * @since 1.16 + */ +public class ZstdCompressorOutputStream extends CompressorOutputStream { + + private final ZstdOutputStream encOS; + + /** + * Wraps the given stream into a zstd-jni ZstdOutputStream. + * @param outStream the stream to write to + * @param level value for zstd-jni's level argument + * @param closeFrameOnFlush value for zstd-jni's closeFrameOnFlush argument + * @param useChecksum value for zstd-jni's useChecksum argument + * @throws IOException if zstd-jni does + * @since 1.18 + */ + public ZstdCompressorOutputStream(final OutputStream outStream, int level, boolean closeFrameOnFlush, + boolean useChecksum) throws IOException { + this.encOS = new ZstdOutputStream(outStream, level, closeFrameOnFlush, useChecksum); + } + + /** + * Wraps the given stream into a zstd-jni ZstdOutputStream using the default value for {@code useChecksum}. 
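A usage sketch for these zstd-jni-backed constructors, with a hypothetical file name and an arbitrary compression level of 3 (imports omitted):

    final byte[] payload = "hello zstd".getBytes(StandardCharsets.UTF_8);
    try (OutputStream fileOut = Files.newOutputStream(Paths.get("data.zst"));
         ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(fileOut, 3)) {
        zOut.write(payload);
    }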
+ * @param outStream the stream to write to + * @param level value for zstd-jni's level argument + * @param closeFrameOnFlush value for zstd-jni's closeFrameOnFlush argument + * @throws IOException if zstd-jni does + * @since 1.18 + */ + public ZstdCompressorOutputStream(final OutputStream outStream, int level, boolean closeFrameOnFlush) + throws IOException { + this.encOS = new ZstdOutputStream(outStream, level, closeFrameOnFlush); + } + + /** + * Wraps the given stream into a zstd-jni ZstdOutputStream using the default values for {@code closeFrameOnFlush} + * and {@code useChecksum}. + * @param outStream the stream to write to + * @param level value for zstd-jni's level argument + * @throws IOException if zstd-jni does + * @since 1.18 + */ + public ZstdCompressorOutputStream(final OutputStream outStream, int level) throws IOException { + this.encOS = new ZstdOutputStream(outStream, level); + } + + /** + * Wraps the given stream into a zstd-jni ZstdOutputStream using the default values for {@code level}, {@code + * closeFrameOnFlush} and {@code useChecksum}. + * @param outStream the stream to write to + * @throws IOException if zstd-jni does + */ + public ZstdCompressorOutputStream(final OutputStream outStream) throws IOException { + this.encOS = new ZstdOutputStream(outStream); + } + + @Override + public void close() throws IOException { + encOS.close(); + } + + @Override + public void write(final int b) throws IOException { + encOS.write(b); + } + + @Override + public void write(final byte[] buf, final int off, final int len) throws IOException { + encOS.write(buf, off, len); + } + + @Override + public String toString() { + return encOS.toString(); + } + + @Override + public void flush() throws IOException { + encOS.flush(); + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java new file mode 100644 index 000000000..8b2f8d3d7 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.zstandard; + +/** + * Utility code for the Zstandard compression format. + * @ThreadSafe + * @since 1.16 + */ +public class ZstdUtils { + + enum CachedAvailability { + DONT_CACHE, CACHED_AVAILABLE, CACHED_UNAVAILABLE + } + + /** + * Zstandard Frame Magic Bytes. + */ + private static final byte[] ZSTANDARD_FRAME_MAGIC = { + (byte) 0x28, (byte) 0xB5, (byte) 0x2F, (byte) 0xFD + }; + + /** + * Skippable Frame Magic Bytes - the three common bytes. 
+ */ + private static final byte[] SKIPPABLE_FRAME_MAGIC = { + (byte) 0x2A, (byte) 0x4D, (byte) 0x18 + }; + + private static volatile CachedAvailability cachedZstdAvailability; + + static { + cachedZstdAvailability = CachedAvailability.DONT_CACHE; + try { + Class.forName("org.osgi.framework.BundleEvent"); + } catch (final Exception ex) { // NOSONAR + setCacheZstdAvailablity(true); + } + } + + /** Private constructor to prevent instantiation of this utility class. */ + private ZstdUtils() { + } + + /** + * Are the classes required to support Zstandard compression available? + * @return true if the classes required to support Zstandard compression are available + */ + public static boolean isZstdCompressionAvailable() { + final CachedAvailability cachedResult = cachedZstdAvailability; + if (cachedResult != CachedAvailability.DONT_CACHE) { + return cachedResult == CachedAvailability.CACHED_AVAILABLE; + } + return internalIsZstdCompressionAvailable(); + } + + private static boolean internalIsZstdCompressionAvailable() { + try { + Class.forName("com.github.luben.zstd.ZstdInputStream"); + return true; + } catch (NoClassDefFoundError | Exception error) { // NOSONAR + return false; + } + } + + /** + * Whether to cache the result of the Zstandard for Java check. + * + * <p>This defaults to {@code false} in an OSGi environment and {@code true} otherwise.</p> + * @param doCache whether to cache the result + */ + public static void setCacheZstdAvailablity(final boolean doCache) { + if (!doCache) { + cachedZstdAvailability = CachedAvailability.DONT_CACHE; + } else if (cachedZstdAvailability == CachedAvailability.DONT_CACHE) { + final boolean hasZstd = internalIsZstdCompressionAvailable(); + cachedZstdAvailability = hasZstd ? CachedAvailability.CACHED_AVAILABLE + : CachedAvailability.CACHED_UNAVAILABLE; + } + } + + /** + * Checks if the signature matches what is expected for a Zstandard file. + * + * @param signature the bytes to check + * @param length the number of bytes to check + * @return true if signature matches the Ztstandard or skippable + * frame magic bytes, false otherwise + */ + public static boolean matches(final byte[] signature, final int length) { + if (length < ZSTANDARD_FRAME_MAGIC.length) { + return false; + } + + boolean isZstandard = true; + for (int i = 0; i < ZSTANDARD_FRAME_MAGIC.length; ++i) { + if (signature[i] != ZSTANDARD_FRAME_MAGIC[i]) { + isZstandard = false; + break; + } + } + if (isZstandard) { + return true; + } + + if (0x50 == (signature[0] & 0xF0)) { + // skippable frame + for (int i = 0; i < SKIPPABLE_FRAME_MAGIC.length; ++i) { + if (signature[i + 1] != SKIPPABLE_FRAME_MAGIC[i]) { + return false; + } + } + + return true; + } + + return false; + } + + // only exists to support unit tests + static CachedAvailability getCachedZstdAvailability() { + return cachedZstdAvailability; + } +} diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/package.html b/src/main/java/org/apache/commons/compress/compressors/zstandard/package.html new file mode 100644 index 000000000..6deb74fcf --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/zstandard/package.html @@ -0,0 +1,26 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides stream class for (de)compressing streams using the + Zstandard algorithm based + on <a href="https://github.com/luben/zstd-jni">Zstandard + JNI</a>.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/parallel/FileBasedScatterGatherBackingStore.java b/src/main/java/org/apache/commons/compress/parallel/FileBasedScatterGatherBackingStore.java new file mode 100644 index 000000000..a9105f00f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/parallel/FileBasedScatterGatherBackingStore.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.parallel; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; + +/** + * ScatterGatherBackingStore that is backed by a file. 
+ * + * @since 1.10 + */ +public class FileBasedScatterGatherBackingStore implements ScatterGatherBackingStore { + private final File target; + private final OutputStream os; + private boolean closed; + + public FileBasedScatterGatherBackingStore(final File target) throws FileNotFoundException { + this.target = target; + try { + os = Files.newOutputStream(target.toPath()); + } catch (FileNotFoundException ex) { + throw ex; + } catch (IOException ex) { + // must convert exception to stay backwards compatible with Compress 1.10 to 1.13 + throw new RuntimeException(ex); // NOSONAR + } + } + + @Override + public InputStream getInputStream() throws IOException { + return Files.newInputStream(target.toPath()); + } + + @Override + @SuppressWarnings("ResultOfMethodCallIgnored") + public void closeForWriting() throws IOException { + if (!closed) { + os.close(); + closed = true; + } + } + + @Override + public void writeOut(final byte[] data, final int offset, final int length) throws IOException { + os.write(data, offset, length); + } + + @Override + public void close() throws IOException { + try { + closeForWriting(); + } finally { + target.delete(); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/parallel/InputStreamSupplier.java b/src/main/java/org/apache/commons/compress/parallel/InputStreamSupplier.java new file mode 100644 index 000000000..f227e6431 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/parallel/InputStreamSupplier.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.parallel; + +import java.io.InputStream; + +/** + * Supplies input streams. + * + * Implementations are required to support thread-handover. While an instance will + * not be accessed concurrently by multiple threads, it will be called by + * a different thread than it was created on. + * + * @since 1.10 + */ +public interface InputStreamSupplier { + + /** + * Supply an input stream for a resource. + * @return the input stream. Should never null, but may be an empty stream. + */ + InputStream get(); +} diff --git a/src/main/java/org/apache/commons/compress/parallel/ScatterGatherBackingStore.java b/src/main/java/org/apache/commons/compress/parallel/ScatterGatherBackingStore.java new file mode 100644 index 000000000..ea726ff8a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/parallel/ScatterGatherBackingStore.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.parallel; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; + +/** + * <p>Store intermediate payload in a scatter-gather scenario. + * Multiple threads write their payload to a backing store, which can + * subsequently be reversed to an {@link InputStream} to be used as input in the + * gather phase.</p> + * + * <p>It is the responsibility of the allocator of an instance of this class + * to close this. Closing it should clear off any allocated structures + * and preferably delete files.</p> + * + * @since 1.10 + */ +public interface ScatterGatherBackingStore extends Closeable { + + /** + * An input stream that contains the scattered payload + * + * @return An InputStream, should be closed by the caller of this method. + * @throws IOException when something fails + */ + InputStream getInputStream() throws IOException; + + /** + * Writes a piece of payload. + * + * @param data the data to write + * @param offset offset inside data to start writing from + * @param length the amount of data to write + * @throws IOException when something fails + */ + void writeOut(byte[] data, int offset, int length) throws IOException; + + /** + * Closes this backing store for further writing. + * @throws IOException when something fails + */ + void closeForWriting() throws IOException; +} diff --git a/src/main/java/org/apache/commons/compress/parallel/ScatterGatherBackingStoreSupplier.java b/src/main/java/org/apache/commons/compress/parallel/ScatterGatherBackingStoreSupplier.java new file mode 100644 index 000000000..9a216a7b8 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/parallel/ScatterGatherBackingStoreSupplier.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.parallel; + +import java.io.IOException; + +/** + * Supplies {@link ScatterGatherBackingStore} instances. + * + * @since 1.10 + */ +public interface ScatterGatherBackingStoreSupplier { + /** + * Create a ScatterGatherBackingStore. 
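A minimal supplier sketch, assuming temporary files are an acceptable backing store; the temp-file prefix is arbitrary:

    ScatterGatherBackingStoreSupplier supplier = new ScatterGatherBackingStoreSupplier() {
        @Override
        public ScatterGatherBackingStore get() throws IOException {
            // each call backs one scatter stream with its own temporary file
            return new FileBasedScatterGatherBackingStore(
                    File.createTempFile("commons-compress-scatter", ".tmp"));
        }
    };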
+ * + * @return a ScatterGatherBackingStore, not null + * @throws IOException when something fails + */ + ScatterGatherBackingStore get() throws IOException; +} diff --git a/src/main/java/org/apache/commons/compress/parallel/package.html b/src/main/java/org/apache/commons/compress/parallel/package.html new file mode 100644 index 000000000..3517bc575 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/parallel/package.html @@ -0,0 +1,23 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Provides common API classes for parallel compression features.</p> + </body> +</html> diff --git a/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java b/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java new file mode 100644 index 000000000..3fe3fbadd --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +import java.io.UnsupportedEncodingException; +import java.util.Arrays; + +import org.apache.commons.compress.archivers.ArchiveEntry; + +/** + * Generic Archive utilities + */ +public class ArchiveUtils { + + private static final int MAX_SANITIZED_NAME_LENGTH = 255; + + /** Private constructor to prevent instantiation of this utility class. */ + private ArchiveUtils(){ + } + + /** + * Generates a string containing the name, isDirectory setting and size of an entry. + * <p> + * For example: + * <pre> + * - 2000 main.c + * d 100 testfiles + * </pre> + * + * @param entry the entry + * @return the representation of the entry + */ + public static String toString(final ArchiveEntry entry){ + final StringBuilder sb = new StringBuilder(); + sb.append(entry.isDirectory()? 'd' : '-');// c.f. 
"ls -l" output + final String size = Long.toString(entry.getSize()); + sb.append(' '); + // Pad output to 7 places, leading spaces + for(int i=7; i > size.length(); i--){ + sb.append(' '); + } + sb.append(size); + sb.append(' ').append(entry.getName()); + return sb.toString(); + } + + /** + * Check if buffer contents matches Ascii String. + * + * @param expected expected string + * @param buffer the buffer + * @param offset offset to read from + * @param length length of the buffer + * @return {@code true} if buffer is the same as the expected string + */ + public static boolean matchAsciiBuffer( + final String expected, final byte[] buffer, final int offset, final int length){ + byte[] buffer1; + try { + buffer1 = expected.getBytes(CharsetNames.US_ASCII); + } catch (final UnsupportedEncodingException e) { + // Should not happen + throw new RuntimeException(e); //NOSONAR + } + return isEqual(buffer1, 0, buffer1.length, buffer, offset, length, false); + } + + /** + * Check if buffer contents matches Ascii String. + * + * @param expected the expected strin + * @param buffer the buffer + * @return {@code true} if buffer is the same as the expected string + */ + public static boolean matchAsciiBuffer(final String expected, final byte[] buffer){ + return matchAsciiBuffer(expected, buffer, 0, buffer.length); + } + + /** + * Convert a string to Ascii bytes. + * Used for comparing "magic" strings which need to be independent of the default Locale. + * + * @param inputString string to convert + * @return the bytes + */ + public static byte[] toAsciiBytes(final String inputString){ + try { + return inputString.getBytes(CharsetNames.US_ASCII); + } catch (final UnsupportedEncodingException e) { + // Should never happen + throw new RuntimeException(e); //NOSONAR + } + } + + /** + * Convert an input byte array to a String using the ASCII character set. + * + * @param inputBytes bytes to convert + * @return the bytes, interpreted as an Ascii string + */ + public static String toAsciiString(final byte[] inputBytes){ + try { + return new String(inputBytes, CharsetNames.US_ASCII); + } catch (final UnsupportedEncodingException e) { + // Should never happen + throw new RuntimeException(e); //NOSONAR + } + } + + /** + * Convert an input byte array to a String using the ASCII character set. + * + * @param inputBytes input byte array + * @param offset offset within array + * @param length length of array + * @return the bytes, interpreted as an Ascii string + */ + public static String toAsciiString(final byte[] inputBytes, final int offset, final int length){ + try { + return new String(inputBytes, offset, length, CharsetNames.US_ASCII); + } catch (final UnsupportedEncodingException e) { + // Should never happen + throw new RuntimeException(e); //NOSONAR + } + } + + /** + * Compare byte buffers, optionally ignoring trailing nulls + * + * @param buffer1 first buffer + * @param offset1 first offset + * @param length1 first length + * @param buffer2 second buffer + * @param offset2 second offset + * @param length2 second length + * @param ignoreTrailingNulls whether to ignore trailing nulls + * @return {@code true} if buffer1 and buffer2 have same contents, having regard to trailing nulls + */ + public static boolean isEqual( + final byte[] buffer1, final int offset1, final int length1, + final byte[] buffer2, final int offset2, final int length2, + final boolean ignoreTrailingNulls){ + final int minLen=length1 < length2 ? 
length1 : length2; + for (int i=0; i < minLen; i++){ + if (buffer1[offset1+i] != buffer2[offset2+i]){ + return false; + } + } + if (length1 == length2){ + return true; + } + if (ignoreTrailingNulls){ + if (length1 > length2){ + for(int i = length2; i < length1; i++){ + if (buffer1[offset1+i] != 0){ + return false; + } + } + } else { + for(int i = length1; i < length2; i++){ + if (buffer2[offset2+i] != 0){ + return false; + } + } + } + return true; + } + return false; + } + + /** + * Compare byte buffers + * + * @param buffer1 the first buffer + * @param offset1 the first offset + * @param length1 the first length + * @param buffer2 the second buffer + * @param offset2 the second offset + * @param length2 the second length + * @return {@code true} if buffer1 and buffer2 have same contents + */ + public static boolean isEqual( + final byte[] buffer1, final int offset1, final int length1, + final byte[] buffer2, final int offset2, final int length2){ + return isEqual(buffer1, offset1, length1, buffer2, offset2, length2, false); + } + + /** + * Compare byte buffers + * + * @param buffer1 the first buffer + * @param buffer2 the second buffer + * @return {@code true} if buffer1 and buffer2 have same contents + */ + public static boolean isEqual(final byte[] buffer1, final byte[] buffer2 ){ + return isEqual(buffer1, 0, buffer1.length, buffer2, 0, buffer2.length, false); + } + + /** + * Compare byte buffers, optionally ignoring trailing nulls + * + * @param buffer1 the first buffer + * @param buffer2 the second buffer + * @param ignoreTrailingNulls whether to ignore tariling nulls + * @return {@code true} if buffer1 and buffer2 have same contents + */ + public static boolean isEqual(final byte[] buffer1, final byte[] buffer2, final boolean ignoreTrailingNulls){ + return isEqual(buffer1, 0, buffer1.length, buffer2, 0, buffer2.length, ignoreTrailingNulls); + } + + /** + * Compare byte buffers, ignoring trailing nulls + * + * @param buffer1 the first buffer + * @param offset1 the first offset + * @param length1 the first length + * @param buffer2 the second buffer + * @param offset2 the second offset + * @param length2 the second length + * @return {@code true} if buffer1 and buffer2 have same contents, having regard to trailing nulls + */ + public static boolean isEqualWithNull( + final byte[] buffer1, final int offset1, final int length1, + final byte[] buffer2, final int offset2, final int length2){ + return isEqual(buffer1, offset1, length1, buffer2, offset2, length2, true); + } + + /** + * Returns true if the first N bytes of an array are all zero + * + * @param a + * The array to check + * @param size + * The number of characters to check (not the size of the array) + * @return true if the first N bytes are zero + */ + public static boolean isArrayZero(final byte[] a, final int size) { + for (int i = 0; i < size; i++) { + if (a[i] != 0) { + return false; + } + } + return true; + } + + /** + * Returns a "sanitized" version of the string given as arguments, + * where sanitized means non-printable characters have been + * replaced with a question mark and the outcome is not longer + * than 255 chars. 
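For instance, with an illustrative input containing a control character:

    ArchiveUtils.sanitize("report\u0007.txt"); // -> "report?.txt" (the BEL control character is masked)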
+ * + * <p>This method is used to clean up file names when they are + * used in exception messages as they may end up in log files or + * as console output and may have been read from a corrupted + * input.</p> + * + * @param s the string to sanitize + * @return a sanitized version of the argument + * @since Compress 1.12 + */ + public static String sanitize(final String s) { + final char[] cs = s.toCharArray(); + final char[] chars = cs.length <= MAX_SANITIZED_NAME_LENGTH ? cs : Arrays.copyOf(cs, MAX_SANITIZED_NAME_LENGTH); + if (cs.length > MAX_SANITIZED_NAME_LENGTH) { + for (int i = MAX_SANITIZED_NAME_LENGTH - 3; i < MAX_SANITIZED_NAME_LENGTH; i++) { + chars[i] = '.'; + } + } + final StringBuilder sb = new StringBuilder(); + for (final char c : chars) { + if (!Character.isISOControl(c)) { + final Character.UnicodeBlock block = Character.UnicodeBlock.of(c); + if (block != null && block != Character.UnicodeBlock.SPECIALS) { + sb.append(c); + continue; + } + } + sb.append('?'); + } + return sb.toString(); + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/BitInputStream.java b/src/main/java/org/apache/commons/compress/utils/BitInputStream.java new file mode 100644 index 000000000..8abff5f78 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/BitInputStream.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteOrder; + +/** + * Reads bits from an InputStream. + * @since 1.10 + * @NotThreadSafe + */ +public class BitInputStream implements Closeable { + private static final int MAXIMUM_CACHE_SIZE = 63; // bits in long minus sign bit + private static final long[] MASKS = new long[MAXIMUM_CACHE_SIZE + 1]; + + static { + for (int i = 1; i <= MAXIMUM_CACHE_SIZE; i++) { + MASKS[i] = (MASKS[i - 1] << 1) + 1; + } + } + + private final CountingInputStream in; + private final ByteOrder byteOrder; + private long bitsCached = 0; + private int bitsCachedSize = 0; + + /** + * Constructor taking an InputStream and its bit arrangement. + * @param in the InputStream + * @param byteOrder the bit arrangement across byte boundaries, + * either BIG_ENDIAN (aaaaabbb bb000000) or LITTLE_ENDIAN (bbbaaaaa 000000bb) + */ + public BitInputStream(final InputStream in, final ByteOrder byteOrder) { + this.in = new CountingInputStream(in); + this.byteOrder = byteOrder; + } + + @Override + public void close() throws IOException { + in.close(); + } + + /** + * Clears the cache of bits that have been read from the + * underlying stream but not yet provided via {@link #readBits}. 
+ */ + public void clearBitCache() { + bitsCached = 0; + bitsCachedSize = 0; + } + + /** + * Returns at most 63 bits read from the underlying stream. + * + * @param count the number of bits to read, must be a positive + * number not bigger than 63. + * @return the bits concatenated as a long using the stream's byte order. + * -1 if the end of the underlying stream has been reached before reading + * the requested number of bits + * @throws IOException on error + */ + public long readBits(final int count) throws IOException { + if (count < 0 || count > MAXIMUM_CACHE_SIZE) { + throw new IllegalArgumentException("count must not be negative or greater than " + MAXIMUM_CACHE_SIZE); + } + if (ensureCache(count)) { + return -1; + } + + if (bitsCachedSize < count) { + return processBitsGreater57(count); + } + return readCachedBits(count); + } + + /** + * Returns the number of bits that can be read from this input + * stream without reading from the underlying input stream at all. + * @return estimate of the number of bits that can be read without reading from the underlying stream + * @since 1.16 + */ + public int bitsCached() { + return bitsCachedSize; + } + + /** + * Returns an estimate of the number of bits that can be read from + * this input stream without blocking by the next invocation of a + * method for this input stream. + * @throws IOException if the underlying stream throws one when calling available + * @return estimate of the number of bits that can be read without blocking + * @since 1.16 + */ + public long bitsAvailable() throws IOException { + return bitsCachedSize + ((long) Byte.SIZE) * in.available(); + } + + /** + * Drops bits until the next bits will be read from a byte boundary. + * @since 1.16 + */ + public void alignWithByteBoundary() { + int toSkip = bitsCachedSize % Byte.SIZE; + if (toSkip > 0) { + readCachedBits(toSkip); + } + } + + /** + * Returns the number of bytes read from the underlying stream. 
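A reading sketch for this class, assuming a little-endian bit arrangement over an in-memory byte array:

    BitInputStream bits = new BitInputStream(
            new ByteArrayInputStream(new byte[] { (byte) 0xAB, (byte) 0xCD }),
            ByteOrder.LITTLE_ENDIAN);
    long low = bits.readBits(4);  // 0xB - the four least significant bits of the first byte
    long next = bits.readBits(8); // 0xDA - spans the byte boundary into the second byte
    bits.close();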
+ * + * <p>This includes the bytes read to fill the current cache and + * not read as bits so far.</p> + * @return the number of bytes read from the underlying stream + * @since 1.17 + */ + public long getBytesRead() { + return in.getBytesRead(); + } + + private long processBitsGreater57(final int count) throws IOException { + final long bitsOut; + int overflowBits = 0; + long overflow = 0L; + + // bitsCachedSize >= 57 and left-shifting it 8 bits would cause an overflow + int bitsToAddCount = count - bitsCachedSize; + overflowBits = Byte.SIZE - bitsToAddCount; + final long nextByte = in.read(); + if (nextByte < 0) { + return nextByte; + } + if (byteOrder == ByteOrder.LITTLE_ENDIAN) { + long bitsToAdd = nextByte & MASKS[bitsToAddCount]; + bitsCached |= (bitsToAdd << bitsCachedSize); + overflow = (nextByte >>> bitsToAddCount) & MASKS[overflowBits]; + } else { + bitsCached <<= bitsToAddCount; + long bitsToAdd = (nextByte >>> (overflowBits)) & MASKS[bitsToAddCount]; + bitsCached |= bitsToAdd; + overflow = nextByte & MASKS[overflowBits]; + } + bitsOut = bitsCached & MASKS[count]; + bitsCached = overflow; + bitsCachedSize = overflowBits; + return bitsOut; + } + + private long readCachedBits(int count) { + final long bitsOut; + if (byteOrder == ByteOrder.LITTLE_ENDIAN) { + bitsOut = (bitsCached & MASKS[count]); + bitsCached >>>= count; + } else { + bitsOut = (bitsCached >> (bitsCachedSize - count)) & MASKS[count]; + } + bitsCachedSize -= count; + return bitsOut; + } + + /** + * Fills the cache up to 56 bits + * @param count + * @return return true, when EOF + * @throws IOException + */ + private boolean ensureCache(final int count) throws IOException { + while (bitsCachedSize < count && bitsCachedSize < 57) { + final long nextByte = in.read(); + if (nextByte < 0) { + return true; + } + if (byteOrder == ByteOrder.LITTLE_ENDIAN) { + bitsCached |= (nextByte << bitsCachedSize); + } else { + bitsCached <<= Byte.SIZE; + bitsCached |= nextByte; + } + bitsCachedSize += Byte.SIZE; + } + return false; + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/BoundedInputStream.java b/src/main/java/org/apache/commons/compress/utils/BoundedInputStream.java new file mode 100644 index 000000000..8c3465ded --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/BoundedInputStream.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.IOException; +import java.io.InputStream; + +/** + * A stream that limits reading from a wrapped stream to a given number of bytes. 
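A short sketch of reading bit fields with the BitInputStream defined above, assuming the code sits in the same package; the helper name and input bytes are illustrative:

    static long[] readThreeFields(byte[] data) throws java.io.IOException {
        try (BitInputStream bits = new BitInputStream(
                new java.io.ByteArrayInputStream(data), java.nio.ByteOrder.LITTLE_ENDIAN)) {
            long low3  = bits.readBits(3);  // three least significant bits of the first byte
            long next5 = bits.readBits(5);  // remaining five bits of the first byte
            long rest  = bits.readBits(8);  // the second byte
            return new long[] { low3, next5, rest };
        }
    }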
+ * @NotThreadSafe + * @since 1.6 + */ +public class BoundedInputStream extends InputStream { + private final InputStream in; + private long bytesRemaining; + + /** + * Creates the stream that will at most read the given amount of + * bytes from the given stream. + * @param in the stream to read from + * @param size the maximum amount of bytes to read + */ + public BoundedInputStream(final InputStream in, final long size) { + this.in = in; + bytesRemaining = size; + } + + @Override + public int read() throws IOException { + if (bytesRemaining > 0) { + --bytesRemaining; + return in.read(); + } + return -1; + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (bytesRemaining == 0) { + return -1; + } + int bytesToRead = len; + if (bytesToRead > bytesRemaining) { + bytesToRead = (int) bytesRemaining; + } + final int bytesRead = in.read(b, off, bytesToRead); + if (bytesRead >= 0) { + bytesRemaining -= bytesRead; + } + return bytesRead; + } + + @Override + public void close() { + // there isn't anything to close in this stream and the nested + // stream is controlled externally + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/ByteUtils.java b/src/main/java/org/apache/commons/compress/utils/ByteUtils.java new file mode 100644 index 000000000..85b4118e9 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/ByteUtils.java @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * Utility methods for reading and writing bytes. + * @since 1.14 + */ +public final class ByteUtils { + private ByteUtils() { /* no instances */ } + + /** + * Used to supply bytes. + * @since 1.14 + */ + public interface ByteSupplier { + /** + * The contract is similar to {@link InputStream#read()}, return + * the byte as an unsigned int, -1 if there are no more bytes. + * @return the supplied byte or -1 if there are no more bytes + * @throws IOException if supplying fails + */ + int getAsByte() throws IOException; + } + + /** + * Used to consume bytes. + * @since 1.14 + */ + public interface ByteConsumer { + /** + * The contract is similar to {@link OutputStream#write(int)}, + * consume the lower eight bytes of the int as a byte. + * @param b the byte to consume + * @throws IOException if consuming fails + */ + void accept(int b) throws IOException; + } + + /** + * Reads the given byte array as a little endian long. 
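A sketch of how the BoundedInputStream above is typically combined with IOUtils (defined later in this change); the helper name is illustrative and the code assumes the same package:

    static byte[] readAtMost(java.io.InputStream in, long limit) throws java.io.IOException {
        // The bounded view reports EOF after 'limit' bytes; closing it does not close 'in'.
        BoundedInputStream bounded = new BoundedInputStream(in, limit);
        return IOUtils.toByteArray(bounded);
    }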
+ * @param bytes the byte array to convert + * @return the number read + */ + public static long fromLittleEndian(byte[] bytes) { + return fromLittleEndian(bytes, 0, bytes.length); + } + + /** + * Reads the given byte array as a little endian long. + * @param bytes the byte array to convert + * @param off the offset into the array that starts the value + * @param length the number of bytes representing the value + * @return the number read + * @throws IllegalArgumentException if len is bigger than eight + */ + public static long fromLittleEndian(byte[] bytes, final int off, final int length) { + checkReadLength(length); + long l = 0; + for (int i = 0; i < length; i++) { + l |= (bytes[off + i] & 0xffL) << (8 * i); + } + return l; + } + + /** + * Reads the given number of bytes from the given stream as a little endian long. + * @param in the stream to read from + * @param length the number of bytes representing the value + * @return the number read + * @throws IllegalArgumentException if len is bigger than eight + * @throws IOException if reading fails or the stream doesn't + * contain the given number of bytes anymore + */ + public static long fromLittleEndian(InputStream in, int length) throws IOException { + // somewhat duplicates the ByteSupplier version in order to save the creation of a wrapper object + checkReadLength(length); + long l = 0; + for (int i = 0; i < length; i++) { + long b = in.read(); + if (b == -1) { + throw new IOException("premature end of data"); + } + l |= (b << (i * 8)); + } + return l; + } + + /** + * Reads the given number of bytes from the given supplier as a little endian long. + * + * <p>Typically used by our InputStreams that need to count the + * bytes read as well.</p> + * + * @param supplier the supplier for bytes + * @param length the number of bytes representing the value + * @return the number read + * @throws IllegalArgumentException if len is bigger than eight + * @throws IOException if the supplier fails or doesn't supply the + * given number of bytes anymore + */ + public static long fromLittleEndian(ByteSupplier supplier, final int length) throws IOException { + checkReadLength(length); + long l = 0; + for (int i = 0; i < length; i++) { + long b = supplier.getAsByte(); + if (b == -1) { + throw new IOException("premature end of data"); + } + l |= (b << (i * 8)); + } + return l; + } + + /** + * Reads the given number of bytes from the given input as little endian long. + * @param in the input to read from + * @param length the number of bytes representing the value + * @return the number read + * @throws IllegalArgumentException if len is bigger than eight + * @throws IOException if reading fails or the stream doesn't + * contain the given number of bytes anymore + */ + public static long fromLittleEndian(DataInput in, int length) throws IOException { + // somewhat duplicates the ByteSupplier version in order to save the creation of a wrapper object + checkReadLength(length); + long l = 0; + for (int i = 0; i < length; i++) { + long b = in.readUnsignedByte(); + l |= (b << (i * 8)); + } + return l; + } + + /** + * Inserts the given value into the array as a little endian + * sequence of the given length starting at the given offset. 
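A small sketch of the fromLittleEndian overloads above; the byte values are illustrative and the code assumes the same package:

    static void demoFromLittleEndian() throws java.io.IOException {
        byte[] header = { 0x04, 0x03, 0x02, 0x01 };
        long fromArray  = ByteUtils.fromLittleEndian(header);        // 0x01020304
        long fromSlice  = ByteUtils.fromLittleEndian(header, 0, 2);  // 0x0304
        long fromStream = ByteUtils.fromLittleEndian(
                new java.io.ByteArrayInputStream(header), 4);        // 0x01020304
    }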
+ * @param b the array to write into + * @param value the value to insert + * @param off the offset into the array that receives the first byte + * @param length the number of bytes to use to represent the value + */ + public static void toLittleEndian(final byte[] b, final long value, final int off, final int length) { + long num = value; + for (int i = 0; i < length; i++) { + b[off + i] = (byte) (num & 0xff); + num >>= 8; + } + } + + /** + * Writes the given value to the given stream as a little endian + * array of the given length. + * @param out the stream to write to + * @param value the value to write + * @param length the number of bytes to use to represent the value + * @throws IOException if writing fails + */ + public static void toLittleEndian(OutputStream out, final long value, final int length) + throws IOException { + // somewhat duplicates the ByteConsumer version in order to save the creation of a wrapper object + long num = value; + for (int i = 0; i < length; i++) { + out.write((int) (num & 0xff)); + num >>= 8; + } + } + + /** + * Provides the given value to the given consumer as a little endian + * sequence of the given length. + * @param consumer the consumer to provide the bytes to + * @param value the value to provide + * @param length the number of bytes to use to represent the value + * @throws IOException if writing fails + */ + public static void toLittleEndian(ByteConsumer consumer, final long value, final int length) + throws IOException { + long num = value; + for (int i = 0; i < length; i++) { + consumer.accept((int) (num & 0xff)); + num >>= 8; + } + } + + /** + * Writes the given value to the given stream as a little endian + * array of the given length. + * @param out the output to write to + * @param value the value to write + * @param length the number of bytes to use to represent the value + * @throws IOException if writing fails + */ + public static void toLittleEndian(DataOutput out, final long value, final int length) + throws IOException { + // somewhat duplicates the ByteConsumer version in order to save the creation of a wrapper object + long num = value; + for (int i = 0; i < length; i++) { + out.write((int) (num & 0xff)); + num >>= 8; + } + } + + /** + * {@link ByteSupplier} based on {@link InputStream}. + * @since 1.14 + */ + public static class InputStreamByteSupplier implements ByteSupplier { + private final InputStream is; + public InputStreamByteSupplier(InputStream is) { + this.is = is; + } + @Override + public int getAsByte() throws IOException { + return is.read(); + } + } + + /** + * {@link ByteConsumer} based on {@link OutputStream}. + * @since 1.14 + */ + public static class OutputStreamByteConsumer implements ByteConsumer { + private final OutputStream os; + public OutputStreamByteConsumer(OutputStream os) { + this.os = os; + } + @Override + public void accept(int b) throws IOException { + os.write(b); + } + } + + private static final void checkReadLength(int length) { + if (length > 8) { + throw new IllegalArgumentException("can't read more than eight bytes into a long value"); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java b/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java new file mode 100644 index 000000000..1b50e8ce5 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
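A matching sketch for the toLittleEndian overloads above, with illustrative values:

    static byte[] demoToLittleEndian() throws java.io.IOException {
        byte[] record = new byte[8];
        // Writes 0x01020304 as the byte sequence 04 03 02 01 at offset 0.
        ByteUtils.toLittleEndian(record, 0x01020304L, 0, 4);
        // The stream variant produces the same byte sequence on an OutputStream.
        java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
        ByteUtils.toLittleEndian(bos, 0x01020304L, 4);
        return record;
    }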
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.InputStream; +import java.util.zip.CRC32; + +/** + * A stream that verifies the CRC of the data read once the stream is + * exhausted. + * @NotThreadSafe + * @since 1.6 + */ +public class CRC32VerifyingInputStream extends ChecksumVerifyingInputStream { + + /** + * @param in the stream to wrap + * @param size the of the stream's content + * @param expectedCrc32 the expected checksum + */ + public CRC32VerifyingInputStream(final InputStream in, final long size, final int expectedCrc32) { + this(in, size, expectedCrc32 & 0xFFFFffffL); + } + + /** + * @since 1.7 + * @param in the stream to wrap + * @param size the of the stream's content + * @param expectedCrc32 the expected checksum + */ + public CRC32VerifyingInputStream(final InputStream in, final long size, final long expectedCrc32) { + super(new CRC32(), in, size, expectedCrc32); + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/CharsetNames.java b/src/main/java/org/apache/commons/compress/utils/CharsetNames.java new file mode 100644 index 000000000..f6b9cc8ed --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/CharsetNames.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +/** + * Character encoding names required of every implementation of the Java platform. + * + * From the Java documentation <a href="https://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard + * charsets</a>: + * <p> + * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the + * release documentation for your implementation to see if any other encodings are supported. Consult the release + * documentation for your implementation to see if any other encodings are supported. </cite> + * </p> + * + * <dl> + * <dt><code>US-ASCII</code></dt> + * <dd>Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. 
the Basic Latin block of the Unicode character set.</dd> + * <dt><code>ISO-8859-1</code></dt> + * <dd>ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</dd> + * <dt><code>UTF-8</code></dt> + * <dd>Eight-bit Unicode Transformation Format.</dd> + * <dt><code>UTF-16BE</code></dt> + * <dd>Sixteen-bit Unicode Transformation Format, big-endian byte order.</dd> + * <dt><code>UTF-16LE</code></dt> + * <dd>Sixteen-bit Unicode Transformation Format, little-endian byte order.</dd> + * <dt><code>UTF-16</code></dt> + * <dd>Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order + * accepted on input, big-endian used on output.)</dd> + * </dl> + * + * <p>This perhaps would best belong in the [lang] project. Even if a similar interface is defined in [lang], it is not + * foreseen that [compress] would be made to depend on [lang].</p> + * + * @see <a href="https://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + * @since 1.4 + */ +public class CharsetNames { + /** + * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="https://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final String ISO_8859_1 = "ISO-8859-1"; + + /** + * <p> + * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="https://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final String US_ASCII = "US-ASCII"; + + /** + * <p> + * Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark + * (either order accepted on input, big-endian used on output) + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="https://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final String UTF_16 = "UTF-16"; + + /** + * <p> + * Sixteen-bit Unicode Transformation Format, big-endian byte order. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="https://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final String UTF_16BE = "UTF-16BE"; + + /** + * <p> + * Sixteen-bit Unicode Transformation Format, little-endian byte order. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="https://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final String UTF_16LE = "UTF-16LE"; + + /** + * <p> + * Eight-bit Unicode Transformation Format. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. 
+ * </p> + * + * @see <a href="https://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final String UTF_8 = "UTF-8"; +} diff --git a/src/main/java/org/apache/commons/compress/utils/Charsets.java b/src/main/java/org/apache/commons/compress/utils/Charsets.java new file mode 100644 index 000000000..9f5240af8 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/Charsets.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +/** + * Charsets required of every implementation of the Java platform. + * + * From the Java documentation <a href="https://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard + * charsets</a>: + * <p> + * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the + * release documentation for your implementation to see if any other encodings are supported. Consult the release + * documentation for your implementation to see if any other encodings are supported. </cite> + * </p> + * + * <dl> + * <dt><code>US-ASCII</code></dt> + * <dd>Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</dd> + * <dt><code>ISO-8859-1</code></dt> + * <dd>ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</dd> + * <dt><code>UTF-8</code></dt> + * <dd>Eight-bit Unicode Transformation Format.</dd> + * <dt><code>UTF-16BE</code></dt> + * <dd>Sixteen-bit Unicode Transformation Format, big-endian byte order.</dd> + * <dt><code>UTF-16LE</code></dt> + * <dd>Sixteen-bit Unicode Transformation Format, little-endian byte order.</dd> + * <dt><code>UTF-16</code></dt> + * <dd>Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order + * accepted on input, big-endian used on output.)</dd> + * </dl> + * + * <p>This class best belongs in the Commons Lang or IO project. Even if a similar class is defined in another Commons + * component, it is not foreseen that Commons Compress would be made to depend on another Commons component.</p> + * + * @see <a href="https://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + * @see StandardCharsets + * @since 1.4 + */ +public class Charsets { + + // + // This class should only contain Charset instances for required encodings. This guarantees that it will load correctly and + // without delay on all Java platforms. + // + + /** + * Returns the given Charset or the default Charset if the given Charset is null. + * + * @param charset + * A charset or null. 
+ * @return the given Charset or the default Charset if the given Charset is null + */ + public static Charset toCharset(final Charset charset) { + return charset == null ? Charset.defaultCharset() : charset; + } + + /** + * Returns a Charset for the named charset. If the name is null, return the default Charset. + * + * @param charset + * The name of the requested charset, may be null. + * @return a Charset for the named charset + * @throws java.nio.charset.UnsupportedCharsetException + * If the named charset is unavailable + * @throws java.nio.charset.IllegalCharsetNameException + * If the given charset name is illegal + */ + public static Charset toCharset(final String charset) { + return charset == null ? Charset.defaultCharset() : Charset.forName(charset); + } + + /** + * CharsetNamesISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="https://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + public static final Charset ISO_8859_1 = StandardCharsets.ISO_8859_1; + + /** + * <p> + * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="https://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + public static final Charset US_ASCII = StandardCharsets.US_ASCII; + + /** + * <p> + * Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark + * (either order accepted on input, big-endian used on output) + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="https://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + public static final Charset UTF_16 = StandardCharsets.UTF_16; + + /** + * <p> + * Sixteen-bit Unicode Transformation Format, big-endian byte order. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="https://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + public static final Charset UTF_16BE = StandardCharsets.UTF_16BE; + + /** + * <p> + * Sixteen-bit Unicode Transformation Format, little-endian byte order. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="https://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + public static final Charset UTF_16LE = StandardCharsets.UTF_16LE; + + /** + * <p> + * Eight-bit Unicode Transformation Format. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. 
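A one-line sketch of the toCharset helpers above; the method name and parameter are illustrative:

    static java.nio.charset.Charset entryNameCharset(String configured) {
        // null falls back to the platform default; otherwise the name is resolved via Charset.forName.
        return Charsets.toCharset(configured);
    }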
+ * </p> + * + * @see <a href="https://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + public static final Charset UTF_8 = StandardCharsets.UTF_8; +} diff --git a/src/main/java/org/apache/commons/compress/utils/ChecksumCalculatingInputStream.java b/src/main/java/org/apache/commons/compress/utils/ChecksumCalculatingInputStream.java new file mode 100644 index 000000000..4a408a564 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/ChecksumCalculatingInputStream.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.Checksum; + +/** + * A stream that calculates the checksum of the data read. + * @NotThreadSafe + * @since 1.14 + */ +public class ChecksumCalculatingInputStream extends InputStream { + private final InputStream in; + private final Checksum checksum; + + public ChecksumCalculatingInputStream(final Checksum checksum, final InputStream in) { + + if ( checksum == null ){ + throw new NullPointerException("Parameter checksum must not be null"); + } + + if ( in == null ){ + throw new NullPointerException("Parameter in must not be null"); + } + + this.checksum = checksum; + this.in = in; + } + + /** + * Reads a single byte from the stream + * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read() throws IOException { + final int ret = in.read(); + if (ret >= 0) { + checksum.update(ret); + } + return ret; + } + + /** + * Reads a byte array from the stream + * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read(final byte[] b) throws IOException { + return read(b, 0, b.length); + } + + /** + * Reads from the stream into a byte array. + * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + final int ret = in.read(b, off, len); + if (ret >= 0) { + checksum.update(b, off, ret); + } + return ret; + } + + @Override + public long skip(final long n) throws IOException { + // Can't really skip, we have to hash everything to verify the checksum + if (read() >= 0) { + return 1; + } + return 0; + } + + /** + * Returns the calculated checksum. + * @return the calculated checksum. 
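A sketch of computing a CRC32 with the ChecksumCalculatingInputStream above (its getValue accessor follows just below); the helper name is illustrative:

    static long crc32Of(java.io.InputStream in) throws java.io.IOException {
        ChecksumCalculatingInputStream checked =
                new ChecksumCalculatingInputStream(new java.util.zip.CRC32(), in);
        byte[] buffer = new byte[8192];
        while (checked.read(buffer) >= 0) {
            // drain the stream; every byte read updates the checksum
        }
        return checked.getValue();
    }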
+ */ + public long getValue() { + return checksum.getValue(); + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStream.java b/src/main/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStream.java new file mode 100644 index 000000000..a7d8d6ce5 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStream.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.Checksum; + +/** + * A stream that verifies the checksum of the data read once the stream is + * exhausted. + * @NotThreadSafe + * @since 1.7 + */ +public class ChecksumVerifyingInputStream extends InputStream { + private final InputStream in; + private long bytesRemaining; + private final long expectedChecksum; + private final Checksum checksum; + + public ChecksumVerifyingInputStream(final Checksum checksum, final InputStream in, + final long size, final long expectedChecksum) { + this.checksum = checksum; + this.in = in; + this.expectedChecksum = expectedChecksum; + this.bytesRemaining = size; + } + + /** + * Reads a single byte from the stream + * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read() throws IOException { + if (bytesRemaining <= 0) { + return -1; + } + final int ret = in.read(); + if (ret >= 0) { + checksum.update(ret); + --bytesRemaining; + } + if (bytesRemaining == 0 && expectedChecksum != checksum.getValue()) { + throw new IOException("Checksum verification failed"); + } + return ret; + } + + /** + * Reads a byte array from the stream + * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read(final byte[] b) throws IOException { + return read(b, 0, b.length); + } + + /** + * Reads from the stream into a byte array. 
+ * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + final int ret = in.read(b, off, len); + if (ret >= 0) { + checksum.update(b, off, ret); + bytesRemaining -= ret; + } + if (bytesRemaining <= 0 && expectedChecksum != checksum.getValue()) { + throw new IOException("Checksum verification failed"); + } + return ret; + } + + @Override + public long skip(final long n) throws IOException { + // Can't really skip, we have to hash everything to verify the checksum + if (read() >= 0) { + return 1; + } + return 0; + } + + @Override + public void close() throws IOException { + in.close(); + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/CloseShieldFilterInputStream.java b/src/main/java/org/apache/commons/compress/utils/CloseShieldFilterInputStream.java new file mode 100644 index 000000000..a0ec8ff4b --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/CloseShieldFilterInputStream.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * Re-implements {@link FilterInputStream#close()} to do nothing. + * @since 1.14 + */ +public class CloseShieldFilterInputStream extends FilterInputStream { + + public CloseShieldFilterInputStream(InputStream in) { + super(in); + } + + @Override + public void close() throws IOException { + // NO IMPLEMENTATION. + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/CountingInputStream.java b/src/main/java/org/apache/commons/compress/utils/CountingInputStream.java new file mode 100644 index 000000000..461071e8d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/CountingInputStream.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
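A sketch tying the verifying streams together: CRC32VerifyingInputStream (shown earlier in this change) builds on the ChecksumVerifyingInputStream above. It assumes the wrapped stream holds exactly 'size' bytes; the helper name is illustrative:

    static byte[] readVerified(java.io.InputStream in, long size, long expectedCrc32)
            throws java.io.IOException {
        // Throws "Checksum verification failed" once 'size' bytes have been read
        // and the CRC32 does not match the expected value.
        CRC32VerifyingInputStream verified =
                new CRC32VerifyingInputStream(in, size, expectedCrc32);
        return IOUtils.toByteArray(verified);
    }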
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * Stream that tracks the number of bytes read. + * @since 1.3 + * @NotThreadSafe + */ +public class CountingInputStream extends FilterInputStream { + private long bytesRead; + + public CountingInputStream(final InputStream in) { + super(in); + } + + @Override + public int read() throws IOException { + final int r = in.read(); + if (r >= 0) { + count(1); + } + return r; + } + @Override + public int read(final byte[] b) throws IOException { + return read(b, 0, b.length); + } + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + final int r = in.read(b, off, len); + if (r >= 0) { + count(r); + } + return r; + } + /** + * Increments the counter of already read bytes. + * Doesn't increment if the EOF has been hit (read == -1) + * + * @param read the number of bytes read + */ + protected final void count(final long read) { + if (read != -1) { + bytesRead += read; + } + } + + /** + * Returns the current number of bytes read from this stream. + * @return the number of read bytes + */ + public long getBytesRead() { + return bytesRead; + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/CountingOutputStream.java b/src/main/java/org/apache/commons/compress/utils/CountingOutputStream.java new file mode 100644 index 000000000..ac886bcd4 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/CountingOutputStream.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * Stream that tracks the number of bytes read. + * @since 1.3 + * @NotThreadSafe + */ +public class CountingOutputStream extends FilterOutputStream { + private long bytesWritten = 0; + + public CountingOutputStream(final OutputStream out) { + super(out); + } + + @Override + public void write(final int b) throws IOException { + out.write(b); + count(1); + } + @Override + public void write(final byte[] b) throws IOException { + write(b, 0, b.length); + } + @Override + public void write(final byte[] b, final int off, final int len) throws IOException { + out.write(b, off, len); + count(len); + } + + /** + * Increments the counter of already written bytes. 
+ * Doesn't increment if the EOF has been hit (written == -1) + * + * @param written the number of bytes written + */ + protected void count(final long written) { + if (written != -1) { + bytesWritten += written; + } + } + + /** + * Returns the current number of bytes written to this stream. + * @return the number of written bytes + */ + public long getBytesWritten() { + return bytesWritten; + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/FixedLengthBlockOutputStream.java b/src/main/java/org/apache/commons/compress/utils/FixedLengthBlockOutputStream.java new file mode 100644 index 000000000..360f380cd --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/FixedLengthBlockOutputStream.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.WritableByteChannel; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * This class supports writing to an Outputstream or WritableByteChannel in fixed length blocks. + * <p>It can be be used to support output to devices such as tape drives that require output in this + * format. If the final block does not have enough content to fill an entire block, the output will + * be padded to a full block size.</p> + * + * <p>This class can be used to support TAR,PAX, and CPIO blocked output to character special devices. + * It is not recommended that this class be used unless writing to such devices, as the padding + * serves no useful purpose in such cases.</p> + * + * <p>This class should normally wrap a FileOutputStream or associated WritableByteChannel directly. + * If there is an intervening filter that modified the output, such as a CompressorOutputStream, or + * performs its own buffering, such as BufferedOutputStream, output to the device may + * no longer be of the specified size.</p> + * + * <p>Any content written to this stream should be self-delimiting and should tolerate any padding + * added to fill the last block.</p> + * + * @since 1.15 + */ +public class FixedLengthBlockOutputStream extends OutputStream implements WritableByteChannel { + + private final WritableByteChannel out; + private final int blockSize; + private final ByteBuffer buffer; + private final AtomicBoolean closed = new AtomicBoolean(false); + + /** + * Create a fixed length block output stream with given destination stream and block size + * @param os The stream to wrap. + * @param blockSize The block size to use. 
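A sketch combining the CountingInputStream and CountingOutputStream defined above with IOUtils.copy (defined later in this change); the helper name is illustrative:

    static long copyAndCount(java.io.InputStream in, java.io.OutputStream out)
            throws java.io.IOException {
        CountingInputStream countedIn = new CountingInputStream(in);
        CountingOutputStream countedOut = new CountingOutputStream(out);
        IOUtils.copy(countedIn, countedOut);
        // For a plain copy both counters should agree; a non-zero result signals a problem.
        return countedIn.getBytesRead() - countedOut.getBytesWritten();
    }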
+ */ + public FixedLengthBlockOutputStream(OutputStream os, int blockSize) { + if (os instanceof FileOutputStream) { + FileOutputStream fileOutputStream = (FileOutputStream) os; + out = fileOutputStream.getChannel(); + buffer = ByteBuffer.allocateDirect(blockSize); + } else { + out = new BufferAtATimeOutputChannel(os); + buffer = ByteBuffer.allocate(blockSize); + } + this.blockSize = blockSize; + } + /** + * Create a fixed length block output stream with given destination writable byte channel and block size + * @param out The writable byte channel to wrap. + * @param blockSize The block size to use. + */ + public FixedLengthBlockOutputStream(WritableByteChannel out, int blockSize) { + this.out = out; + this.blockSize = blockSize; + this.buffer = ByteBuffer.allocateDirect(blockSize); + } + + private void maybeFlush() throws IOException { + if (!buffer.hasRemaining()) { + writeBlock(); + } + } + + private void writeBlock() throws IOException { + buffer.flip(); + int i = out.write(buffer); + boolean hasRemaining = buffer.hasRemaining(); + if (i != blockSize || hasRemaining) { + String msg = String + .format("Failed to write %,d bytes atomically. Only wrote %,d", + blockSize, i); + throw new IOException(msg); + } + buffer.clear(); + } + + @Override + public void write(int b) throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + buffer.put((byte) b); + maybeFlush(); + } + + @Override + public void write(byte[] b, final int offset, final int length) throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + int off = offset; + int len = length; + while (len > 0) { + int n = Math.min(len, buffer.remaining()); + buffer.put(b, off, n); + maybeFlush(); + len -= n; + off += n; + } + } + + @Override + public int write(ByteBuffer src) throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + int srcRemaining = src.remaining(); + + if (srcRemaining < buffer.remaining()) { + // if don't have enough bytes in src to fill up a block we must buffer + buffer.put(src); + } else { + int srcLeft = srcRemaining; + int savedLimit = src.limit(); + // If we're not at the start of buffer, we have some bytes already buffered + // fill up the reset of buffer and write the block. + if (buffer.position() != 0) { + int n = buffer.remaining(); + src.limit(src.position() + n); + buffer.put(src); + writeBlock(); + srcLeft -= n; + } + // whilst we have enough bytes in src for complete blocks, + // write them directly from src without copying them to buffer + while (srcLeft >= blockSize) { + src.limit(src.position() + blockSize); + out.write(src); + srcLeft -= blockSize; + } + // copy any remaining bytes into buffer + src.limit(savedLimit); + buffer.put(src); + } + return srcRemaining; + } + + @Override + public boolean isOpen() { + if (!out.isOpen()) { + closed.set(true); + } + return !closed.get(); + } + + /** + * Potentially pads and then writes the current block to the underlying stream. 
+ * @throws IOException if writing fails + */ + public void flushBlock() throws IOException { + if (buffer.position() != 0) { + padBlock(); + writeBlock(); + } + } + + @Override + public void close() throws IOException { + if (closed.compareAndSet(false, true)) { + try { + flushBlock(); + } finally { + out.close(); + } + } + } + + private void padBlock() { + buffer.order(ByteOrder.nativeOrder()); + int bytesToWrite = buffer.remaining(); + if (bytesToWrite > 8) { + int align = buffer.position() & 7; + if (align != 0) { + int limit = 8 - align; + for (int i = 0; i < limit; i++) { + buffer.put((byte) 0); + } + bytesToWrite -= limit; + } + + while (bytesToWrite >= 8) { + buffer.putLong(0L); + bytesToWrite -= 8; + } + } + while (buffer.hasRemaining()) { + buffer.put((byte) 0); + } + } + + /** + * Helper class to provide channel wrapper for arbitrary output stream that doesn't alter the + * size of writes. We can't use Channels.newChannel, because for non FileOutputStreams, it + * breaks up writes into 8KB max chunks. Since the purpose of this class is to always write + * complete blocks, we need to write a simple class to take care of it. + */ + private static class BufferAtATimeOutputChannel implements WritableByteChannel { + + private final OutputStream out; + private final AtomicBoolean closed = new AtomicBoolean(false); + + private BufferAtATimeOutputChannel(OutputStream out) { + this.out = out; + } + + @Override + public int write(ByteBuffer buffer) throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + if (!buffer.hasArray()) { + throw new IllegalArgumentException("direct buffer somehow written to BufferAtATimeOutputChannel"); + } + + try { + int pos = buffer.position(); + int len = buffer.limit() - pos; + out.write(buffer.array(), buffer.arrayOffset() + pos, len); + buffer.position(buffer.limit()); + return len; + } catch (IOException e) { + try { + close(); + } catch (IOException ignored) { //NOSONAR + } + throw e; + } + } + + @Override + public boolean isOpen() { + return !closed.get(); + } + + @Override + public void close() throws IOException { + if (closed.compareAndSet(false, true)) { + out.close(); + } + } + + } + + +} diff --git a/src/main/java/org/apache/commons/compress/utils/FlushShieldFilterOutputStream.java b/src/main/java/org/apache/commons/compress/utils/FlushShieldFilterOutputStream.java new file mode 100644 index 000000000..239e82374 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/FlushShieldFilterOutputStream.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
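A sketch of the FixedLengthBlockOutputStream above using a tar-style 512-byte block size; the helper name and block size choice are illustrative:

    static void writeInFixedBlocks(java.io.OutputStream dest, byte[] content)
            throws java.io.IOException {
        // The final, partially filled block is zero-padded when the stream is closed.
        try (FixedLengthBlockOutputStream blocks =
                new FixedLengthBlockOutputStream(dest, 512)) {
            blocks.write(content);
        }
    }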
+ */ + +package org.apache.commons.compress.utils; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * Re-implements {@link FilterOutputStream#flush()} to do nothing. + */ +public class FlushShieldFilterOutputStream extends FilterOutputStream { + + public FlushShieldFilterOutputStream(OutputStream out) { + super(out); + } + + @Override + public void flush() throws IOException { + // NO IMPLEMENTATION. + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/IOUtils.java b/src/main/java/org/apache/commons/compress/utils/IOUtils.java new file mode 100644 index 000000000..50577c90d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/IOUtils.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import java.io.ByteArrayOutputStream; +import java.io.Closeable; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.ReadableByteChannel; + +/** + * Utility functions + * @Immutable (has mutable data but it is write-only) + */ +public final class IOUtils { + + private static final int COPY_BUF_SIZE = 8024; + private static final int SKIP_BUF_SIZE = 4096; + + // This buffer does not need to be synchronised because it is write only; the contents are ignored + // Does not affect Immutability + private static final byte[] SKIP_BUF = new byte[SKIP_BUF_SIZE]; + + /** Private constructor to prevent instantiation of this utility class. */ + private IOUtils(){ + } + + /** + * Copies the content of a InputStream into an OutputStream. + * Uses a default buffer size of 8024 bytes. 
+ * + * @param input + * the InputStream to copy + * @param output + * the target Stream + * @return the number of bytes copied + * @throws IOException + * if an error occurs + */ + public static long copy(final InputStream input, final OutputStream output) throws IOException { + return copy(input, output, COPY_BUF_SIZE); + } + + /** + * Copies the content of a InputStream into an OutputStream + * + * @param input + * the InputStream to copy + * @param output + * the target Stream + * @param buffersize + * the buffer size to use, must be bigger than 0 + * @return the number of bytes copied + * @throws IOException + * if an error occurs + * @throws IllegalArgumentException + * if buffersize is smaller than or equal to 0 + */ + public static long copy(final InputStream input, final OutputStream output, final int buffersize) throws IOException { + if (buffersize < 1) { + throw new IllegalArgumentException("buffersize must be bigger than 0"); + } + final byte[] buffer = new byte[buffersize]; + int n = 0; + long count=0; + while (-1 != (n = input.read(buffer))) { + output.write(buffer, 0, n); + count += n; + } + return count; + } + + /** + * Skips the given number of bytes by repeatedly invoking skip on + * the given input stream if necessary. + * + * <p>In a case where the stream's skip() method returns 0 before + * the requested number of bytes has been skip this implementation + * will fall back to using the read() method.</p> + * + * <p>This method will only skip less than the requested number of + * bytes if the end of the input stream has been reached.</p> + * + * @param input stream to skip bytes in + * @param numToSkip the number of bytes to skip + * @return the number of bytes actually skipped + * @throws IOException on error + */ + public static long skip(final InputStream input, long numToSkip) throws IOException { + final long available = numToSkip; + while (numToSkip > 0) { + final long skipped = input.skip(numToSkip); + if (skipped == 0) { + break; + } + numToSkip -= skipped; + } + + while (numToSkip > 0) { + final int read = readFully(input, SKIP_BUF, 0, + (int) Math.min(numToSkip, SKIP_BUF_SIZE)); + if (read < 1) { + break; + } + numToSkip -= read; + } + return available - numToSkip; + } + + /** + * Reads as much from input as possible to fill the given array. + * + * <p>This method may invoke read repeatedly to fill the array and + * only read less bytes than the length of the array if the end of + * the stream has been reached.</p> + * + * @param input stream to read from + * @param b buffer to fill + * @return the number of bytes actually read + * @throws IOException on error + */ + public static int readFully(final InputStream input, final byte[] b) throws IOException { + return readFully(input, b, 0, b.length); + } + + /** + * Reads as much from input as possible to fill the given array + * with the given amount of bytes. 
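A sketch of the IOUtils copy, skip and readFully helpers above; the sizes are illustrative:

    static void demoIoUtils(java.io.InputStream in, java.io.OutputStream out)
            throws java.io.IOException {
        long skipped = IOUtils.skip(in, 16);        // best effort, may be short only at EOF
        byte[] header = new byte[32];
        int got = IOUtils.readFully(in, header);    // less than 32 only if EOF was reached
        long copied = IOUtils.copy(in, out);        // copies the remainder of the stream
    }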
+ * + * <p>This method may invoke read repeatedly to read the bytes and + * only read less bytes than the requested length if the end of + * the stream has been reached.</p> + * + * @param input stream to read from + * @param b buffer to fill + * @param offset offset into the buffer to start filling at + * @param len of bytes to read + * @return the number of bytes actually read + * @throws IOException + * if an I/O error has occurred + */ + public static int readFully(final InputStream input, final byte[] b, final int offset, final int len) + throws IOException { + if (len < 0 || offset < 0 || len + offset > b.length) { + throw new IndexOutOfBoundsException(); + } + int count = 0, x = 0; + while (count != len) { + x = input.read(b, offset + count, len - count); + if (x == -1) { + break; + } + count += x; + } + return count; + } + + /** + * Reads {@code b.remaining()} bytes from the given channel + * starting at the current channel's position. + * + * <p>This method reads repeatedly from the channel until the + * requested number of bytes are read. This method blocks until + * the requested number of bytes are read, the end of the channel + * is detected, or an exception is thrown.</p> + * + * @param channel the channel to read from + * @param b the buffer into which the data is read. + * @throws IOException - if an I/O error occurs. + * @throws EOFException - if the channel reaches the end before reading all the bytes. + */ + public static void readFully(ReadableByteChannel channel, ByteBuffer b) throws IOException { + final int expectedLength = b.remaining(); + int read = 0; + while (read < expectedLength) { + int readNow = channel.read(b); + if (readNow <= 0) { + break; + } + read += readNow; + } + if (read < expectedLength) { + throw new EOFException(); + } + } + + // toByteArray(InputStream) copied from: + // commons/proper/io/trunk/src/main/java/org/apache/commons/io/IOUtils.java?revision=1428941 + // January 8th, 2013 + // + // Assuming our copy() works just as well as theirs! :-) + + /** + * Gets the contents of an <code>InputStream</code> as a <code>byte[]</code>. + * <p> + * This method buffers the input internally, so there is no need to use a + * <code>BufferedInputStream</code>. + * + * @param input the <code>InputStream</code> to read from + * @return the requested byte array + * @throws NullPointerException if the input is null + * @throws IOException if an I/O error occurs + * @since 1.5 + */ + public static byte[] toByteArray(final InputStream input) throws IOException { + final ByteArrayOutputStream output = new ByteArrayOutputStream(); + copy(input, output); + return output.toByteArray(); + } + + /** + * Closes the given Closeable and swallows any IOException that may occur. + * @param c Closeable to close, can be null + * @since 1.7 + */ + public static void closeQuietly(final Closeable c) { + if (c != null) { + try { + c.close(); + } catch (final IOException ignored) { // NOPMD + } + } + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/InputStreamStatistics.java b/src/main/java/org/apache/commons/compress/utils/InputStreamStatistics.java new file mode 100644 index 000000000..569ab3687 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/InputStreamStatistics.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
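A sketch of toByteArray combined with closeQuietly from the IOUtils class above; the helper name is illustrative:

    static byte[] slurpAndClose(java.io.InputStream in) throws java.io.IOException {
        try {
            return IOUtils.toByteArray(in);
        } finally {
            IOUtils.closeQuietly(in);   // swallows any IOException thrown by close()
        }
    }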
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +/** + * This interface provides statistics on the current decompression stream. + * The stream consumer can use that statistics to handle abnormal + * compression ratios, i.e. to prevent zip bombs. + * + * @since 1.17 + */ +public interface InputStreamStatistics { + /** + * @return the amount of raw or compressed bytes read by the stream + */ + long getCompressedCount(); + + /** + * @return the amount of decompressed bytes returned by the stream + */ + long getUncompressedCount(); +} diff --git a/src/main/java/org/apache/commons/compress/utils/Iterators.java b/src/main/java/org/apache/commons/compress/utils/Iterators.java new file mode 100644 index 000000000..0db0c3601 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/Iterators.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Objects; + +/** + * Iterator utilities. + * + * @since 1.13. + */ +public class Iterators { + + /** + * Adds all the elements in the source {@code iterator} to the target + * {@code collection}. + * + * <p> + * When this method returns, the {@code iterator} will be "empty": its + * {@code hasNext()} method returns {@code false}. + * </p> + * + * @param <T> type of the elements contained inside the collection + * @param collection target collection + * @param iterator source + * @return {@code true} if the target {@code collection} was modified as a + * result of this operation + */ + public static <T> boolean addAll(final Collection<T> collection, final Iterator<? 
extends T> iterator) { + Objects.requireNonNull(collection); + Objects.requireNonNull(iterator); + boolean wasModified = false; + while (iterator.hasNext()) { + wasModified |= collection.add(iterator.next()); + } + return wasModified; + } + + private Iterators() { + // do not instantiate + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/Lists.java b/src/main/java/org/apache/commons/compress/utils/Lists.java new file mode 100644 index 000000000..e7a82dc6a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/Lists.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +import java.util.ArrayList; +import java.util.Iterator; + +/** + * List utilities + * + * @since 1.13 + */ +public class Lists { + + /** + * Creates a new {@link ArrayList}. + * + * @param <E> type of elements contained in new list + * @return a new {@link ArrayList} + */ + public static <E> ArrayList<E> newArrayList() { + return new ArrayList<>(); + } + + /** + * Creates a new {@link ArrayList} filled with the contents of the given + * {@code iterator}. + * + * @param iterator + * the source iterator + * @param <E> type of elements contained in new list + * @return a new {@link ArrayList} + */ + public static <E> ArrayList<E> newArrayList(final Iterator<? extends E> iterator) { + final ArrayList<E> list = newArrayList(); + Iterators.addAll(list, iterator); + return list; + } + + private Lists() { + // do not instantiate + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/SeekableInMemoryByteChannel.java b/src/main/java/org/apache/commons/compress/utils/SeekableInMemoryByteChannel.java new file mode 100644 index 000000000..eece7f5bb --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/SeekableInMemoryByteChannel.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress.utils; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.SeekableByteChannel; +import java.util.Arrays; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * A {@link SeekableByteChannel} implementation that wraps a byte[]. + * + * <p>When this channel is used for writing an internal buffer grows to accommodate + * incoming data. A natural size limit is the value of {@link Integer#MAX_VALUE}. + * Internal buffer can be accessed via {@link SeekableInMemoryByteChannel#array()}.</p> + * + * @since 1.13 + * @NotThreadSafe + */ +public class SeekableInMemoryByteChannel implements SeekableByteChannel { + + private static final int NAIVE_RESIZE_LIMIT = Integer.MAX_VALUE >> 1; + + private byte[] data; + private final AtomicBoolean closed = new AtomicBoolean(); + private int position, size; + + /** + * Constructor taking a byte array. + * + * <p>This constructor is intended to be used with pre-allocated buffer or when + * reading from a given byte array.</p> + * + * @param data input data or pre-allocated array. + */ + public SeekableInMemoryByteChannel(byte[] data) { + this.data = data; + size = data.length; + } + + /** + * Parameterless constructor - allocates internal buffer by itself. + */ + public SeekableInMemoryByteChannel() { + this(new byte[0]); + } + + /** + * Constructor taking a size of storage to be allocated. + * + * <p>Creates a channel and allocates internal storage of a given size.</p> + * + * @param size size of internal buffer to allocate, in bytes. + */ + public SeekableInMemoryByteChannel(int size) { + this(new byte[size]); + } + + @Override + public long position() { + return position; + } + + @Override + public SeekableByteChannel position(long newPosition) throws IOException { + ensureOpen(); + if (newPosition < 0L || newPosition > Integer.MAX_VALUE) { + throw new IllegalArgumentException("Position has to be in range 0.. " + Integer.MAX_VALUE); + } + position = (int) newPosition; + return this; + } + + @Override + public long size() { + return size; + } + + @Override + public SeekableByteChannel truncate(long newSize) { + if (size > newSize) { + size = (int) newSize; + } + repositionIfNecessary(); + return this; + } + + @Override + public int read(ByteBuffer buf) throws IOException { + ensureOpen(); + repositionIfNecessary(); + int wanted = buf.remaining(); + int possible = size - position; + if (possible <= 0) { + return -1; + } + if (wanted > possible) { + wanted = possible; + } + buf.put(data, position, wanted); + position += wanted; + return wanted; + } + + @Override + public void close() { + closed.set(true); + } + + @Override + public boolean isOpen() { + return !closed.get(); + } + + @Override + public int write(ByteBuffer b) throws IOException { + ensureOpen(); + int wanted = b.remaining(); + int possibleWithoutResize = size - position; + if (wanted > possibleWithoutResize) { + int newSize = position + wanted; + if (newSize < 0) { // overflow + resize(Integer.MAX_VALUE); + wanted = Integer.MAX_VALUE - position; + } else { + resize(newSize); + } + } + b.get(data, position, wanted); + position += wanted; + if (size < position) { + size = position; + } + return wanted; + } + + /** + * Obtains the array backing this channel. + * + * <p>NOTE: + * The returned buffer is not aligned with containing data, use + * {@link #size()} to obtain the size of data stored in the buffer.</p> + * + * @return internal byte array. 
+ */ + public byte[] array() { + return data; + } + + private void resize(int newLength) { + int len = data.length; + if (len <= 0) { + len = 1; + } + if (newLength < NAIVE_RESIZE_LIMIT) { + while (len < newLength) { + len <<= 1; + } + } else { // avoid overflow + len = newLength; + } + data = Arrays.copyOf(data, len); + } + + private void ensureOpen() throws ClosedChannelException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + } + + private void repositionIfNecessary() { + if (position > size) { + position = size; + } + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/ServiceLoaderIterator.java b/src/main/java/org/apache/commons/compress/utils/ServiceLoaderIterator.java new file mode 100644 index 000000000..aeda85721 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/ServiceLoaderIterator.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.ServiceConfigurationError; +import java.util.ServiceLoader; + +/** + * Iterates all services for a given class through the standard + * {@link ServiceLoader} mechanism. + * + * @param <E> + * The service to load + * @since 1.13 + */ +public class ServiceLoaderIterator<E> implements Iterator<E> { + + private E nextServiceLoader; + private final Class<E> service; + private final Iterator<E> serviceLoaderIterator; + + public ServiceLoaderIterator(final Class<E> service) { + this(service, ClassLoader.getSystemClassLoader()); + } + + public ServiceLoaderIterator(final Class<E> service, final ClassLoader classLoader) { + this.service = service; + final ServiceLoader<E> serviceLoader = ServiceLoader.load(service, classLoader); + serviceLoaderIterator = serviceLoader.iterator(); + nextServiceLoader = null; + } + + private boolean getNextServiceLoader() { + while (nextServiceLoader == null) { + try { + if (!serviceLoaderIterator.hasNext()) { + return false; + } + nextServiceLoader = serviceLoaderIterator.next(); + } catch (final ServiceConfigurationError e) { + if (e.getCause() instanceof SecurityException) { + // Ignore security exceptions + // TODO Log? 
+ continue; + } + throw e; + } + } + return true; + } + + @Override + public boolean hasNext() { + return getNextServiceLoader(); + } + + @Override + public E next() { + if (!getNextServiceLoader()) { + throw new NoSuchElementException("No more elements for service " + service.getName()); + } + final E tempNext = nextServiceLoader; + nextServiceLoader = null; + return tempNext; + } + + @Override + public void remove() { + throw new UnsupportedOperationException("service=" + service.getName()); + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/Sets.java b/src/main/java/org/apache/commons/compress/utils/Sets.java new file mode 100644 index 000000000..0a7921b51 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/Sets.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +import java.util.Collections; +import java.util.HashSet; + +/** + * Set utilities + * + * @since 1.13 + */ +public class Sets { + + private Sets() { + // Do not instantiate + } + + /** + * Creates a new HashSet filled with the given elements + * + * @param elements + * the elements to fill the new set + * @param <E> type of elements contained in new set + * @return A new HasSet + */ + @SafeVarargs + public static <E> HashSet<E> newHashSet(E... elements) { + final HashSet<E> set = new HashSet<>(elements.length); + Collections.addAll(set, elements); + return set; + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/SkipShieldingInputStream.java b/src/main/java/org/apache/commons/compress/utils/SkipShieldingInputStream.java new file mode 100644 index 000000000..e1cbcbc66 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/SkipShieldingInputStream.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.utils; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * A wrapper that overwrites {@link #skip} and delegates to {@link #read} instead. + * + * <p>Some implementations of {@link InputStream} implement {@link + * InputStream#skip} in a way that throws an exception if the stream + * is not seekable - {@link System#in System.in} is known to behave + * that way. For such a stream it is impossible to invoke skip at all + * and you have to read from the stream (and discard the data read) + * instead. Skipping is potentially much faster than reading so we do + * want to invoke {@code skip} when possible. We provide this class so + * you can wrap your own {@link InputStream} in it if you encounter + * problems with {@code skip} throwing an excpetion.</p> + * + * @since 1.17 + */ +public class SkipShieldingInputStream extends FilterInputStream { + private static final int SKIP_BUFFER_SIZE = 8192; + // we can use a shared buffer as the content is discarded anyway + private static final byte[] SKIP_BUFFER = new byte[SKIP_BUFFER_SIZE]; + public SkipShieldingInputStream(InputStream in) { + super(in); + } + + @Override + public long skip(long n) throws IOException { + return n < 0 ? 0 : read(SKIP_BUFFER, 0, (int) Math.min(n, SKIP_BUFFER_SIZE)); + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/package.html b/src/main/java/org/apache/commons/compress/utils/package.html new file mode 100644 index 000000000..0409d1267 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/package.html @@ -0,0 +1,23 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Contains utilities used internally by the compress library.</p> + </body> +</html> diff --git a/src/site/resources/download_compress.cgi b/src/site/resources/download_compress.cgi new file mode 100755 index 000000000..8ca3752ba --- /dev/null +++ b/src/site/resources/download_compress.cgi @@ -0,0 +1,20 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Just call the standard mirrors.cgi script. It will use download.html +# as the input template. +exec /www/www.apache.org/dyn/mirrors/mirrors.cgi $* diff --git a/src/site/resources/images/compress-logo-white.png b/src/site/resources/images/compress-logo-white.png Binary files differnew file mode 100644 index 000000000..5749fc49c --- /dev/null +++ b/src/site/resources/images/compress-logo-white.png diff --git a/src/site/resources/images/compress-logo-white.xcf b/src/site/resources/images/compress-logo-white.xcf Binary files differnew file mode 100644 index 000000000..388a5a1dc --- /dev/null +++ b/src/site/resources/images/compress-logo-white.xcf diff --git a/src/site/resources/profile.jacoco b/src/site/resources/profile.jacoco new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/site/resources/profile.jacoco diff --git a/src/site/resources/profile.japicmp b/src/site/resources/profile.japicmp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/site/resources/profile.japicmp diff --git a/src/site/site.xml b/src/site/site.xml new file mode 100644 index 000000000..e816250cb --- /dev/null +++ b/src/site/site.xml @@ -0,0 +1,60 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +--> +<project name="Commons Compress"> + + <bannerRight> + <name>Commons Compress</name> + <src>/images/compress-logo-white.png</src> + <href>/index.html</href> + </bannerRight> + + <body> + <menu name="Compress"> + <item name="Overview" href="/index.html"/> + <item name="User Guide" href="/examples.html"/> + <item name="Known Limitations" href="/limitations.html"/> + <item name="Conventions" href="/conventions.html"/> + <item name="Issue Tracking" href="/issue-tracking.html"/> + <item name="Download" href="/download_compress.cgi"/> + <item name="Security Reports" href="/security-reports.html"/> + <item name="Wiki" href="https://wiki.apache.org/commons/Compress"/> + </menu> + <menu name="API Docs"> + <item name="Latest release" href="javadocs/api-release/index.html"/> + <item name="1.18" href="javadocs/api-1.18/index.html"/> + <item name="1.17" href="javadocs/api-1.17/index.html"/> + <item name="1.16.1" href="javadocs/api-1.16.1/index.html"/> + <item name="1.16" href="javadocs/api-1.16/index.html"/> + <item name="1.15" href="javadocs/api-1.15/index.html"/> + <item name="1.14" href="javadocs/api-1.14/index.html"/> + <item name="1.13" href="javadocs/api-1.13/index.html"/> + <item name="1.12" href="javadocs/api-1.12/index.html"/> + <item name="1.11" href="javadocs/api-1.11/index.html"/> + <item name="1.10" href="javadocs/api-1.10/index.html"/> + <item name="1.9" href="javadocs/api-1.9/index.html"/> + <item name="1.8.1" href="javadocs/api-1.8.1/index.html"/> + <item name="1.8" href="javadocs/api-1.8/index.html"/> + <item name="1.7" href="javadocs/api-1.7/index.html"/> + <item name="1.6" href="javadocs/api-1.6/index.html"/> + <item name="GIT latest" href="apidocs/index.html"/> + </menu> + </body> + +</project> diff --git a/src/site/xdoc/conventions.xml b/src/site/xdoc/conventions.xml new file mode 100644 index 000000000..f1a93b7e5 --- /dev/null +++ b/src/site/xdoc/conventions.xml @@ -0,0 +1,69 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<document> + <properties> + <title>Conventions</title> + </properties> + + <body> + <section name="Code Style"> + <p> + The developers of this component decided to follow the recommended standards + but not to include Checkstyle (or similar tools) into Commons Compress. + </p> + </section> + <section name="Multithreading"> + <p> + Commons Compress does not aim to be threadsafe at the moment. But the developers + agreed to document multithreading behaviour in the javadocs. + </p> + <p> + We use some of the annotations from + <a href="http://jcip.net/annotations/doc/net/jcip/annotations/package-summary.html">JCIP</a> + as Javadoc tags. 
The used tags are: + + <ul> + <li>@GuardedBy (field or method)</li> + <li>@Immutable (class)</li> + <li>@NotThreadSafe (class)</li> + <li>@ThreadSafe (class)</li> + </ul> + + For example: + <source> +/** + * Utility class that represents a four byte integer with conversion + * rules for the big endian byte order of ZIP files. + * + * @Immutable + */ +public final class ZipLong implements Cloneable { + </source> + + and: + + <source> +private final char [] highChars; +//@GuardedBy("this") +private Simple8BitZipEncoding encoding; + </source> + </p> + </section> + + </body> +</document> diff --git a/src/site/xdoc/download_compress.xml b/src/site/xdoc/download_compress.xml new file mode 100644 index 000000000..bed1ce7ce --- /dev/null +++ b/src/site/xdoc/download_compress.xml @@ -0,0 +1,154 @@ +<?xml version="1.0"?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> +<!-- + +======================================================================+ + |**** ****| + |**** THIS FILE IS GENERATED BY THE COMMONS BUILD PLUGIN ****| + |**** DO NOT EDIT DIRECTLY ****| + |**** ****| + +======================================================================+ + | TEMPLATE FILE: download-page-template.xml | + | commons-build-plugin/trunk/src/main/resources/commons-xdoc-templates | + +======================================================================+ + | | + | 1) Re-generate using: mvn commons:download-page | + | | + | 2) Set the following properties in the component's pom: | + | - commons.componentid (required, alphabetic, lower case) | + | - commons.release.version (required) | + | - commons.release.name (required) | + | - commons.binary.suffix (optional) | + | (defaults to "-bin", set to "" for pre-maven2 releases) | + | - commons.release.desc (optional) | + | - commons.release.subdir (optional) | + | | + | - commons.release.2/3.version (conditional) | + | - commons.release.2/3.name (conditional) | + | - commons.release.2/3.binary.suffix (optional) | + | - commons.release.2/3.desc (optional) | + | - commons.release.2/3.subdir (optional) | + | | + | 3) Example Properties | + | (commons.release.name inherited by parent: | + | ${project.artifactId}-${commons.release.version} | + | | + | <properties> | + | <commons.componentid>math</commons.componentid> | + | <commons.release.version>1.2</commons.release.version> | + | </properties> | + | | + +======================================================================+ +--> +<document> + <properties> + <title>Download Apache Commons Compress</title> + <author email="dev@commons.apache.org">Apache Commons Documentation Team</author> + </properties> + <body> + <section name="Download Apache Commons Compress"> + <subsection name="Using a Mirror"> + <p> + We recommend you use a mirror to download our release + builds, but you <strong>must</strong> <a 
href="https://www.apache.org/info/verification.html">verify the integrity</a> of + the downloaded files using signatures downloaded from our main + distribution directories. Recent releases (48 hours) may not yet + be available from all the mirrors. + </p> + + <p> + You are currently using <b>[preferred]</b>. If you + encounter a problem with this mirror, please select another + mirror. If all mirrors are failing, there are <i>backup</i> + mirrors (at the end of the mirrors list) that should be + available. + <br></br> + [if-any logo]<a href="[link]"><img align="right" src="[logo]" border="0"></img></a>[end] + </p> + + <form action="[location]" method="get" id="SelectMirror"> + <p> + Other mirrors: + <select name="Preferred"> + [if-any http] + [for http]<option value="[http]">[http]</option>[end] + [end] + [if-any ftp] + [for ftp]<option value="[ftp]">[ftp]</option>[end] + [end] + [if-any backup] + [for backup]<option value="[backup]">[backup] (backup)</option>[end] + [end] + </select> + <input type="submit" value="Change"></input> + </p> + </form> + + <p> + It is essential that you + <a href="https://www.apache.org/info/verification.html">verify the integrity</a> + of downloaded files, preferably using the <code>PGP</code> signature (<code>*.asc</code> files); + failing that using the <code>SHA256</code> hash (<code>*.sha256</code> checksum files). + </p> + <p> + The <a href="https://www.apache.org/dist/commons/KEYS">KEYS</a> + file contains the public PGP keys used by Apache Commons developers + to sign releases. + </p> + </subsection> + </section> + <section name="Apache Commons Compress 1.18 "> + <subsection name="Binaries"> + <table> + <tr> + <td><a href="[preferred]/commons/compress/binaries/commons-compress-1.18-bin.tar.gz">commons-compress-1.18-bin.tar.gz</a></td> + <td><a href="https://www.apache.org/dist/commons/compress/binaries/commons-compress-1.18-bin.tar.gz.sha256">sha256</a></td> + <td><a href="https://www.apache.org/dist/commons/compress/binaries/commons-compress-1.18-bin.tar.gz.asc">pgp</a></td> + </tr> + <tr> + <td><a href="[preferred]/commons/compress/binaries/commons-compress-1.18-bin.zip">commons-compress-1.18-bin.zip</a></td> + <td><a href="https://www.apache.org/dist/commons/compress/binaries/commons-compress-1.18-bin.zip.sha256">sha256</a></td> + <td><a href="https://www.apache.org/dist/commons/compress/binaries/commons-compress-1.18-bin.zip.asc">pgp</a></td> + </tr> + </table> + </subsection> + <subsection name="Source"> + <table> + <tr> + <td><a href="[preferred]/commons/compress/source/commons-compress-1.18-src.tar.gz">commons-compress-1.18-src.tar.gz</a></td> + <td><a href="https://www.apache.org/dist/commons/compress/source/commons-compress-1.18-src.tar.gz.sha256">sha256</a></td> + <td><a href="https://www.apache.org/dist/commons/compress/source/commons-compress-1.18-src.tar.gz.asc">pgp</a></td> + </tr> + <tr> + <td><a href="[preferred]/commons/compress/source/commons-compress-1.18-src.zip">commons-compress-1.18-src.zip</a></td> + <td><a href="https://www.apache.org/dist/commons/compress/source/commons-compress-1.18-src.zip.sha256">sha256</a></td> + <td><a href="https://www.apache.org/dist/commons/compress/source/commons-compress-1.18-src.zip.asc">pgp</a></td> + </tr> + </table> + </subsection> + </section> + <section name="Archives"> + <p> + Older releases can be obtained from the archives. 
+ </p> + <ul> + <li class="download"><a href="[preferred]/commons/compress/">browse download area</a></li> + <li><a href="https://archive.apache.org/dist/commons/compress/">archives...</a></li> + </ul> + </section> + </body> +</document> diff --git a/src/site/xdoc/examples.xml b/src/site/xdoc/examples.xml new file mode 100644 index 000000000..81af44b17 --- /dev/null +++ b/src/site/xdoc/examples.xml @@ -0,0 +1,1248 @@ +<?xml version="1.0"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> +<document> + <properties> + <title>Commons Compress User Guide</title> + <author email="dev@commons.apache.org">Commons Documentation Team</author> + </properties> + <body> + <section name="General Notes"> + + <subsection name="Archivers and Compressors"> + <p>Commons Compress calls all formats that compress a single + stream of data compressor formats while all formats that + collect multiple entries inside a single (potentially + compressed) archive are archiver formats.</p> + + <p>The compressor formats supported are gzip, bzip2, xz, lzma, + Pack200, DEFLATE, Brotli, DEFLATE64, ZStandard and Z, the archiver formats are 7z, ar, arj, + cpio, dump, tar and zip. Pack200 is a special case as it can + only compress JAR files.</p> + + <p>We currently only provide read support for arj, + dump, Brotli, DEFLATE64 and Z. arj can only read uncompressed archives, 7z can read + archives with many compression and encryption algorithms + supported by 7z but doesn't support encryption when writing + archives.</p> + </subsection> + + <subsection name="Buffering"> + <p>The stream classes all wrap around streams provided by the + calling code and they work on them directly without any + additional buffering. 
On the other hand most of them will + benefit from buffering so it is highly recommended that + users wrap their stream + in <code>Buffered<em>(In|Out)</em>putStream</code>s before + using the Commons Compress API.</p> + + </subsection> + + <subsection name="Factories"> + + <p>Compress provides factory methods to create input/output + streams based on the names of the compressor or archiver + format as well as factory methods that try to guess the + format of an input stream.</p> + + <p>To create a compressor writing to a given output by using + the algorithm name:</p> + <source><![CDATA[ +CompressorOutputStream gzippedOut = new CompressorStreamFactory() + .createCompressorOutputStream(CompressorStreamFactory.GZIP, myOutputStream); +]]></source> + + <p>Make the factory guess the input format for a given + archiver stream:</p> + <source><![CDATA[ +ArchiveInputStream input = new ArchiveStreamFactory() + .createArchiveInputStream(originalInput); +]]></source> + + <p>Make the factory guess the input format for a given + compressor stream:</p> + <source><![CDATA[ +CompressorInputStream input = new CompressorStreamFactory() + .createCompressorInputStream(originalInput); +]]></source> + + <p>Note that there is no way to detect the lzma or Brotli formats so only + the two-arg version of + <code>createCompressorInputStream</code> can be used. Prior + to Compress 1.9 the .Z format hasn't been auto-detected + either.</p> + + </subsection> + + <subsection name="Restricting Memory Usage"> + <p>Starting with Compress 1.14 + <code>CompressorStreamFactory</code> has an optional + constructor argument that can be used to set an upper limit of + memory that may be used while decompressing or compressing a + stream. As of 1.14 this setting only affects decompressing Z, + XZ and LZMA compressed streams.</p> + <p>For the Snappy and LZ4 formats the amount of memory used + during compression is directly proportional to the window + size.</p> + </subsection> + + <subsection name="Statistics"> + <p>Starting with Compress 1.17 most of the + <code>CompressorInputStream</code> implementations as well as + <code>ZipArchiveInputStream</code> and all streams returned by + <code>ZipFile.getInputStream</code> implement the + <code>InputStreamStatistics</code> + interface. <code>SevenZFile</code> provides statistics for the + current entry via the + <code>getStatisticsForCurrentEntry</code> method. This + interface can be used to track progress while extracting a + stream or to detect potential <a + href="https://en.wikipedia.org/wiki/Zip_bomb">zip bombs</a> + when the compression ration becomes suspiciously large.</p> + </subsection> + + </section> + <section name="Archivers"> + + <subsection name="Unsupported Features"> + <p>Many of the supported formats have developed different + dialects and extensions and some formats allow for features + (not yet) supported by Commons Compress.</p> + + <p>The <code>ArchiveInputStream</code> class provides a method + <code>canReadEntryData</code> that will return false if + Commons Compress can detect that an archive uses a feature + that is not supported by the current implementation. If it + returns false you should not try to read the entry but skip + over it.</p> + + </subsection> + + <subsection name="Entry Names"> + <p>All archive formats provide meta data about the individual + archive entries via instances of <code>ArchiveEntry</code> (or + rather subclasses of it). 
When reading from an archive the
+ information provided by the <code>getName</code> method is the
+ raw name as stored inside of the archive. There is no
+ guarantee the name represents a relative file name or even a
+ valid file name on your target operating system at all. You
+ should double check the outcome when you try to create file
+ names from entry names.</p>
+ </subsection>
+
+ <subsection name="Common Extraction Logic">
+ <p>Apart from 7z all formats provide a subclass of
+ <code>ArchiveInputStream</code> that can be used to read an
+ archive. For 7z <code>SevenZFile</code> provides a similar API
+ that does not represent a stream as our implementation
+ requires random access to the input and cannot be used for
+ general streams. The ZIP implementation can benefit a lot from
+ random access as well, see the <a
+ href="zip.html#ZipArchiveInputStream_vs_ZipFile">zip
+ page</a> for details.</p>
+
+ <p>Assuming you want to extract an archive to a target
+ directory you'd call <code>getNextEntry</code>, verify the
+ entry can be read, construct a sane file name from the entry's
+ name, create a <code>File</code> and write all contents to
+ it - here <code>IOUtils.copy</code> may come in handy. You do so
+ for every entry until <code>getNextEntry</code> returns
+ <code>null</code>.</p>
+
+ <p>A skeleton might look like:</p>
+
+ <source><![CDATA[
+File targetDir = ...
+try (ArchiveInputStream i = ... create the stream for your format, use buffering...) {
+    ArchiveEntry entry = null;
+    while ((entry = i.getNextEntry()) != null) {
+        if (!i.canReadEntryData(entry)) {
+            // log something?
+            continue;
+        }
+        String name = fileName(targetDir, entry);
+        File f = new File(name);
+        if (entry.isDirectory()) {
+            if (!f.isDirectory() && !f.mkdirs()) {
+                throw new IOException("failed to create directory " + f);
+            }
+        } else {
+            File parent = f.getParentFile();
+            if (!parent.isDirectory() && !parent.mkdirs()) {
+                throw new IOException("failed to create directory " + parent);
+            }
+            try (OutputStream o = Files.newOutputStream(f.toPath())) {
+                IOUtils.copy(i, o);
+            }
+        }
+    }
+}
+]]></source>
+
+ <p>where the hypothetical <code>fileName</code> method is
+ written by you and provides the absolute name for the file
+ that is going to be written on disk. Here you should perform
+ checks that ensure the resulting file name actually is a valid
+ file name on your operating system or belongs to a file inside
+ of <code>targetDir</code> when using the entry's name as
+ input.</p>
+
+ <p>If you want to combine an archive format with a compression
+ format - like when reading a "tar.gz" file - you wrap the
+ <code>ArchiveInputStream</code> around a
+ <code>CompressorInputStream</code> for example:</p>
+
+ <source><![CDATA[
+try (InputStream fi = Files.newInputStream(Paths.get("my.tar.gz"));
+     InputStream bi = new BufferedInputStream(fi);
+     InputStream gzi = new GzipCompressorInputStream(bi);
+     ArchiveInputStream o = new TarArchiveInputStream(gzi)) {
+}
+]]></source>
+
+ </subsection>
+
+ <subsection name="Common Archival Logic">
+ <p>Apart from 7z all formats that support writing provide a
+ subclass of <code>ArchiveOutputStream</code> that can be used
+ to create an archive. For 7z <code>SevenZOutputFile</code>
+ provides a similar API that does not represent a stream as our
+ implementation requires random access to the output and cannot
+ be used for general streams.
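+ For illustration, a minimal, hypothetical sketch of creating such a
+ <code>SevenZOutputFile</code> (the file name is made up and error
+ handling is omitted):
+ <source><![CDATA[
+try (SevenZOutputFile sevenZOutput = new SevenZOutputFile(new File("archive.7z"))) {
+    // create entries, write their contents and close them as shown in the 7z section below
+}
+]]></source>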
The
+ <code>ZipArchiveOutputStream</code> class will benefit from
+ random access as well but can be used for non-seekable streams
+ - but not all features will be available and the archive size
+ might be slightly bigger, see <a
+ href="zip.html#ZipArchiveOutputStream">the zip page</a> for
+ details.</p>
+
+ <p>Assuming you want to add a collection of files to an
+ archive, you can first use <code>createArchiveEntry</code> for
+ each file. In general this will set a few flags (usually the
+ last modified time, the size and the information whether this
+ is a file or directory) based on the <code>File</code>
+ instance. Alternatively you can create the
+ <code>ArchiveEntry</code> subclass corresponding to your
+ format directly. Often you may want to set additional flags
+ like file permissions or owner information before adding the
+ entry to the archive.</p>
+
+ <p>Next you use <code>putArchiveEntry</code> in order to add
+ the entry and then start using <code>write</code> to add the
+ content of the entry - here <code>IOUtils.copy</code> may
+ come in handy. Finally you invoke
+ <code>closeArchiveEntry</code> once you've written all content
+ and before you add the next entry.</p>
+
+ <p>Once all entries have been added you'd invoke
+ <code>finish</code> and finally <code>close</code> the
+ stream.</p>
+
+ <p>A skeleton might look like:</p>
+
+ <source><![CDATA[
+Collection<File> filesToArchive = ...
+try (ArchiveOutputStream o = ... create the stream for your format ...) {
+    for (File f : filesToArchive) {
+        // maybe skip directories for formats like AR that don't store directories
+        ArchiveEntry entry = o.createArchiveEntry(f, entryName(f));
+        // potentially add more flags to entry
+        o.putArchiveEntry(entry);
+        if (f.isFile()) {
+            try (InputStream i = Files.newInputStream(f.toPath())) {
+                IOUtils.copy(i, o);
+            }
+        }
+        o.closeArchiveEntry();
+    }
+    o.finish();
+}
+]]></source>
+
+ <p>where the hypothetical <code>entryName</code> method is
+ written by you and provides the name for the entry as it is
+ going to be written to the archive.</p>
+
+ <p>If you want to combine an archive format with a compression
+ format - like when creating a "tar.gz" file - you wrap the
+ <code>ArchiveOutputStream</code> around a
+ <code>CompressorOutputStream</code> for example:</p>
+
+ <source><![CDATA[
+try (OutputStream fo = Files.newOutputStream(Paths.get("my.tar.gz"));
+     OutputStream gzo = new GzipCompressorOutputStream(fo);
+     ArchiveOutputStream o = new TarArchiveOutputStream(gzo)) {
+}
+]]></source>
+
+ </subsection>
+
+ <subsection name="7z">
+
+ <p>Note that Commons Compress currently only supports a subset
+ of compression and encryption algorithms used for 7z archives.
+ For writing only uncompressed entries, LZMA, LZMA2, BZIP2 and
+ Deflate are supported - in addition to those, reading supports
+ AES-256/SHA-256 and DEFLATE64.</p>
+
+ <p>Multipart archives are not supported at all.</p>
+
+ <p>7z archives can use multiple compression and encryption
+ methods as well as filters combined as a pipeline of methods
+ for their entries. Prior to Compress 1.8 you could only specify
+ a single method when creating archives - reading archives
+ using more than one method has been possible before. Starting
+ with Compress 1.8 it is possible to configure the full
+ pipeline using the <code>setContentMethods</code> method of
+ <code>SevenZOutputFile</code>.
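+ A hedged sketch of such a configuration - here a single LZMA2 method
+ with an explicit dictionary size, all names and values made up:
+ <source><![CDATA[
+SevenZOutputFile sevenZOutput = new SevenZOutputFile(new File("archive.7z"));
+sevenZOutput.setContentMethods(Collections.singletonList(
+        new SevenZMethodConfiguration(SevenZMethod.LZMA2, 1 << 20)));
+]]></source>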
Methods are specified in the + order they appear inside the pipeline when creating the + archive, you can also specify certain parameters for some of + the methods - see the Javadocs of + <code>SevenZMethodConfiguration</code> for details.</p> + + <p>When reading entries from an archive the + <code>getContentMethods</code> method of + <code>SevenZArchiveEntry</code> will properly represent the + compression/encryption/filter methods but may fail to + determine the configuration options used. As of Compress 1.8 + only the dictionary size used for LZMA2 can be read.</p> + + <p>Currently solid compression - compressing multiple files + as a single block to benefit from patterns repeating accross + files - is only supported when reading archives. This also + means compression ratio will likely be worse when using + Commons Compress compared to the native 7z executable.</p> + + <p>Reading or writing requires a + <code>SeekableByteChannel</code> that will be obtained + transparently when reading from or writing to a file. The + class + <code>org.apache.commons.compress.utils.SeekableInMemoryByteChannel</code> + allows you to read from or write to an in-memory archive.</p> + + <p>Adding an entry to a 7z archive:</p> +<source><![CDATA[ +SevenZOutputFile sevenZOutput = new SevenZOutputFile(file); +SevenZArchiveEntry entry = sevenZOutput.createArchiveEntry(fileToArchive, name); +sevenZOutput.putArchiveEntry(entry); +sevenZOutput.write(contentOfEntry); +sevenZOutput.closeArchiveEntry(); +]]></source> + + <p>Uncompressing a given 7z archive (you would + certainly add exception handling and make sure all streams + get closed properly):</p> +<source><![CDATA[ +SevenZFile sevenZFile = new SevenZFile(new File("archive.7z")); +SevenZArchiveEntry entry = sevenZFile.getNextEntry(); +byte[] content = new byte[entry.getSize()]; +LOOP UNTIL entry.getSize() HAS BEEN READ { + sevenZFile.read(content, offset, content.length - offset); +} +]]></source> + + <p>Uncompressing a given in-memory 7z archive:</p> + <source><![CDATA[ +byte[] inputData; // 7z archive contents +SeekableInMemoryByteChannel inMemoryByteChannel = new SeekableInMemoryByteChannel(inputData); +SevenZFile sevenZFile = new SevenZFile(inMemoryByteChannel); +SevenZArchiveEntry entry = sevenZFile.getNextEntry(); +sevenZFile.read(); // read current entry's data +]]></source> + + <h4><a name="Encrypted 7z Archives"></a>Encrypted 7z Archives</h4> + + <p>Currently Compress supports reading but not writing of + encrypted archives. When reading an encrypted archive a + password has to be provided to one of + <code>SevenZFile</code>'s constructors. If you try to read + an encrypted archive without specifying a password a + <code>PasswordRequiredException</code> (a subclass of + <code>IOException</code>) will be thrown.</p> + + <p>When specifying the password as a <code>byte[]</code> one + common mistake is to use the wrong encoding when creating + the <code>byte[]</code> from a <code>String</code>. The + <code>SevenZFile</code> class expects the bytes to + correspond to the UTF16-LE encoding of the password. 
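+ A short sketch of the difference (the password value is made up):
+ <source><![CDATA[
+byte[] wrong = "secret".getBytes();                          // platform default encoding
+byte[] right = "secret".getBytes(StandardCharsets.UTF_16LE); // what SevenZFile expects
+]]></source>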
An + example of reading an encrypted archive is</p> + +<source><![CDATA[ +SevenZFile sevenZFile = new SevenZFile(new File("archive.7z"), "secret".getBytes(StandardCharsets.UTF_16LE)); +SevenZArchiveEntry entry = sevenZFile.getNextEntry(); +byte[] content = new byte[entry.getSize()]; +LOOP UNTIL entry.getSize() HAS BEEN READ { + sevenZFile.read(content, offset, content.length - offset); +} +]]></source> + + <p>Starting with Compress 1.17 new constructors have been + added that accept the password as <code>char[]</code> rather + than a <code>byte[]</code>. We recommend you use these in + order to avoid the problem above.</p> + +<source><![CDATA[ +SevenZFile sevenZFile = new SevenZFile(new File("archive.7z"), "secret".toCharArray()); +SevenZArchiveEntry entry = sevenZFile.getNextEntry(); +byte[] content = new byte[entry.getSize()]; +LOOP UNTIL entry.getSize() HAS BEEN READ { + sevenZFile.read(content, offset, content.length - offset); +} +]]></source> + + </subsection> + + <subsection name="ar"> + + <p>In addition to the information stored + in <code>ArchiveEntry</code> a <code>ArArchiveEntry</code> + stores information about the owner user and group as well as + Unix permissions.</p> + + <p>Adding an entry to an ar archive:</p> +<source><![CDATA[ +ArArchiveEntry entry = new ArArchiveEntry(name, size); +arOutput.putArchiveEntry(entry); +arOutput.write(contentOfEntry); +arOutput.closeArchiveEntry(); +]]></source> + + <p>Reading entries from an ar archive:</p> +<source><![CDATA[ +ArArchiveEntry entry = (ArArchiveEntry) arInput.getNextEntry(); +byte[] content = new byte[entry.getSize()]; +LOOP UNTIL entry.getSize() HAS BEEN READ { + arInput.read(content, offset, content.length - offset); +} +]]></source> + + <p>Traditionally the AR format doesn't allow file names longer + than 16 characters. There are two variants that circumvent + this limitation in different ways, the GNU/SRV4 and the BSD + variant. Commons Compress 1.0 to 1.2 can only read archives + using the GNU/SRV4 variant, support for the BSD variant has + been added in Commons Compress 1.3. 
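+ A minimal sketch of opting into the BSD dialect on the writing side
+ (the <code>arOutput</code> variable is an
+ <code>ArArchiveOutputStream</code> created as in the example above):
+ <source><![CDATA[
+arOutput.setLongFileMode(ArArchiveOutputStream.LONGFILE_BSD);
+]]></source>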
Commons Compress 1.3 + also optionally supports writing archives with file names + longer than 16 characters using the BSD dialect, writing + the SVR4/GNU dialect is not supported.</p> + + <table> + <thead> + <tr> + <th>Version of Apache Commons Compress</th> + <th>Support for Traditional AR Format</th> + <th>Support for GNU/SRV4 Dialect</th> + <th>Support for BSD Dialect</th> + </tr> + </thead> + <tbody> + <tr> + <td>1.0 to 1.2</td> + <td>read/write</td> + <td>read</td> + <td>-</td> + </tr> + <tr> + <td>1.3 and later</td> + <td>read/write</td> + <td>read</td> + <td>read/write</td> + </tr> + </tbody> + </table> + + <p>It is not possible to detect the end of an AR archive in a + reliable way so <code>ArArchiveInputStream</code> will read + until it reaches the end of the stream or fails to parse the + stream's content as AR entries.</p> + + </subsection> + + <subsection name="arj"> + + <p>Note that Commons Compress doesn't support compressed, + encrypted or multi-volume ARJ archives, yet.</p> + + <p>Uncompressing a given arj archive (you would + certainly add exception handling and make sure all streams + get closed properly):</p> +<source><![CDATA[ +ArjArchiveEntry entry = arjInput.getNextEntry(); +byte[] content = new byte[entry.getSize()]; +LOOP UNTIL entry.getSize() HAS BEEN READ { + arjInput.read(content, offset, content.length - offset); +} +]]></source> + </subsection> + + <subsection name="cpio"> + + <p>In addition to the information stored + in <code>ArchiveEntry</code> a <code>CpioArchiveEntry</code> + stores various attributes including information about the + original owner and permissions.</p> + + <p>The cpio package supports the "new portable" as well as the + "old" format of CPIO archives in their binary, ASCII and + "with CRC" variants.</p> + + <p>Adding an entry to a cpio archive:</p> +<source><![CDATA[ +CpioArchiveEntry entry = new CpioArchiveEntry(name, size); +cpioOutput.putArchiveEntry(entry); +cpioOutput.write(contentOfEntry); +cpioOutput.closeArchiveEntry(); +]]></source> + + <p>Reading entries from an cpio archive:</p> +<source><![CDATA[ +CpioArchiveEntry entry = cpioInput.getNextCPIOEntry(); +byte[] content = new byte[entry.getSize()]; +LOOP UNTIL entry.getSize() HAS BEEN READ { + cpioInput.read(content, offset, content.length - offset); +} +]]></source> + + <p>Traditionally CPIO archives are written in blocks of 512 + bytes - the block size is a configuration parameter of the + <code>Cpio*Stream</code>'s constuctors. Starting with version + 1.5 <code>CpioArchiveInputStream</code> will consume the + padding written to fill the current block when the end of the + archive is reached. 
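+ A hedged sketch of passing a non-default block size (the value and the
+ wrapped <code>inputStream</code> are made up):
+ <source><![CDATA[
+CpioArchiveInputStream cpioInput = new CpioArchiveInputStream(inputStream, 5120);
+]]></source>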
Unfortunately many CPIO implementations + use larger block sizes so there may be more zero-byte padding + left inside the original input stream after the archive has + been consumed completely.</p> + + </subsection> + + <subsection name="jar"> + <p>In general, JAR archives are ZIP files, so the JAR package + supports all options provided by the <a href="#zip">ZIP</a> package.</p> + + <p>To be interoperable JAR archives should always be created + using the UTF-8 encoding for file names (which is the + default).</p> + + <p>Archives created using <code>JarArchiveOutputStream</code> + will implicitly add a <code>JarMarker</code> extra field to + the very first archive entry of the archive which will make + Solaris recognize them as Java archives and allows them to + be used as executables.</p> + + <p>Note that <code>ArchiveStreamFactory</code> doesn't + distinguish ZIP archives from JAR archives, so if you use + the one-argument <code>createArchiveInputStream</code> + method on a JAR archive, it will still return the more + generic <code>ZipArchiveInputStream</code>.</p> + + <p>The <code>JarArchiveEntry</code> class contains fields for + certificates and attributes that are planned to be supported + in the future but are not supported as of Compress 1.0.</p> + + <p>Adding an entry to a jar archive:</p> +<source><![CDATA[ +JarArchiveEntry entry = new JarArchiveEntry(name, size); +entry.setSize(size); +jarOutput.putArchiveEntry(entry); +jarOutput.write(contentOfEntry); +jarOutput.closeArchiveEntry(); +]]></source> + + <p>Reading entries from an jar archive:</p> +<source><![CDATA[ +JarArchiveEntry entry = jarInput.getNextJarEntry(); +byte[] content = new byte[entry.getSize()]; +LOOP UNTIL entry.getSize() HAS BEEN READ { + jarInput.read(content, offset, content.length - offset); +} +]]></source> + </subsection> + + <subsection name="dump"> + + <p>In addition to the information stored + in <code>ArchiveEntry</code> a <code>DumpArchiveEntry</code> + stores various attributes including information about the + original owner and permissions.</p> + + <p>As of Commons Compress 1.3 only dump archives using the + new-fs format - this is the most common variant - are + supported. Right now this library supports uncompressed and + ZLIB compressed archives and can not write archives at + all.</p> + + <p>Reading entries from an dump archive:</p> +<source><![CDATA[ +DumpArchiveEntry entry = dumpInput.getNextDumpEntry(); +byte[] content = new byte[entry.getSize()]; +LOOP UNTIL entry.getSize() HAS BEEN READ { + dumpInput.read(content, offset, content.length - offset); +} +]]></source> + + <p>Prior to version 1.5 <code>DumpArchiveInputStream</code> + would close the original input once it had read the last + record. 
Starting with version 1.5 it will not close the + stream implicitly.</p> + + </subsection> + + <subsection name="tar"> + + <p>The TAR package has a <a href="tar.html">dedicated + documentation page</a>.</p> + + <p>Adding an entry to a tar archive:</p> +<source><![CDATA[ +TarArchiveEntry entry = new TarArchiveEntry(name); +entry.setSize(size); +tarOutput.putArchiveEntry(entry); +tarOutput.write(contentOfEntry); +tarOutput.closeArchiveEntry(); +]]></source> + + <p>Reading entries from an tar archive:</p> +<source><![CDATA[ +TarArchiveEntry entry = tarInput.getNextTarEntry(); +byte[] content = new byte[entry.getSize()]; +LOOP UNTIL entry.getSize() HAS BEEN READ { + tarInput.read(content, offset, content.length - offset); +} +]]></source> + </subsection> + + <subsection name="zip"> + <p>The ZIP package has a <a href="zip.html">dedicated + documentation page</a>.</p> + + <p>Adding an entry to a zip archive:</p> +<source><![CDATA[ +ZipArchiveEntry entry = new ZipArchiveEntry(name); +entry.setSize(size); +zipOutput.putArchiveEntry(entry); +zipOutput.write(contentOfEntry); +zipOutput.closeArchiveEntry(); +]]></source> + + <p><code>ZipArchiveOutputStream</code> can use some internal + optimizations exploiting <code>SeekableByteChannel</code> if it + knows it is writing to a seekable output rather than a non-seekable + stream. If you are writing to a file, you should use the + constructor that accepts a <code>File</code> or + <code>SeekableByteChannel</code> argument rather + than the one using an <code>OutputStream</code> or the + factory method in <code>ArchiveStreamFactory</code>.</p> + + <p>Reading entries from an zip archive:</p> +<source><![CDATA[ +ZipArchiveEntry entry = zipInput.getNextZipEntry(); +byte[] content = new byte[entry.getSize()]; +LOOP UNTIL entry.getSize() HAS BEEN READ { + zipInput.read(content, offset, content.length - offset); +} +]]></source> + + <p>Reading entries from an zip archive using the + recommended <code>ZipFile</code> class:</p> +<source><![CDATA[ +ZipArchiveEntry entry = zipFile.getEntry(name); +InputStream content = zipFile.getInputStream(entry); +try { + READ UNTIL content IS EXHAUSTED +} finally { + content.close(); +} +]]></source> + + <p>Reading entries from an in-memory zip archive using + <code>SeekableInMemoryByteChannel</code> and <code>ZipFile</code> class:</p> +<source><![CDATA[ +byte[] inputData; // zip archive contents +SeekableInMemoryByteChannel inMemoryByteChannel = new SeekableInMemoryByteChannel(inputData); +ZipFile zipFile = new ZipFile(inMemoryByteChannel); +ZipArchiveEntry archiveEntry = zipFile.getEntry("entryName"); +InputStream inputStream = zipFile.getInputStream(archiveEntry); +inputStream.read() // read data from the input stream +]]></source> + + <p>Creating a zip file with multiple threads:</p> + + A simple implementation to create a zip file might look like this: + +<source> +public class ScatterSample { + + ParallelScatterZipCreator scatterZipCreator = new ParallelScatterZipCreator(); + ScatterZipOutputStream dirs = ScatterZipOutputStream.fileBased(File.createTempFile("scatter-dirs", "tmp")); + + public ScatterSample() throws IOException { + } + + public void addEntry(ZipArchiveEntry zipArchiveEntry, InputStreamSupplier streamSupplier) throws IOException { + if (zipArchiveEntry.isDirectory() && !zipArchiveEntry.isUnixSymlink()) + dirs.addArchiveEntry(ZipArchiveEntryRequest.createZipArchiveEntryRequest(zipArchiveEntry, streamSupplier)); + else + scatterZipCreator.addArchiveEntry( zipArchiveEntry, streamSupplier); + } + + public void 
+      throws IOException, ExecutionException, InterruptedException {
+    dirs.writeTo(zipArchiveOutputStream);
+    dirs.close();
+    scatterZipCreator.writeTo(zipArchiveOutputStream);
+  }
+}
+</source>
+ </subsection>
+
+ </section>
+ <section name="Compressors">
+
+ <subsection name="Concatenated Streams">
+ <p>For the bzip2, gzip and xz formats as well as the framed
+ lz4 format, a single compressed file
+ may actually consist of several streams that will be
+ concatenated by the command line utilities when decompressing
+ them. Starting with Commons Compress 1.4 the
+ <code>*CompressorInputStream</code>s for these formats support
+ concatenating streams as well, but they won't do so by
+ default. You must use the two-arg constructor and explicitly
+ enable the support.</p>
+ </subsection>
+
+ <subsection name="Brotli">
+
+ <p>The implementation of this package is provided by the
+ <a href="https://github.com/google/brotli">Google Brotli dec</a> library.</p>
+
+ <p>Uncompressing a given Brotli compressed file (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream fin = Files.newInputStream(Paths.get("archive.tar.br"));
+BufferedInputStream in = new BufferedInputStream(fin);
+OutputStream out = Files.newOutputStream(Paths.get("archive.tar"));
+BrotliCompressorInputStream brIn = new BrotliCompressorInputStream(in);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = brIn.read(buffer))) {
+    out.write(buffer, 0, n);
+}
+out.close();
+brIn.close();
+]]></source>
+ </subsection>
+
+ <subsection name="bzip2">
+
+ <p>Note that <code>BZip2CompressorOutputStream</code> keeps
+ hold of some big data structures in memory. While it is
+ recommended for <em>any</em> stream that you close it as soon as
+ you no longer need it, this is even more important
+ for <code>BZip2CompressorOutputStream</code>.</p>
+
+ <p>Uncompressing a given bzip2 compressed file (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream fin = Files.newInputStream(Paths.get("archive.tar.bz2"));
+BufferedInputStream in = new BufferedInputStream(fin);
+OutputStream out = Files.newOutputStream(Paths.get("archive.tar"));
+BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(in);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = bzIn.read(buffer))) {
+    out.write(buffer, 0, n);
+}
+out.close();
+bzIn.close();
+]]></source>
+
+ <p>Compressing a given file using bzip2 (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream in = Files.newInputStream(Paths.get("archive.tar"));
+OutputStream fout = Files.newOutputStream(Paths.get("archive.tar.bz2"));
+BufferedOutputStream out = new BufferedOutputStream(fout);
+BZip2CompressorOutputStream bzOut = new BZip2CompressorOutputStream(out);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = in.read(buffer))) {
+    bzOut.write(buffer, 0, n);
+}
+bzOut.close();
+in.close();
+]]></source>
+
+ </subsection>
+
+ <subsection name="DEFLATE">
+
+ <p>The implementation of the DEFLATE/INFLATE code used by this
+ package is provided by the <code>java.util.zip</code> package
+ of the Java class library.</p>
+
+ <p>Uncompressing a given DEFLATE compressed file (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream fin = Files.newInputStream(Paths.get("some-file"));
+BufferedInputStream in = new BufferedInputStream(fin);
+OutputStream out = Files.newOutputStream(Paths.get("archive.tar"));
+DeflateCompressorInputStream defIn = new DeflateCompressorInputStream(in);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = defIn.read(buffer))) {
+    out.write(buffer, 0, n);
+}
+out.close();
+defIn.close();
+]]></source>
+
+ <p>Compressing a given file using DEFLATE (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream in = Files.newInputStream(Paths.get("archive.tar"));
+OutputStream fout = Files.newOutputStream(Paths.get("some-file"));
+BufferedOutputStream out = new BufferedOutputStream(fout);
+DeflateCompressorOutputStream defOut = new DeflateCompressorOutputStream(out);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = in.read(buffer))) {
+    defOut.write(buffer, 0, n);
+}
+defOut.close();
+in.close();
+]]></source>
+
+ </subsection>
+
+ <subsection name="DEFLATE64">
+
+ <p>Uncompressing a given DEFLATE64 compressed file (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream fin = Files.newInputStream(Paths.get("some-file"));
+BufferedInputStream in = new BufferedInputStream(fin);
+OutputStream out = Files.newOutputStream(Paths.get("archive.tar"));
+Deflate64CompressorInputStream defIn = new Deflate64CompressorInputStream(in);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = defIn.read(buffer))) {
+    out.write(buffer, 0, n);
+}
+out.close();
+defIn.close();
+]]></source>
+
+ </subsection>
+
+ <subsection name="gzip">
+
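+ <p>In addition to the basic streaming shown in the examples
+ below, gzip output can be tuned by handing a
+ <code>GzipParameters</code> instance to the two-argument
+ <code>GzipCompressorOutputStream</code> constructor, which
+ controls the compression level and the metadata written to the
+ gzip header. A minimal sketch - the level and file name used
+ here are only illustrative values; see the
+ <code>GzipParameters</code> Javadoc for the full set of
+ options:</p>
+<source><![CDATA[
+GzipParameters parameters = new GzipParameters();
+parameters.setCompressionLevel(9);      // strongest but slowest compression
+parameters.setFilename("archive.tar");  // original file name stored in the gzip header
+OutputStream fout = Files.newOutputStream(Paths.get("archive.tar.gz"));
+GzipCompressorOutputStream gzOut =
+    new GzipCompressorOutputStream(new BufferedOutputStream(fout), parameters);
+]]></source>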
+ <p>The implementation of the DEFLATE/INFLATE code used by this + package is provided by the <code>java.util.zip</code> package + of the Java class library.</p> + + <p>Uncompressing a given gzip compressed file (you would + certainly add exception handling and make sure all streams + get closed properly):</p> +<source><![CDATA[ +InputStream fin = Files.newInputStream(Paths.get("archive.tar.gz")); +BufferedInputStream in = new BufferedInputStream(fin); +OutputStream out = Files.newOutputStream(Paths.get("archive.tar")); +GzipCompressorInputStream gzIn = new GzipCompressorInputStream(in); +final byte[] buffer = new byte[buffersize]; +int n = 0; +while (-1 != (n = gzIn.read(buffer))) { + out.write(buffer, 0, n); +} +out.close(); +gzIn.close(); +]]></source> + + <p>Compressing a given file using gzip (you would + certainly add exception handling and make sure all streams + get closed properly):</p> +<source><![CDATA[ +InputStream in = Files.newInputStream(Paths.get("archive.tar")); +OutputStream fout = Files.newOutputStream(Paths.get("archive.tar.gz")); +BufferedOutputStream out = new BufferedOutputStream(fout); +GzipCompressorOutputStream gzOut = new GzipCompressorOutputStream(out); +final byte[] buffer = new byte[buffersize]; +int n = 0; +while (-1 != (n = in.read(buffer))) { + gzOut.write(buffer, 0, n); +} +gzOut.close(); +in.close(); +]]></source> + + </subsection> + + <subsection name="LZ4"> + + <p>There are two different "formats" used for <a + href="http://lz4.github.io/lz4/">lz4</a>. The format called + "block format" only contains the raw compressed data while the + other provides a higher level "frame format" - Commons + Compress offers two different stream classes for reading or + writing either format.</p> + + <p>Uncompressing a given frame LZ4 file (you would + certainly add exception handling and make sure all streams + get closed properly):</p> +<source><![CDATA[ +InputStream fin = Files.newInputStream(Paths.get("archive.tar.lz4")); +BufferedInputStream in = new BufferedInputStream(fin); +OutputStream out = Files.newOutputStream(Paths.get("archive.tar")); +FramedLZ4CompressorInputStream zIn = new FramedLZ4CompressorInputStream(in); +final byte[] buffer = new byte[buffersize]; +int n = 0; +while (-1 != (n = zIn.read(buffer))) { + out.write(buffer, 0, n); +} +out.close(); +zIn.close(); +]]></source> + + <p>Compressing a given file using the LZ4 frame format (you would + certainly add exception handling and make sure all streams + get closed properly):</p> +<source><![CDATA[ +InputStream in = Files.newInputStream(Paths.get("archive.tar")); +OutputStream fout = Files.newOutputStream(Paths.get("archive.tar.lz4")); +BufferedOutputStream out = new BufferedOutputStream(fout); +FramedLZ4CompressorOutputStream lzOut = new FramedLZ4CompressorOutputStream(out); +final byte[] buffer = new byte[buffersize]; +int n = 0; +while (-1 != (n = in.read(buffer))) { + lzOut.write(buffer, 0, n); +} +lzOut.close(); +in.close(); +]]></source> + + </subsection> + + <subsection name="lzma"> + + <p>The implementation of this package is provided by the + public domain <a href="https://tukaani.org/xz/java.html">XZ + for Java</a> library.</p> + + <p>Uncompressing a given lzma compressed file (you would + certainly add exception handling and make sure all streams + get closed properly):</p> +<source><![CDATA[ +InputStream fin = Files.newInputStream(Paths.get("archive.tar.lzma")); +BufferedInputStream in = new BufferedInputStream(fin); +OutputStream out = Files.newOutputStream(Paths.get("archive.tar")); 
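+// LZMACompressorInputStream is implemented on top of the optional XZ for Java library, which must be on the classpath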
+LZMACompressorInputStream lzmaIn = new LZMACompressorInputStream(in);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = lzmaIn.read(buffer))) {
+    out.write(buffer, 0, n);
+}
+out.close();
+lzmaIn.close();
+]]></source>
+
+ <p>Compressing a given file using lzma (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream in = Files.newInputStream(Paths.get("archive.tar"));
+OutputStream fout = Files.newOutputStream(Paths.get("archive.tar.lzma"));
+BufferedOutputStream out = new BufferedOutputStream(fout);
+LZMACompressorOutputStream lzOut = new LZMACompressorOutputStream(out);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = in.read(buffer))) {
+    lzOut.write(buffer, 0, n);
+}
+lzOut.close();
+in.close();
+]]></source>
+
+ </subsection>
+
+ <subsection name="Pack200">
+
+ <p>The Pack200 package has a <a href="pack200.html">dedicated
+ documentation page</a>.</p>
+
+ <p>The implementation of this package is provided by
+ the <code>java.util.jar</code> package of the Java class
+ library.</p>
+
+ <p>Uncompressing a given pack200 compressed file (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream fin = Files.newInputStream(Paths.get("archive.pack"));
+BufferedInputStream in = new BufferedInputStream(fin);
+OutputStream out = Files.newOutputStream(Paths.get("archive.jar"));
+Pack200CompressorInputStream pIn = new Pack200CompressorInputStream(in);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = pIn.read(buffer))) {
+    out.write(buffer, 0, n);
+}
+out.close();
+pIn.close();
+]]></source>
+
+ <p>Compressing a given jar using pack200 (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream in = Files.newInputStream(Paths.get("archive.jar"));
+OutputStream fout = Files.newOutputStream(Paths.get("archive.pack"));
+BufferedOutputStream out = new BufferedOutputStream(fout);
+Pack200CompressorOutputStream pOut = new Pack200CompressorOutputStream(out);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = in.read(buffer))) {
+    pOut.write(buffer, 0, n);
+}
+pOut.close();
+in.close();
+]]></source>
+
+ </subsection>
+
+ <subsection name="Snappy">
+
+ <p>There are two different "formats" used for <a
+ href="https://github.com/google/snappy/">Snappy</a>: one only
+ contains the raw compressed data while the other provides a
+ higher level "framing format" - Commons Compress offers two
+ different stream classes for reading either format.</p>
+
+ <p>Starting with 1.12 we've added support for different
+ dialects of the framing format that can be specified when
+ constructing the stream. The <code>STANDARD</code> dialect
+ follows the "framing format" specification while the
+ <code>IWORK_ARCHIVE</code> dialect can be used to parse IWA
+ files that are part of Apple's iWork 13 format. If no dialect
+ has been specified, <code>STANDARD</code> is used.
Only the + <code>STANDARD</code> format can be detected by + <code>CompressorStreamFactory</code>.</p> + + <p>Uncompressing a given framed Snappy file (you would + certainly add exception handling and make sure all streams + get closed properly):</p> +<source><![CDATA[ +InputStream fin = Files.newInputStream(Paths.get("archive.tar.sz")); +BufferedInputStream in = new BufferedInputStream(fin); +OutputStream out = Files.newOutputStream(Paths.get("archive.tar")); +FramedSnappyCompressorInputStream zIn = new FramedSnappyCompressorInputStream(in); +final byte[] buffer = new byte[buffersize]; +int n = 0; +while (-1 != (n = zIn.read(buffer))) { + out.write(buffer, 0, n); +} +out.close(); +zIn.close(); +]]></source> + + <p>Compressing a given file using framed Snappy (you would + certainly add exception handling and make sure all streams + get closed properly):</p> +<source><![CDATA[ +InputStream in = Files.newInputStream(Paths.get("archive.tar")); +OutputStream fout = Files.newOutputStream(Paths.get("archive.tar.sz")); +BufferedOutputStream out = new BufferedOutputStream(fout); +FramedSnappyCompressorOutputStream snOut = new FramedSnappyCompressorOutputStream(out); +final byte[] buffer = new byte[buffersize]; +int n = 0; +while (-1 != (n = in.read(buffer))) { + snOut.write(buffer, 0, n); +} +snOut.close(); +in.close(); +]]></source> + + </subsection> + + <subsection name="XZ"> + + <p>The implementation of this package is provided by the + public domain <a href="https://tukaani.org/xz/java.html">XZ + for Java</a> library.</p> + + <p>When you try to open an XZ stream for reading using + <code>CompressorStreamFactory</code>, Commons Compress will + check whether the XZ for Java library is available. Starting + with Compress 1.9 the result of this check will be cached + unless Compress finds OSGi classes in its classpath. 
You can
+ use <code>XZUtils#setCacheXZAvailability</code> to override
+ this default behavior.</p>
+
+ <p>Uncompressing a given XZ compressed file (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream fin = Files.newInputStream(Paths.get("archive.tar.xz"));
+BufferedInputStream in = new BufferedInputStream(fin);
+OutputStream out = Files.newOutputStream(Paths.get("archive.tar"));
+XZCompressorInputStream xzIn = new XZCompressorInputStream(in);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = xzIn.read(buffer))) {
+    out.write(buffer, 0, n);
+}
+out.close();
+xzIn.close();
+]]></source>
+
+ <p>Compressing a given file using XZ (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream in = Files.newInputStream(Paths.get("archive.tar"));
+OutputStream fout = Files.newOutputStream(Paths.get("archive.tar.xz"));
+BufferedOutputStream out = new BufferedOutputStream(fout);
+XZCompressorOutputStream xzOut = new XZCompressorOutputStream(out);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = in.read(buffer))) {
+    xzOut.write(buffer, 0, n);
+}
+xzOut.close();
+in.close();
+]]></source>
+
+ </subsection>
+
+ <subsection name="Z">
+
+ <p>Uncompressing a given Z compressed file (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream fin = Files.newInputStream(Paths.get("archive.tar.Z"));
+BufferedInputStream in = new BufferedInputStream(fin);
+OutputStream out = Files.newOutputStream(Paths.get("archive.tar"));
+ZCompressorInputStream zIn = new ZCompressorInputStream(in);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = zIn.read(buffer))) {
+    out.write(buffer, 0, n);
+}
+out.close();
+zIn.close();
+]]></source>
+
+ </subsection>
+
+ <subsection name="Zstandard">
+
+ <p>The implementation of this package is provided by the
+ <a href="https://github.com/luben/zstd-jni">Zstandard JNI</a> library.</p>
+
+ <p>Uncompressing a given Zstandard compressed file (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+InputStream fin = Files.newInputStream(Paths.get("archive.tar.zstd"));
+BufferedInputStream in = new BufferedInputStream(fin);
+OutputStream out = Files.newOutputStream(Paths.get("archive.tar"));
+ZstdCompressorInputStream zsIn = new ZstdCompressorInputStream(in);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = zsIn.read(buffer))) {
+    out.write(buffer, 0, n);
+}
+out.close();
+zsIn.close();
+]]></source>
+
+ <p>Compressing a given file using the Zstandard format (you
+ would certainly add exception handling and make sure all
+ streams get closed properly):</p>
+<source><![CDATA[
+InputStream in = Files.newInputStream(Paths.get("archive.tar"));
+OutputStream fout = Files.newOutputStream(Paths.get("archive.tar.zstd"));
+BufferedOutputStream out = new BufferedOutputStream(fout);
+ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(out);
+final byte[] buffer = new byte[buffersize];
+int n = 0;
+while (-1 != (n = in.read(buffer))) {
+    zOut.write(buffer, 0, n);
+}
+zOut.close();
+in.close();
+]]></source>
+
+ </subsection>
+ </section>
+
+ <section name="Extending Commons Compress">
+
+ <p>
+ Starting with release 1.13, it is possible to add Compressor- and ArchiverStream implementations using
+ Java's <a href="https://docs.oracle.com/javase/7/docs/api/java/util/ServiceLoader.html">ServiceLoader</a>
+ mechanism.
+ </p>
+
+ <subsection name="Extending Commons Compress Compressors">
+
+ <p>
+ To provide your own compressor, you must make available on the classpath a file called
+ <code>META-INF/services/org.apache.commons.compress.compressors.CompressorStreamProvider</code>.
+ </p>
+ <p>
+ This file MUST contain one fully-qualified class name per line.
+ </p>
+ <p>
+ For example:
+ </p>
+ <pre>org.apache.commons.compress.compressors.TestCompressorStreamProvider</pre>
+ <p>
+ This class MUST implement the Commons Compress interface
+ <a href="apidocs/org/apache/commons/compress/compressors/CompressorStreamProvider.html">org.apache.commons.compress.compressors.CompressorStreamProvider</a>.
+ </p>
+ </subsection>
+
+ <subsection name="Extending Commons Compress Archivers">
+
+ <p>
+ To provide your own archiver, you must make available on the classpath a file called
+ <code>META-INF/services/org.apache.commons.compress.archivers.ArchiveStreamProvider</code>.
+ </p>
+ <p>
+ This file MUST contain one fully-qualified class name per line.
+ </p>
+ <p>
+ For example:
+ </p>
+ <pre>org.apache.commons.compress.archivers.TestArchiveStreamProvider</pre>
+ <p>
+ This class MUST implement the Commons Compress interface
+ <a href="apidocs/org/apache/commons/compress/archivers/ArchiveStreamProvider.html">org.apache.commons.compress.archivers.ArchiveStreamProvider</a>.
+ </p>
+ </subsection>
+
+ </section>
+ </body>
+</document>
diff --git a/src/site/xdoc/index.xml b/src/site/xdoc/index.xml
new file mode 100644
index 000000000..d94b054bf
--- /dev/null
+++ b/src/site/xdoc/index.xml
@@ -0,0 +1,130 @@
+<?xml version="1.0"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+<document>
+ <properties>
+ <title>Overview</title>
+ <author email="dev@commons.apache.org">Commons Documentation Team</author>
+ </properties>
+ <body>
+ <section name="Apache Commons Compress™">
+ <p>
+ The Apache Commons Compress library defines an API for
+ working with ar, cpio, Unix dump, tar, zip, gzip, XZ,
+ Pack200, bzip2, 7z, arj, lzma, snappy, DEFLATE, lz4,
+ Brotli, Zstandard, DEFLATE64 and Z files.
+ </p>
+ <p>
+ The code in this component has many origins:
+ </p>
+ <ul>
+ <li>The bzip2, tar and zip support came from Avalon's
+ Excalibur, but originally from Ant, as far as life in
+ Apache goes. The tar package is originally Tim Endres'
+ public domain package. The bzip2 package is based on
+ the work done by Keiron Liddle as well as Julian Seward's
+ <a href="http://www.bzip.org/index.html">libbzip2</a>.
+ It has migrated
+ via:<br/> Ant -> Avalon-Excalibur -> Commons-IO
+ -> Commons-Compress.</li>
+ <li>The cpio package has been contributed by Michael Kuss
+ and
+ the <a href="http://jrpm.sourceforge.net/">jRPM</a>
+ project.</li>
+ </ul>
+
+ </section>
+
+ <section name="Status">
+ <p>The current release is 1.18 and requires Java 7.</p>
+
+ <p>Below we highlight some new features; for a full list
+ of changes see the <a href="changes-report.html">Changes
+ Report</a>.</p>
+
+ <subsection name="What's new in 1.18?">
+ <ul>
+ <li>The CPIO package now properly handles file names
+ using a multi-byte encoding.</li>
+ <li>ZipArchiveInputStream can now deal with APK files
+ containing an APK signing block.</li>
+ <li>It is now possible to specify various parameters
+ for Zstd output.</li>
+ </ul>
+ </subsection>
+ </section>
+
+ <section name="Documentation">
+ <p>The compress component is split into <em>compressors</em> and
+ <em>archivers</em>. While <em>compressors</em>
+ (un)compress streams that usually store a single
+ entry, <em>archivers</em> deal with archives that contain
+ structured content represented
+ by <code>ArchiveEntry</code> instances which in turn
+ usually correspond to single files or directories.</p>
+
+ <p>Currently the bzip2, Pack200, XZ, gzip, lzma, brotli,
+ Zstandard and Z formats are
+ supported as compressors where gzip support is mostly provided by
+ the <code>java.util.zip</code> package and Pack200 support
+ by the <code>java.util.jar</code> package of the Java
+ class library. XZ and lzma support is provided by the public
+ domain <a href="https://tukaani.org/xz/java.html">XZ for
+ Java</a> library. Brotli support is provided by the MIT
+ licensed <a href="https://github.com/google/brotli">Google
+ Brotli decoder</a>. Zstandard support is provided by the BSD
+ licensed <a href="https://github.com/luben/zstd-jni">Zstd-jni</a>.
+ As of Commons Compress 1.18 support for the DEFLATE64, Z and Brotli
+ formats is read-only.</p>
+
+ <p>The ar, arj, cpio, dump, tar, 7z and zip formats are supported as
+ archivers where the <a href="zip.html">zip</a>
+ implementation provides capabilities that go beyond the
+ features found in java.util.zip. As of Commons Compress
+ 1.18 support for the dump and arj formats is
+ read-only - 7z can read most compressed and encrypted
+ archives but only write unencrypted ones. LZMA(2) support
+ in 7z requires <a href="https://tukaani.org/xz/java.html">XZ for
+ Java</a> as well.</p>
+
+ <p>The compress component provides abstract base classes for
+ compressors and archivers together with factories that can
+ be used to choose implementations by algorithm name.
In + the case of input streams the factories can also be used + to guess the format and provide the matching + implementation.</p> + + <ul> + <li>The <a href="examples.html">user guide</a> contains + more detailed information and some examples.</li> + <li>The <a href="limitations.html">known limitations and + problems</a> page lists the currently known problems + grouped by the format they apply to.</li> + <li>The <a href="apidocs/index.html">Javadoc</a> of the latest GIT</li> + <li>The <a href="https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=tree">GIT + repository</a> can be browsed.</li> + </ul> + </section> + <section name="Releases"> + <p> + <a href="https://commons.apache.org/compress/download_compress.cgi">Download now!</a> + </p> + </section> + </body> +</document> diff --git a/src/site/xdoc/issue-tracking.xml b/src/site/xdoc/issue-tracking.xml new file mode 100644 index 000000000..6389f92ec --- /dev/null +++ b/src/site/xdoc/issue-tracking.xml @@ -0,0 +1,102 @@ +<?xml version="1.0"?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> +<!-- + +======================================================================+ + |**** ****| + |**** THIS FILE IS GENERATED BY THE COMMONS BUILD PLUGIN ****| + |**** DO NOT EDIT DIRECTLY ****| + |**** ****| + +======================================================================+ + | TEMPLATE FILE: issue-tracking-template.xml | + | commons-build-plugin/trunk/src/main/resources/commons-xdoc-templates | + +======================================================================+ + | | + | 1) Re-generate using: mvn commons:jira-page | + | | + | 2) Set the following properties in the component's pom: | + | - commons.jira.id (required, alphabetic, upper case) | + | - commons.jira.pid (required, numeric) | + | | + | 3) Example Properties | + | | + | <properties> | + | <commons.jira.id>MATH</commons.jira.id> | + | <commons.jira.pid>12310485</commons.jira.pid> | + | </properties> | + | | + +======================================================================+ +--> +<document> + <properties> + <title>Apache Commons Compress Issue tracking</title> + <author email="dev@commons.apache.org">Apache Commons Documentation Team</author> + </properties> + <body> + + <section name="Apache Commons Compress Issue tracking"> + <p> + Apache Commons Compress uses <a href="https://issues.apache.org/jira/">ASF JIRA</a> for tracking issues. + See the <a href="https://issues.apache.org/jira/browse/COMPRESS">Apache Commons Compress JIRA project page</a>. 
+ </p> + + <p> + To use JIRA you may need to <a href="https://issues.apache.org/jira/secure/Signup!default.jspa">create an account</a> + (if you have previously created/updated Commons issues using Bugzilla an account will have been automatically + created and you can use the <a href="https://issues.apache.org/jira/secure/ForgotPassword!default.jspa">Forgot Password</a> + page to get a new password). + </p> + + <p> + If you would like to report a bug, or raise an enhancement request with + Apache Commons Compress please do the following: + <ol> + <li><a href="https://issues.apache.org/jira/secure/IssueNavigator.jspa?reset=true&pid=12310904&sorter/field=issuekey&sorter/order=DESC&status=1&status=3&status=4">Search existing open bugs</a>. + If you find your issue listed then please add a comment with your details.</li> + <li><a href="mail-lists.html">Search the mailing list archive(s)</a>. + You may find your issue or idea has already been discussed.</li> + <li>Decide if your issue is a bug or an enhancement.</li> + <li>Submit either a <a href="https://issues.apache.org/jira/secure/CreateIssueDetails!init.jspa?pid=12310904&issuetype=1&priority=4&assignee=-1">bug report</a> + or <a href="https://issues.apache.org/jira/secure/CreateIssueDetails!init.jspa?pid=12310904&issuetype=4&priority=4&assignee=-1">enhancement request</a>.</li> + </ol> + </p> + + <p> + Please also remember these points: + <ul> + <li>the more information you provide, the better we can help you</li> + <li>test cases are vital, particularly for any proposed enhancements</li> + <li>the developers of Apache Commons Compress are all unpaid volunteers</li> + </ul> + </p> + + <p> + For more information on subversion and creating patches see the + <a href="https://www.apache.org/dev/contributors.html">Apache Contributors Guide</a>. + </p> + + <p> + You may also find these links useful: + <ul> + <li><a href="https://issues.apache.org/jira/secure/IssueNavigator.jspa?reset=true&pid=12310904&sorter/field=issuekey&sorter/order=DESC&status=1&status=3&status=4">All Open Apache Commons Compress bugs</a></li> + <li><a href="https://issues.apache.org/jira/secure/IssueNavigator.jspa?reset=true&pid=12310904&sorter/field=issuekey&sorter/order=DESC&status=5&status=6">All Resolved Apache Commons Compress bugs</a></li> + <li><a href="https://issues.apache.org/jira/secure/IssueNavigator.jspa?reset=true&pid=12310904&sorter/field=issuekey&sorter/order=DESC">All Apache Commons Compress bugs</a></li> + </ul> + </p> + </section> + </body> +</document> diff --git a/src/site/xdoc/limitations.xml b/src/site/xdoc/limitations.xml new file mode 100644 index 000000000..3e9a82617 --- /dev/null +++ b/src/site/xdoc/limitations.xml @@ -0,0 +1,235 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+-->
+<document>
+ <properties>
+ <title>Known Limitations and Problems</title>
+ </properties>
+
+ <body>
+ <p>This page lists the known limitations and problems of Apache
+ Commons Compress™ grouped by the archiving/compression
+ format they apply to.</p>
+
+ <section name="General">
+ <ul>
+ <li>Several implementations of decompressors and unarchivers will
+ invoke <a
+ href="https://docs.oracle.com/javase/10/docs/api/java/io/InputStream.html#skip(long)"><code>skip</code></a>
+ on the underlying <code>InputStream</code> which may throw an
+ <code>IOException</code> in some stream implementations. One
+ known case where this happens is when using
+ <code>System.in</code> as input. If you encounter an
+ exception with a message like "Illegal seek" we recommend you
+ wrap your stream in a <code>SkipShieldingInputStream</code>
+ from our utils package before passing it to Compress.</li>
+ </ul>
+ </section>
+
+ <section name="7Z">
+ <ul>
+ <li>the format requires the otherwise optional <a
+ href="https://tukaani.org/xz/java.html">XZ for Java</a>
+ library.</li>
+ <li>only <code>File</code>s are supported as input/output,
+ not streams. Starting with Compress 1.13
+ <code>SeekableByteChannel</code> is supported as well.</li>
+ <li>In Compress 1.7
+ <code>ArchiveStreamFactory</code> will not auto-detect 7z
+ archives; starting with 1.8 it will throw a
+ <code>StreamingNotSupportedException</code> when reading from
+ a 7z archive.</li>
+ <li>Encryption, solid compression and header compression
+ are only supported when reading archives</li>
+ <li>Commons Compress 1.12 and earlier didn't support writing
+ LZMA.</li>
+ <li>Several of the "methods" supported by 7z are not
+ implemented in Compress.</li>
+ <li>No support for multi-volume archives</li>
+ <li>Support for some BCJ filters and the DELTA filter has
+ been added with Compress 1.8. Because of a known bug in
+ version 1.4 of the <a
+ href="https://tukaani.org/xz/java.html">XZ for Java</a>
+ library, archives using BCJ filters will cause an
+ <code>AssertionError</code> when read. If you need support
+ for BCJ filters you must use XZ for Java 1.5 or later.</li>
+ </ul>
+ </section>
+ <section name="AR">
+ <ul>
+ <li>AR archives cannot contain directories - this is a
+ limitation of the format rather than one of Compress'
+ implementation.</li>
+ <li>file names longer than 16 characters are only fully
+ supported using the BSD dialect; the GNU/SVR4 dialect is only
+ supported when reading archives.</li>
+ </ul>
+ </section>
+ <section name="ARJ">
+ <ul>
+ <li>read-only support</li>
+ <li>no support for compression, encryption or multi-volume
+ archives</li>
+ </ul>
+ </section>
+ <section name="Brotli">
+ <ul>
+ <li>the format requires the otherwise optional <a
+ href="https://github.com/google/brotli">Google Brotli dec</a>
+ library.</li>
+ <li>read-only support</li>
+ <li><code>CompressorStreamFactory</code> is not able to auto-detect
+ streams using Brotli compression.</li>
+ </ul>
+ </section>
+ <section name="BZIP2">
+ <p>Versions of Compress prior to 1.4.1 are vulnerable to a
+ possible denial of service attack; see the <a
+ href="security.html">Security Reports</a> page for details.</p>
+ </section>
+ <section name="CPIO">
+ <p>We are not aware of any problems.</p>
+ </section>
+ <section name="DEFLATE">
+ <ul>
+ <li><code>CompressorStreamFactory</code> is not able to auto-detect
+ streams using DEFLATE compression.</li>
+ </ul>
+ </section>
+ <section name="DEFLATE64">
+ <ul>
+ <li><code>CompressorStreamFactory</code> is not able to auto-detect
+ streams using DEFLATE64 compression.</li>
+ <li>read-only support</li>
+ </ul>
+ </section>
+ <section name="DUMP">
+ <ul>
+ <li>read-only support</li>
+ <li>only the new-fs format is supported</li>
+ <li>the only compression algorithm supported is zlib</li>
+ </ul>
+ </section>
+ <section name="GZIP">
+ <p>We are not aware of any problems.</p>
+ </section>
+ <section name="JAR">
+ <p>JAR archives are special ZIP archives; all limitations of <a
+ href="#ZIP">ZIP</a> apply to JAR as well.</p>
+ <ul>
+ <li><code>ArchiveStreamFactory</code> cannot tell JAR
+ archives from ZIP archives and will not auto-detect
+ JARs.</li>
+ <li>Compress doesn't provide special access to the archive's
+ MANIFEST</li>
+ </ul>
+ </section>
+ <section name="LZ4">
+ <ul>
+ <li>In theory LZ4 compressed streams can contain literals and
+ copies of arbitrary length while Commons Compress only
+ supports sizes up to 2<sup>63</sup> - 1 (i.e.
≈ 9.2 + EB).</li> + </ul> + </section> + <section name="LZMA"> + <ul> + <li>the format requires the otherwise optional <a + href="https://tukaani.org/xz/java.html">XZ for Java</a> + library.</li> + <li>Commons Compress 1.12 and earlier only support reading + the format</li> + </ul> + </section> + <section name="PACK200"> + <p>We are not aware of any problems.</p> + </section> + <section name="SNAPPY"> + <ul> + <li>Commons Compress 1.13 and earlier only support reading + the format</li> + </ul> + </section> + <section name="TAR"> + <ul> + <li>sparse files can neither be read nor written</li> + <li>only a subset of the GNU and POSIX extensions are + supported</li> + <li>In Compress 1.6 <code>TarArchiveInputStream</code> could + fail to read the full contents of an entry unless the stream + was wrapped in a buffering stream.</li> + </ul> + </section> + <section name="XZ"> + <ul> + <li>the format requires the otherwise optional <a + href="https://tukaani.org/xz/java.html">XZ for Java</a> + library.</li> + </ul> + </section> + <section name="Z"> + <ul> + <li>Prior to Compress 1.8.1 + <code>CompressorStreamFactory</code> was not able to + auto-detect streams using .Z compression.</li> + <li>read-only support</li> + </ul> + </section> + <section name="ZIP"> + <ul> + <li><code>ZipArchiveInputStream</code> is limited and may + even return false contents in some cases, use + <code>ZipFile</code> whenever possible. See <a + href="zip.html#ZipArchiveInputStream_vs_ZipFile">the ZIP + documentation page</a> for details. This limitation is a + result of streaming data vs using random access and not a + limitation of Compress' specific implementation.</li> + <li>only a subset of compression methods are supported, + including the most common STORED and DEFLATEd. IMPLODE, + SHRINK, DEFLATE64 and BZIP2 support is read-only.</li> + <li>no support for encryption or multi-volume archives</li> + <li>In versions prior to Compress 1.6 + <code>ZipArchiveEntries</code> read from an archive will + contain non-zero millisecond values when using Java8 rather + than the expected two-second granularity.</li> + <li>Compress 1.7 has a known bug where the very first entry + of an archive will not be read correctly by + <code>ZipArchiveInputStream</code> if it used the STORED + method.</li> + <li><code>ZipArchiveEntry#getLastModifiedDate</code> uses + <code>ZipEntry#getTime</code> under the covers which may + return different times for the same archive when using + different versions of Java.</li> + <li>In versions of Compress prior to 1.16 a specially crafted + ZIP archive can be used to cause an infinite loop inside of + Compress' extra field parser used by the <code>ZipFile</code> + and <code>ZipArchiveInputStream</code> classes. This can be + used to mount a denial of service attack against services + that use Compress' zip package. See the <a + href="security.html">Security Reports</a> page for + details.</li> + </ul> + </section> + <section name="Zstandard"> + <ul> + <li>the format requires the otherwise optional <a + href="https://github.com/luben/zstd-jni">Zstandard JNI</a> + library.</li> + </ul> + </section> + </body> +</document> diff --git a/src/site/xdoc/mail-lists.xml b/src/site/xdoc/mail-lists.xml new file mode 100644 index 000000000..6b0b1a51c --- /dev/null +++ b/src/site/xdoc/mail-lists.xml @@ -0,0 +1,205 @@ +<?xml version="1.0"?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. 
See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> +<!-- + +======================================================================+ + |**** ****| + |**** THIS FILE IS GENERATED BY THE COMMONS BUILD PLUGIN ****| + |**** DO NOT EDIT DIRECTLY ****| + |**** ****| + +======================================================================+ + | TEMPLATE FILE: mail-lists-template.xml | + | commons-build-plugin/trunk/src/main/resources/commons-xdoc-templates | + +======================================================================+ + | | + | 1) Re-generate using: mvn commons:mail-page | + | | + | 2) Set the following properties in the component's pom: | + | - commons.componentid (required, alphabetic, lower case) | + | | + | 3) Example Properties | + | | + | <properties> | + | <commons.componentid>math</commons.componentid> | + | </properties> | + | | + +======================================================================+ +--> +<document> + <properties> + <title>Apache Commons Compress Mailing Lists</title> + <author email="dev@commons.apache.org">Apache Commons Documentation Team</author> + </properties> + <body> + + <section name="Overview"> + <p> + <a href="index.html">Apache Commons Compress</a> shares mailing lists with all the other + <a href="https://commons.apache.org/components.html">Commons Components</a>. + To make it easier for people to only read messages related to components they are interested in, + the convention in Commons is to prefix the subject line of messages with the component's name, + for example: + <ul> + <li>[compress] Problem with the ...</li> + </ul> + </p> + <p> + Questions related to the usage of Apache Commons Compress should be posted to the + <a href="https://mail-archives.apache.org/mod_mbox/commons-user/">User List</a>. + <br /> + The <a href="https://mail-archives.apache.org/mod_mbox/commons-dev/">Developer List</a> + is for questions and discussion related to the development of Apache Commons Compress. + <br /> + Please do not cross-post; developers are also subscribed to the user list. + <br /> + You must be subscribed to post to the mailing lists. Follow the Subscribe links below + to subscribe. + </p> + <p> + <strong>Note:</strong> please don't send patches or attachments to any of the mailing lists. + Patches are best handled via the <a href="issue-tracking.html">Issue Tracking</a> system. + Otherwise, please upload the file to a public server and include the URL in the mail. 
+ </p> + </section> + + <section name="Apache Commons Compress Mailing Lists"> + <p> + <strong>Please prefix the subject line of any messages for <a href="index.html">Apache Commons Compress</a> + with <i>[compress]</i></strong> - <i>thanks!</i> + <br /> + <br /> + </p> + + <table> + <tr> + <th>Name</th> + <th>Subscribe</th> + <th>Unsubscribe</th> + <th>Post</th> + <th>Archive</th> + <th>Other Archives</th> + </tr> + + + <tr> + <td> + <strong>Commons User List</strong> + <br /><br /> + Questions on using Apache Commons Compress. + <br /><br /> + </td> + <td><a href="mailto:user-subscribe@commons.apache.org">Subscribe</a></td> + <td><a href="mailto:user-unsubscribe@commons.apache.org">Unsubscribe</a></td> + <td><a href="mailto:user@commons.apache.org?subject=[compress]">Post</a></td> + <td><a href="https://mail-archives.apache.org/mod_mbox/commons-user/">mail-archives.apache.org</a></td> + <td><a href="https://markmail.org/list/org.apache.commons.users/">markmail.org</a><br /> + <a href="https://www.mail-archive.com/user@commons.apache.org/">www.mail-archive.com</a><br /> + <a href="http://news.gmane.org/gmane.comp.jakarta.commons.devel">news.gmane.org</a> + </td> + </tr> + + + <tr> + <td> + <strong>Commons Developer List</strong> + <br /><br /> + Discussion of development of Apache Commons Compress. + <br /><br /> + </td> + <td><a href="mailto:dev-subscribe@commons.apache.org">Subscribe</a></td> + <td><a href="mailto:dev-unsubscribe@commons.apache.org">Unsubscribe</a></td> + <td><a href="mailto:dev@commons.apache.org?subject=[compress]">Post</a></td> + <td><a href="https://mail-archives.apache.org/mod_mbox/commons-dev/">mail-archives.apache.org</a></td> + <td><a href="https://markmail.org/list/org.apache.commons.dev/">markmail.org</a><br /> + <a href="https://www.mail-archive.com/dev@commons.apache.org/">www.mail-archive.com</a><br /> + <a href="http://news.gmane.org/gmane.comp.jakarta.commons.devel">news.gmane.org</a> + </td> + </tr> + + + <tr> + <td> + <strong>Commons Issues List</strong> + <br /><br /> + Only for e-mails automatically generated by the <a href="issue-tracking.html">issue tracking</a> system. + <br /><br /> + </td> + <td><a href="mailto:issues-subscribe@commons.apache.org">Subscribe</a></td> + <td><a href="mailto:issues-unsubscribe@commons.apache.org">Unsubscribe</a></td> + <td><i>read only</i></td> + <td><a href="https://mail-archives.apache.org/mod_mbox/commons-issues/">mail-archives.apache.org</a></td> + <td><a href="https://markmail.org/list/org.apache.commons.issues/">markmail.org</a><br /> + <a href="https://www.mail-archive.com/issues@commons.apache.org/">www.mail-archive.com</a> + </td> + </tr> + + + <tr> + <td> + <strong>Commons Commits List</strong> + <br /><br /> + Only for e-mails automatically generated by the <a href="source-repository.html">source control</a> sytem. 
+ <br /><br /> + </td> + <td><a href="mailto:commits-subscribe@commons.apache.org">Subscribe</a></td> + <td><a href="mailto:commits-unsubscribe@commons.apache.org">Unsubscribe</a></td> + <td><i>read only</i></td> + <td><a href="https://mail-archives.apache.org/mod_mbox/commons-commits/">mail-archives.apache.org</a></td> + <td><a href="https://markmail.org/list/org.apache.commons.commits/">markmail.org</a><br /> + <a href="https://www.mail-archive.com/commits@commons.apache.org/">www.mail-archive.com</a> + </td> + </tr> + + </table> + + </section> + <section name="Apache Mailing Lists"> + <p> + Other mailing lists which you may find useful include: + </p> + + <table> + <tr> + <th>Name</th> + <th>Subscribe</th> + <th>Unsubscribe</th> + <th>Post</th> + <th>Archive</th> + <th>Other Archives</th> + </tr> + <tr> + <td> + <strong>Apache Announce List</strong> + <br /><br /> + General announcements of Apache project releases. + <br /><br /> + </td> + <td><a class="externalLink" href="mailto:announce-subscribe@apache.org">Subscribe</a></td> + <td><a class="externalLink" href="mailto:announce-unsubscribe@apache.org">Unsubscribe</a></td> + <td><i>read only</i></td> + <td><a class="externalLink" href="https://mail-archives.apache.org/mod_mbox/www-announce/">mail-archives.apache.org</a></td> + <td><a class="externalLink" href="https://markmail.org/list/org.apache.announce/">markmail.org</a><br /> + <a class="externalLink" href="http://old.nabble.com/Apache-News-and-Announce-f109.html">old.nabble.com</a><br /> + <a class="externalLink" href="https://www.mail-archive.com/announce@apache.org/">www.mail-archive.com</a><br /> + <a class="externalLink" href="http://news.gmane.org/gmane.comp.apache.announce">news.gmane.org</a> + </td> + </tr> + </table> + + </section> + </body> +</document> diff --git a/src/site/xdoc/pack200.xml b/src/site/xdoc/pack200.xml new file mode 100644 index 000000000..2861caa53 --- /dev/null +++ b/src/site/xdoc/pack200.xml @@ -0,0 +1,89 @@ +<?xml version="1.0"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> +<document> + <properties> + <title>Commons Compress Pack200 Package</title> + <author email="dev@commons.apache.org">Commons Documentation Team</author> + </properties> + <body> + <section name="The Pack200 package"> + + <p>The Pack200 algorithm is not a general purpose compression + algorithm but one specialized for compressing JAR archives. JAR + archives compressed with Pack200 will in general be different + from the original archive when decompressed again. 
More
+ information can be found in the Javadocs of the <a
+ href="https://docs.oracle.com/javase/7/docs/api/java/util/jar/Pack200.Packer.html">Pack200.Packer
+ class</a>.</p>
+
+ <p>While the <code>pack200</code> command line utility of the
+ JDK creates GZip compressed archives (<code>.pack.gz</code>) by
+ default, the streams provided by the Pack200 package only
+ perform the actual Pack200 operation. Wrap them in an
+ additional <code>GzipCompressor(In|Out)putStream</code> in order to deal
+ with deflated streams.</p>
+
+ <subsection name="Pack200Strategy">
+
+ <p>The Pack200 API provided by the Java class library is not
+ streaming friendly as it wants to consume its input completely
+ in a single operation. Because of this
+ <code>Pack200CompressorInputStream</code>'s constructor will immediately
+ unpack the stream, cache the results and provide an input
+ stream to the cache.</p>
+
+ <p><code>Pack200CompressorOutputStream</code> will cache all data that
+ is written to it and then pack it once the <code>finish</code>
+ or <code>close</code> method is called.</p>
+
+ <p>Two different caching modes are available - "in memory",
+ which is the default, and "temporary file". By default data
+ is cached in memory but you should switch to the temporary
+ file option if your archives are really big.</p>
+
+ <p>Given there always is an intermediate result
+ the <code>getBytesRead</code> and <code>getCount</code>
+ methods of <code>Pack200CompressorInputStream</code> are
+ meaningless (read from the real stream or from the
+ intermediate result?) and always return 0.</p>
+
+ </subsection>
+
+ <subsection name="Normalization">
+
+ <p>As a pack/unpack cycle may create a JAR archive that is
+ different from the original, digital signatures created for
+ the initial JAR will be broken by the process. There is a way
+ to "normalize" JAR archives prior to packing them that ensures
+ signatures applied to the "normalized" JAR will still be valid
+ after a pack/unpack cycle - see <a
+ href="https://download.oracle.com/javase/7/docs/api/java/util/jar/Pack200.Packer.html">Pack200.Packer</a>'s
+ javadocs.</p>
+
+ <p>The <code>Pack200Utils</code> class in the
+ <code>pack200</code> package provides several overloads of a
+ <code>normalize</code> method that can be used to prepare a
+ JAR archive in place or to a separate file.</p>
+
+ </subsection>
+
+ </section>
+</body>
+</document>
diff --git a/src/site/xdoc/security-reports.xml b/src/site/xdoc/security-reports.xml
new file mode 100644
index 000000000..caa2c3370
--- /dev/null
+++ b/src/site/xdoc/security-reports.xml
@@ -0,0 +1,142 @@
+<?xml version="1.0"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--> +<document> + <properties> + <title>Commons Compress Security Reports</title> + <author email="dev@commons.apache.org">Commons Documentation Team</author> + </properties> + <body> + <section name="General Information"> + <p>For information about reporting or asking questions about + security problems, please see the <a + href="https://commons.apache.org/security.html">security page + of the Commons project</a>.</p> + </section> + + <section name="Apache Commons Compress Security Vulnerabilities"> + <p>This page lists all security vulnerabilities fixed in + released versions of Apache Commons Compress. Each + vulnerability is given a security impact rating by the + development team - please note that this rating may vary from + platform to platform. We also list the versions of Commons + Compress the flaw is known to affect, and where a flaw has not + been verified list the version with a question mark.</p> + + <p>Please note that binary patches are never provided. If you + need to apply a source code patch, use the building + instructions for the Commons Compress version that you are + using.</p> + + <p>If you need help on building Commons Compress or other help + on following the instructions to mitigate the known + vulnerabilities listed here, please send your questions to the + public <a href="mail-lists.html">Compress Users mailing + list</a>.</p> + + <p>If you have encountered an unlisted security vulnerability + or other unexpected behaviour that has security impact, or if + the descriptions here are incomplete, please report them + privately to the Apache Security Team. Thank you.</p> + + <subsection name="Fixed in Apache Commons Compress 1.18"> + <p><b>Low: Denial of Service</b> <a + href="https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-11771">CVE-2018-11771</a></p> + + <p>When reading a specially crafted ZIP archive, the read + method of <code>ZipArchiveInputStream</code> can fail to + return the correct EOF indication after the end of the + stream has been reached. When combined with a + <code>java.io.InputStreamReader</code> this can lead to an + infinite stream, which can be used to mount a denial of + service attack against services that use Compress' zip + package</p> + + <p>This was fixed in revision <a + href="https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blobdiff;f=src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java;h=e1995d7aa51dfac6ae933987fb0b7760c607582b;hp=0a2c1aa0063c620c867715119eae2013c87b5e70;hb=a41ce6892cb0590b2e658704434ac0dbcb6834c8;hpb=64ed6dde03afbef6715fdfdeab5fc04be6192899">a41ce68</a>.</p> + + <p>This was first reported to the Security Team on 14 June + 2018 and made public on 16 August 2018.</p> + + <p>Affects: 1.7 - 1.17</p> + + </subsection> + + <subsection name="Fixed in Apache Commons Compress 1.16"> + <p><b>Low: Denial of Service</b> <a + href="https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-1324">CVE-2018-1324</a></p> + + <p>A specially crafted ZIP archive can be used to cause an + infinite loop inside of Compress' extra field parser used by + the <code>ZipFile</code> and + <code>ZipArchiveInputStream</code> classes. 
This can be
+ used to mount a denial of service attack against services
+ that use Compress' zip package.</p>
+
+ <p>This was fixed in revision <a
+ href="https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blobdiff;f=src/main/java/org/apache/commons/compress/archivers/zip/X0017_StrongEncryptionHeader.java;h=acc3b22346b49845e85b5ef27a5814b69e834139;hp=0feb9c98cc622cde1defa3bbd268ef82b4ae5c18;hb=2a2f1dc48e22a34ddb72321a4db211da91aa933b;hpb=dcb0486fb4cb2b6592c04d6ec2edbd3f690df5f2">2a2f1dc4</a>.</p>
+
+ <p>This was first reported to the project's JIRA on <a
+ href="https://issues.apache.org/jira/browse/COMPRESS-432">19
+ December 2017</a>.</p>
+
+ <p>Affects: 1.11 - 1.15</p>
+
+ </subsection>
+
+ <subsection name="Fixed in Apache Commons Compress 1.4.1">
+ <p><b>Low: Denial of Service</b> <a
+ href="https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2012-2098">CVE-2012-2098</a></p>
+
+ <p>The bzip2 compressing streams in Apache Commons Compress
+ internally use sorting algorithms with unacceptable
+ worst-case performance on very repetitive inputs. A
+ specially crafted input to Compress'
+ <code>BZip2CompressorOutputStream</code> can be used to make
+ the process spend a very long time while using up all
+ available processing time, effectively leading to a denial of
+ service.</p>
+
+ <p>This was fixed in revisions
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1332540">1332540</a>,
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1332552">1332552</a>,
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1333522">1333522</a>,
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1337444">1337444</a>,
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1340715">1340715</a>,
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1340723">1340723</a>,
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1340757">1340757</a>,
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1340786">1340786</a>,
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1340787">1340787</a>,
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1340790">1340790</a>,
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1340795">1340795</a> and
+ <a href="https://svn.apache.org/viewvc?view=revision&revision=1340799">1340799</a>.</p>
+
+ <p>This was first reported to the Security Team on 12 April
+ 2012 and made public on 23 May 2012.</p>
+
+ <p>Affects: 1.0 - 1.4</p>
+
+ </subsection>
+ </section>
+
+ <section name="Errors and Omissions">
+ <p>Please report any errors or omissions to <a
+ href="mail-lists.html">the dev mailing list</a>.</p>
+ </section>
+ </body>
+</document>
diff --git a/src/site/xdoc/tar.xml b/src/site/xdoc/tar.xml
new file mode 100644
index 000000000..c17ea8c0d
--- /dev/null
+++ b/src/site/xdoc/tar.xml
@@ -0,0 +1,228 @@
+<?xml version="1.0"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> +<document> + <properties> + <title>Commons Compress TAR package</title> + <author email="dev@commons.apache.org">Commons Documentation Team</author> + </properties> + <body> + <section name="The TAR package"> + + <p>In addition to the information stored + in <code>ArchiveEntry</code> a <code>TarArchiveEntry</code> + stores various attributes including information about the + original owner and permissions.</p> + + <p>There are several different dialects of the TAR format, maybe + even different TAR formats. The tar package contains special + cases in order to read many of the existing dialects and will by + default try to create archives in the original format (often + called "ustar"). This original format didn't support file names + longer than 100 characters or bigger than 8 GiB and the tar + package will by default fail if you try to write an entry that + goes beyond those limits. "ustar" is the common denominator of + all the existing tar dialects and is understood by most of the + existing tools.</p> + + <p>The tar package does not support the full POSIX tar standard + nor more modern GNU extension of said standard.</p> + + <subsection name="Long File Names"> + + <p>The <code>longFileMode</code> option of + <code>TarArchiveOutputStream</code> controls how files with + names longer than 100 characters are handled. The possible + choices are:</p> + + <ul> + <li><code>LONGFILE_ERROR</code>: throw an exception if such a + file is added. This is the default.</li> + <li><code>LONGFILE_TRUNCATE</code>: truncate such names.</li> + <li><code>LONGFILE_GNU</code>: use a GNU tar variant now + refered to as "oldgnu" of storing such names. If you choose + the GNU tar option, the archive can not be extracted using + many other tar implementations like the ones of OpenBSD, + Solaris or MacOS X.</li> + <li><code>LONGFILE_POSIX</code>: use a PAX <a + href="http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html#tag_04_100_13_03">extended + header</a> as defined by POSIX 1003.1. Most modern tar + implementations are able to extract such archives. <em>since + Commons Compress 1.4</em></li> + </ul> + + <p><code>TarArchiveInputStream</code> will recognize the GNU + tar as well as the POSIX extensions (starting with Commons + Compress 1.2) for long file names and reads the longer names + transparently.</p> + </subsection> + + <subsection name="Big Numeric Values"> + + <p>The <code>bigNumberMode</code> option of + <code>TarArchiveOutputStream</code> controls how files larger + than 8GiB or with other big numeric values that can't be + encoded in traditional header fields are handled. The + possible choices are:</p> + + <ul> + <li><code>BIGNUMBER_ERROR</code>: throw an exception if such an + entry is added. This is the default.</li> + <li><code>BIGNUMBER_STAR</code>: use a variant first + introduced by Jörg Schilling's <a + href="http://developer.berlios.de/projects/star">star</a> + and later adopted by GNU and BSD tar. 
This method is not + supported by all implementations.</li> + <li><code>BIGNUMBER_POSIX</code>: use a PAX <a + href="http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html#tag_04_100_13_03">extended + header</a> as defined by POSIX 1003.1. Most modern tar + implementations are able to extract such archives.</li> + </ul> + + <p>Starting with Commons Compress 1.4 + <code>TarArchiveInputStream</code> will recognize the star as + well as the POSIX extensions for big numeric values and reads them + transparently.</p> + </subsection> + + <subsection name="File Name Encoding"> + <p>The original ustar format only supports 7-Bit ASCII file + names, later implementations use the platform's default + encoding to encode file names. The POSIX standard recommends + using PAX extension headers for non-ASCII file names + instead.</p> + + <p>Commons Compress 1.1 to 1.3 assumed file names would be + encoded using ISO-8859-1. Starting with Commons Compress 1.4 + you can specify the encoding to expect (to use when writing) + as a parameter to <code>TarArchiveInputStream</code> + (<code>TarArchiveOutputStream</code>), it now defaults to the + platform's default encoding.</p> + + <p>Since Commons Compress 1.4 another optional parameter - + <code>addPaxHeadersForNonAsciiNames</code> - of + <code>TarArchiveOutputStream</code> controls whether PAX + extension headers will be written for non-ASCII file names. + By default they will not be written to preserve space. + <code>TarArchiveInputStream</code> will read them + transparently if present.</p> + </subsection> + + <subsection name="Sparse files"> + + <p><code>TarArchiveInputStream</code> will recognize sparse + file entries stored using the "oldgnu" format + (<code>--sparse-version=0.0</code> in GNU tar) but is not + able to extract them correctly. <a href="#Unsupported + Features"><code>canReadEntryData</code></a> will return false + on such entries. The other variants of sparse files can + currently not be detected at all.</p> + </subsection> + + <subsection name="Consuming Archives Completely"> + + <p>The end of a tar archive is signalled by two consecutive + records of all zeros. Unfortunately not all tar + implementations adhere to this and some only write one record + to end the archive. Commons Compress will always write two + records but stop reading an archive as soon as finds one + record of all zeros.</p> + + <p>Prior to version 1.5 this could leave the second EOF record + inside the stream when <code>getNextEntry</code> or + <code>getNextTarEntry</code> returned <code>null</code> + Starting with version 1.5 <code>TarArchiveInputStream</code> + will try to read a second record as well if present, + effectively consuming the archive completely.</p> + + </subsection> + + <subsection name="PAX Extended Header"> + <p>The tar package has supported reading PAX extended headers + since 1.3 for local headers and 1.11 for global headers. 
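          <p>As an illustration of the <code>longFileMode</code> and
          <code>bigNumberMode</code> options described in the earlier
          subsections, the following minimal sketch (file name, entry
          name and size are made up for the example; imports and
          exception handling are omitted) configures an output stream
          to fall back to PAX extended headers where needed:</p>

          <source>
TarArchiveOutputStream out =
    new TarArchiveOutputStream(new FileOutputStream("archive.tar"));
out.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
out.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);

TarArchiveEntry entry =
    new TarArchiveEntry("some/path/that/is/well/beyond/the/traditional/one/hundred/character/limit/of/the/ustar/format/data.bin");
entry.setSize(lengthOfData); // placeholder; may exceed 8 GiB because of BIGNUMBER_POSIX
out.putArchiveEntry(entry);
// ... copy the entry's data to "out" ...
out.closeArchiveEntry();
out.finish();
out.close();
          </source>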
The + following entries of PAX headers are applied when reading:</p> + + <dl> + <dt>path</dt> + <dd>set the entry's name</dd> + + <dt>linkpath</dt> + <dd>set the entry's link name</dd> + + <dt>gid</dt> + <dd>set the entry's group id</dd> + + <dt>gname</dt> + <dd>set the entry's group name</dd> + + <dt>uid</dt> + <dd>set the entry's user id</dd> + + <dt>uname</dt> + <dd>set the entry's user name</dd> + + <dt>size</dt> + <dd>set the entry's size</dd> + + <dt>mtime</dt> + <dd>set the entry's modification time</dd> + + <dt>SCHILY.devminor</dt> + <dd>set the entry's minor device number</dd> + + <dt>SCHILY.devmajor</dt> + <dd>set the entry's major device number</dd> + </dl> + + <p>in addition some fields used by GNU tar and star used to + signal sparse entries are supported and are used for the + <code>is*GNUSparse</code> and <code>isStarSparse</code> + methods.</p> + + <p>Some PAX extra headers may be set when writing archives, + for example for non-ASCII names or big numeric values. This + depends on various setting of the output stream - see the + previous sections.</p> + + <p>Since 1.15 you can directly access all PAX extension + headers that have been found when reading an entry or specify + extra headers to be written to a (local) PAX extended header + entry.</p> + + <p>Some hints if you try to set extended headers:</p> + + <ul> + <li>pax header keywords should be ascii. star/gnutar + (SCHILY.xattr.* ) do not check for this. libarchive/bsdtar + (LIBARCHIVE.xattr.*) uses URL-Encoding.</li> + <li>pax header values should be encoded as UTF-8 characters + (including trailing <code>\0</code>). star/gnutar + (SCHILY.xattr.*) do not check for this. libarchive/bsdtar + (LIBARCHIVE.xattr.*) encode values using Base64.</li> + <li>libarchive/bsdtar will read SCHILY.xattr headers, but + will not generate them.</li> + <li>gnutar will complain about LIBARCHIVE.xattr (and any + other unknown) headers and will neither encode nor decode + them.</li> + </ul> + </subsection> + + </section> + </body> +</document> diff --git a/src/site/xdoc/zip.xml b/src/site/xdoc/zip.xml new file mode 100644 index 000000000..59ff5dcb8 --- /dev/null +++ b/src/site/xdoc/zip.xml @@ -0,0 +1,568 @@ +<?xml version="1.0"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> +<document> + <properties> + <title>Commons Compress ZIP package</title> + <author email="dev@commons.apache.org">Commons Documentation Team</author> + </properties> + <body> + <section name="The ZIP package"> + + <p>The ZIP package provides features not found + in <code>java.util.zip</code>:</p> + + <ul> + <li>Support for encodings other than UTF-8 for filenames and + comments. 
Starting with Java7 this is supported + by <code>java.util.zip</code> as well.</li> + <li>Access to internal and external attributes (which are used + to store Unix permission by some zip implementations).</li> + <li>Structured support for extra fields.</li> + </ul> + + <p>In addition to the information stored + in <code>ArchiveEntry</code> a <code>ZipArchiveEntry</code> + stores internal and external attributes as well as extra + fields which may contain information like Unix permissions, + information about the platform they've been created on, their + last modification time and an optional comment.</p> + + <subsection name="ZipArchiveInputStream vs ZipFile"> + + <p>ZIP archives store a archive entries in sequence and + contain a registry of all entries at the very end of the + archive. It is acceptable for an archive to contain several + entries of the same name and have the registry (called the + central directory) decide which entry is actually to be used + (if any).</p> + + <p>In addition the ZIP format stores certain information only + inside the central directory but not together with the entry + itself, this is:</p> + + <ul> + <li>internal and external attributes</li> + <li>different or additional extra fields</li> + </ul> + + <p>This means the ZIP format cannot really be parsed + correctly while reading a non-seekable stream, which is what + <code>ZipArchiveInputStream</code> is forced to do. As a + result <code>ZipArchiveInputStream</code></p> + <ul> + <li>may return entries that are not part of the central + directory at all and shouldn't be considered part of the + archive.</li> + <li>may return several entries with the same name.</li> + <li>will not return internal or external attributes.</li> + <li>may return incomplete extra field data.</li> + <li>may return unknown sizes and CRC values for entries + until the next entry has been reached if the archive uses + the data descriptor feature (see below).</li> + </ul> + + <p><code>ZipArchiveInputStream</code> shares these limitations + with <code>java.util.zip.ZipInputStream</code>.</p> + + <p><code>ZipFile</code> is able to read the central directory + first and provide correct and complete information on any + ZIP archive.</p> + + <p>ZIP archives know a feature called the data descriptor + which is a way to store an entry's length after the entry's + data. This can only work reliably if the size information + can be taken from the central directory or the data itself + can signal it is complete, which is true for data that is + compressed using the DEFLATED compression algorithm.</p> + + <p><code>ZipFile</code> has access to the central directory + and can extract entries using the data descriptor reliably. + The same is true for <code>ZipArchiveInputStream</code> as + long as the entry is DEFLATED. For STORED + entries <code>ZipArchiveInputStream</code> can try to read + ahead until it finds the next entry, but this approach is + not safe and has to be enabled by a constructor argument + explicitly.</p> + + <p>If possible, you should always prefer <code>ZipFile</code> + over <code>ZipArchiveInputStream</code>.</p> + + <p><code>ZipFile</code> requires a + <code>SeekableByteChannel</code> that will be obtained + transparently when reading from a file. 
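          <p>A minimal sketch of reading an archive through
          <code>ZipFile</code> (the archive name is made up; imports
          and exception handling are omitted):</p>

          <source><![CDATA[
ZipFile zipFile = new ZipFile(new File("archive.zip"));
try {
    Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
    while (entries.hasMoreElements()) {
        ZipArchiveEntry entry = entries.nextElement();
        if (!zipFile.canReadEntryData(entry)) {
            // encrypted or compressed with an unsupported method,
            // see the section on encryption below
            continue;
        }
        InputStream content = zipFile.getInputStream(entry);
        // ... consume "content" ...
        content.close();
    }
} finally {
    ZipFile.closeQuietly(zipFile);
}
]]></source>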
The class + <code>org.apache.commons.compress.utils.SeekableInMemoryByteChannel</code> + allows you to read from an in-memory archive.</p> + + </subsection> + + <subsection name="ZipArchiveOutputStream" id="ZipArchiveOutputStream"> + <p><code>ZipArchiveOutputStream</code> has three constructors, + one of them uses a <code>File</code> argument, one a + <code>SeekableByteChannel</code> and the last uses an + <code>OutputStream</code>. The <code>File</code> version will + try to use <code>SeekableByteChannel</code> and fall back to + using a <code>FileOutputStream</code> internally if that + fails.</p> + + <p>If <code>ZipArchiveOutputStream</code> can + use <code>SeekableByteChannel</code> it can employ some + optimizations that lead to smaller archives. It also makes + it possible to add uncompressed (<code>setMethod</code> used + with <code>STORED</code>) entries of unknown size when + calling <code>putArchiveEntry</code> - this is not allowed + if <code>ZipArchiveOutputStream</code> has to use + an <code>OutputStream</code>.</p> + + <p>If you know you are writing to a file, you should always + prefer the <code>File</code>- or + <code>SeekableByteChannel</code>-arg constructors. The class + <code>org.apache.commons.compress.utils.SeekableInMemoryByteChannel</code> + allows you to write to an in-memory archive.</p> + + </subsection> + + <subsection name="Extra Fields"> + + <p>Inside a ZIP archive, additional data can be attached to + each entry. The <code>java.util.zip.ZipEntry</code> class + provides access to this via the <code>get/setExtra</code> + methods as arrays of <code>byte</code>s.</p> + + <p>Actually the extra data is supposed to be more structured + than that and Compress' ZIP package provides access to the + structured data as <code>ExtraField</code> instances. Only + a subset of all defined extra field formats is supported by + the package, any other extra field will be stored + as <code>UnrecognizedExtraField</code>.</p> + + <p>Prior to version 1.1 of this library trying to read an + archive with extra fields that didn't follow the recommended + structure for those fields would cause Compress to throw an + exception. Starting with version 1.1 these extra fields + will now be read + as <code>UnparseableExtraFieldData</code>.</p> + + </subsection> + + <subsection name="Encoding" id="encoding"> + + <p>Traditionally the ZIP archive format uses CodePage 437 as + encoding for file name, which is not sufficient for many + international character sets.</p> + + <p>Over time different archivers have chosen different ways to + work around the limitation - the <code>java.util.zip</code> + packages simply uses UTF-8 as its encoding for example.</p> + + <p>Ant has been offering the encoding attribute of the zip and + unzip task as a way to explicitly specify the encoding to + use (or expect) since Ant 1.4. It defaults to the + platform's default encoding for zip and UTF-8 for jar and + other jar-like tasks (war, ear, ...) as well as the unzip + family of tasks.</p> + + <p>More recent versions of the ZIP specification introduce + something called the "language encoding flag" + which can be used to signal that a file name has been + encoded using UTF-8. All ZIP-archives written by Compress + will set this flag, if the encoding has been set to UTF-8. 
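          <p>Both settings correspond to plain setters on
          <code>ZipArchiveOutputStream</code>; a minimal sketch (the
          file name is made up and exception handling is
          omitted):</p>

          <source>
ZipArchiveOutputStream out = new ZipArchiveOutputStream(new File("archive.zip"));
out.setEncoding("UTF-8");
// the language encoding flag is enabled by default,
// the call is only shown to make the setting explicit
out.setUseLanguageEncodingFlag(true);
          </source>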
+ Our interoperability tests with existing archivers didn't + show any ill effects (in fact, most archivers ignore the + flag to date), but you can turn off the "language encoding + flag" by setting the attribute + <code>useLanguageEncodingFlag</code> to <code>false</code> on the + <code>ZipArchiveOutputStream</code> if you should encounter + problems.</p> + + <p>The <code>ZipFile</code> + and <code>ZipArchiveInputStream</code> classes will + recognize the language encoding flag and ignore the encoding + set in the constructor if it has been found.</p> + + <p>The InfoZIP developers have introduced new ZIP extra fields + that can be used to add an additional UTF-8 encoded file + name to the entry's metadata. Most archivers ignore these + extra fields. <code>ZipArchiveOutputStream</code> supports + an option <code>createUnicodeExtraFields</code> which makes + it write these extra fields either for all entries + ("always") or only those whose name cannot be encoded using + the specified encoding (not-encodeable), it defaults to + "never" since the extra fields create bigger archives.</p> + + <p>The fallbackToUTF8 attribute + of <code>ZipArchiveOutputStream</code> can be used to create + archives that use the specified encoding in the majority of + cases but UTF-8 and the language encoding flag for filenames + that cannot be encoded using the specified encoding.</p> + + <p>The <code>ZipFile</code> + and <code>ZipArchiveInputStream</code> classes recognize the + Unicode extra fields by default and read the file name + information from them, unless you set the constructor parameter + <code>scanForUnicodeExtraFields</code> to false.</p> + + <h4>Recommendations for Interoperability</h4> + + <p>The optimal setting of flags depends on the archivers you + expect as consumers/producers of the ZIP archives. Below + are some test results which may be superseded with later + versions of each tool.</p> + + <ul> + <li>The java.util.zip package used by the jar executable or + to read jars from your CLASSPATH reads and writes UTF-8 + names, it doesn't set or recognize any flags or Unicode + extra fields.</li> + + <li>Starting with Java7 <code>java.util.zip</code> writes + UTF-8 by default and uses the language encoding flag. It + is possible to specify a different encoding when + reading/writing ZIPs via new constructors. The package + now recognizes the language encoding flag when reading and + ignores the Unicode extra fields.</li> + + <li>7Zip writes CodePage 437 by default but uses UTF-8 and + the language encoding flag when writing entries that + cannot be encoded as CodePage 437 (similar to the zip task + with fallbacktoUTF8 set to true). It recognizes the + language encoding flag when reading and ignores the + Unicode extra fields.</li> + + <li>WinZIP writes CodePage 437 and uses Unicode extra fields + by default. It recognizes the Unicode extra field and the + language encoding flag when reading.</li> + + <li>Windows' "compressed folder" feature doesn't recognize + any flag or extra field and creates archives using the + platforms default encoding - and expects archives to be in + that encoding when reading them.</li> + + <li>InfoZIP based tools can recognize and write both, it is + a compile time option and depends on the platform so your + mileage may vary.</li> + + <li>PKWARE zip tools recognize both and prefer the language + encoding flag. 
They create archives using CodePage 437 if + possible and UTF-8 plus the language encoding flag for + file names that cannot be encoded as CodePage 437.</li> + </ul> + + <p>So, what to do?</p> + + <p>If you are creating jars, then java.util.zip is your main + consumer. We recommend you set the encoding to UTF-8 and + keep the language encoding flag enabled. The flag won't + help or hurt java.util.zip prior to Java7 but archivers that + support it will show the correct file names.</p> + + <p>For maximum interop it is probably best to set the encoding + to UTF-8, enable the language encoding flag and create + Unicode extra fields when writing ZIPs. Such archives + should be extracted correctly by java.util.zip, 7Zip, + WinZIP, PKWARE tools and most likely InfoZIP tools. They + will be unusable with Windows' "compressed folders" feature + and bigger than archives without the Unicode extra fields, + though.</p> + + <p>If Windows' "compressed folders" is your primary consumer, + then your best option is to explicitly set the encoding to + the target platform. You may want to enable creation of + Unicode extra fields so the tools that support them will + extract the file names correctly.</p> + </subsection> + + <subsection name="Encryption and Alternative Compression Algorithms" + id="encryption"> + + <p>In most cases entries of an archive are not encrypted and + are either not compressed at all or use the DEFLATE + algorithm, Commons Compress' ZIP archiver will handle them + just fine. As of version 1.7, Commons Compress can also + decompress entries compressed with the legacy SHRINK and + IMPLODE algorithms of PKZIP 1.x. Version 1.11 of Commons + Compress adds read-only support for BZIP2. Version 1.16 adds + read-only support for DEFLATE64 - also known as "enhanced DEFLATE".</p> + + <p>The ZIP specification allows for various other compression + algorithms and also supports several different ways of + encrypting archive contents. Neither of those methods is + currently supported by Commons Compress and any such entry can + not be extracted by the archiving code.</p> + + <p><code>ZipFile</code>'s and + <code>ZipArchiveInputStream</code>'s + <code>canReadEntryData</code> methods will return false for + encrypted entries or entries using an unsupported encryption + mechanism. Using this method it is possible to at least + detect and skip the entries that can not be extracted.</p> + + <table> + <thead> + <tr> + <th>Version of Apache Commons Compress</th> + <th>Supported Compression Methods</th> + <th>Supported Encryption Methods</th> + </tr> + </thead> + <tbody> + <tr> + <td>1.0 to 1.6</td> + <td>STORED, DEFLATE</td> + <td>-</td> + </tr> + <tr> + <td>1.7 to 1.10</td> + <td>STORED, DEFLATE, SHRINK, IMPLODE</td> + <td>-</td> + </tr> + <tr> + <td>1.11 to 1.15</td> + <td>STORED, DEFLATE, SHRINK, IMPLODE, BZIP2</td> + <td>-</td> + </tr> + <tr> + <td>1.16 and later</td> + <td>STORED, DEFLATE, SHRINK, IMPLODE, BZIP2, DEFLATE64 + (enhanced deflate)</td> + <td>-</td> + </tr> + </tbody> + </table> + + </subsection> + + <subsection name="Zip64 Support" id="zip64"> + <p>The traditional ZIP format is limited to archive sizes of + four gibibyte (actually 2<sup>32</sup> - 1 bytes ≈ + 4.3 GB) and 65635 entries, where each individual entry is + limited to four gibibyte as well. 
These limits seemed + excessive in the 1980s.</p> + + <p>Version 4.5 of the ZIP specification introduced the so + called "Zip64 extensions" to push those limitations for + compressed or uncompressed sizes of up to 16 exbibyte + (actually 2<sup>64</sup> - 1 bytes ≈ 18.5 EB, i.e + 18.5 x 10<sup>18</sup> bytes) in archives that themselves + can take up to 16 exbibyte containing more than + 18 x 10<sup>18</sup> entries.</p> + + <p>Apache Commons Compress 1.2 and earlier do not support + Zip64 extensions at all.</p> + + <p>Starting with Apache Commons Compress + 1.3 <code>ZipArchiveInputStream</code> + and <code>ZipFile</code> transparently support Zip64 + extensions. By default <code>ZipArchiveOutputStream</code> + supports them transparently as well (i.e. it adds Zip64 + extensions if needed and doesn't use them for + entries/archives that don't need them) if the compressed and + uncompressed sizes of the entry are known + when <code>putArchiveEntry</code> is called + or <code>ZipArchiveOutputStream</code> + uses <code>SeekableByteChannel</code> + (see <a href="#ZipArchiveOutputStream">above</a>). If only + the uncompressed size is + known <code>ZipArchiveOutputStream</code> will assume the + compressed size will not be bigger than the uncompressed + size.</p> + + <p><code>ZipArchiveOutputStream</code>'s + <code>setUseZip64</code> can be used to control the behavior. + <code>Zip64Mode.AsNeeded</code> is the default behavior + described in the previous paragraph.</p> + + <p>If <code>ZipArchiveOutputStream</code> is writing to a + non-seekable stream it has to decide whether to use Zip64 + extensions or not before it starts wrtiting the entry data. + This means that if the size of the entry is unknown + when <code>putArchiveEntry</code> is called it doesn't have + anything to base the decision on. By default it will not + use Zip64 extensions in order to create archives that can be + extracted by older archivers (it will later throw an + exception in <code>closeEntry</code> if it detects Zip64 + extensions had been needed). It is possible to + instruct <code>ZipArchiveOutputStream</code> to always + create Zip64 extensions by using + the <code>setUseZip64</code> with an argument + of <code>Zip64Mode.Always</code>; use this if you are + writing entries of unknown size to a stream and expect some + of them to be too big to fit into the traditional + limits.</p> + + <p><code>Zip64Mode.Always</code> creates archives that use + Zip64 extensions for all entries, even those that don't + require them. Such archives will be slightly bigger than + archives created with one of the other modes and not be + readable by unarchivers that don't support Zip64 + extensions.</p> + + <p><code>Zip64Mode.Never</code> will not use any Zip64 + extensions at all and may lead to + a <code>Zip64RequiredException</code> to be thrown + if <code>ZipArchiveOutputStream</code> detects that one of + the format's limits is exceeded. Archives created in this + mode will be readable by all unarchivers; they may be + slightly smaller than archives created + with <code>SeekableByteChannel</code> + in <code>Zip64Mode.AsNeeded</code> mode if some of the + entries had unknown sizes.</p> + + <p>The <code>java.util.zip</code> package and the + <code>jar</code> command of Java5 and earlier can not read + Zip64 extensions and will fail if the archive contains any. 
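          <p>A minimal sketch of the two explicit modes
          (<code>zipOut</code> is a placeholder
          <code>ZipArchiveOutputStream</code>; only one of the calls
          would be used, and imports are omitted):</p>

          <source>
// entries of unknown size streamed to a non-seekable target,
// some of which may exceed the traditional limits:
zipOut.setUseZip64(Zip64Mode.Always);

// maximum compatibility with consumers that do not understand Zip64,
// throws Zip64RequiredException if a limit is exceeded:
zipOut.setUseZip64(Zip64Mode.Never);
          </source>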
+ So if you intend to create archives that Java5 can consume + you must set the mode to <code>Zip64Mode.Never</code></p> + + <h4>Known Limitations</h4> + + <p>Some of the theoretical limits of the format are not + reached because Apache Commons Compress' own API + (<code>ArchiveEntry</code>'s size information uses + a <code>long</code>) or its usage of Java collections + or <code>SeekableByteChannel</code> internally. The table + below shows the theoretical limits supported by Apache + Commons Compress. In practice it is very likely that you'd + run out of memory or your file system won't allow files that + big long before you reach either limit.</p> + + <table> + <thead> + <tr> + <th/> + <th>Max. Size of Archive</th> + <th>Max. Compressed/Uncompressed Size of Entry</th> + <th>Max. Number of Entries</th> + </tr> + </thead> + <tbody> + <tr> + <td>ZIP Format Without Zip 64 Extensions</td> + <td>2<sup>32</sup> - 1 bytes ≈ 4.3 GB</td> + <td>2<sup>32</sup> - 1 bytes ≈ 4.3 GB</td> + <td>65535</td> + </tr> + <tr> + <td>ZIP Format using Zip 64 Extensions</td> + <td>2<sup>64</sup> - 1 bytes ≈ 18.5 EB</td> + <td>2<sup>64</sup> - 1 bytes ≈ 18.5 EB</td> + <td>2<sup>64</sup> - 1 ≈ 18.5 x 10<sup>18</sup></td> + </tr> + <tr> + <td>Commons Compress 1.2 and earlier</td> + <td>unlimited in <code>ZipArchiveInputStream</code> + and <code>ZipArchiveOutputStream</code> and + 2<sup>32</sup> - 1 bytes ≈ 4.3 GB + in <code>ZipFile</code>.</td> + <td>2<sup>32</sup> - 1 bytes ≈ 4.3 GB</td> + <td>unlimited in <code>ZipArchiveInputStream</code>, + 65535 in <code>ZipArchiveOutputStream</code> + and <code>ZipFile</code>.</td> + </tr> + <tr> + <td>Commons Compress 1.3 and later</td> + <td>unlimited in <code>ZipArchiveInputStream</code> + and <code>ZipArchiveOutputStream</code> and + 2<sup>63</sup> - 1 bytes ≈ 9.2 EB + in <code>ZipFile</code>.</td> + <td>2<sup>63</sup> - 1 bytes ≈ 9.2 EB</td> + <td>unlimited in <code>ZipArchiveInputStream</code>, + 2<sup>31</sup> - 1 ≈ 2.1 billion + in <code>ZipArchiveOutputStream</code> + and <code>ZipFile</code>.</td> + </tr> + </tbody> + </table> + + <h4>Known Interoperability Problems</h4> + + <p>The <code>java.util.zip</code> package of OpenJDK7 supports + Zip 64 extensions but its <code>ZipInputStream</code> and + <code>ZipFile</code> classes will be unable to extract + archives created with Commons Compress 1.3's + <code>ZipArchiveOutputStream</code> if the archive contains + entries that use the data descriptor, are smaller than 4 GiB + and have Zip 64 extensions enabled. I.e. the classes in + OpenJDK currently only support archives that use Zip 64 + extensions only when they are actually needed. These classes + are used to load JAR files and are the base for the + <code>jar</code> command line utility as well.</p> + </subsection> + + <subsection name="Consuming Archives Completely"> + + <p>Prior to version 1.5 <code>ZipArchiveInputStream</code> + would return null from <code>getNextEntry</code> or + <code>getNextZipEntry</code> as soon as the first central + directory header of the archive was found, leaving the whole + central directory itself unread inside the stream. 
Starting + with version 1.5 <code>ZipArchiveInputStream</code> will try + to read the archive up to and including the "end of central + directory" record effectively consuming the archive + completely.</p> + + </subsection> + + <subsection name="Symbolic Links" id="symlinks"> + + <p>Starting with Compress 1.5 <code>ZipArchiveEntry</code> + recognizes Unix Symbolic Link entries written by InfoZIP's + zip.</p> + + <p>The <code>ZipFile</code> class contains a convenience + method to read the link name of an entry. Basically all it + does is read the contents of the entry and convert it to + a string using the given file name encoding of the + archive.</p> + + </subsection> + + <subsection name="Parallel zip creation" id="parallel"> + + <p>Starting with Compress 1.10 there is now built-in support for + parallel creation of zip archives</p> + + <p>Multiple threads can write + to their own <code>ScatterZipOutputStream</code> + instance that is backed to file or to some user-implemented form of + storage (implementing <code>ScatterGatherBackingStore</code>).</p> + + <p>When the threads finish, they can join these streams together + to a complete zip file using the <code>writeTo</code> method + that will write a single <code>ScatterOutputStream</code> to a target + <code>ZipArchiveOutputStream</code>.</p> + + <p>To assist this process, clients can use + <code>ParallelScatterZipCreator</code> that will handle threads + pools and correct memory model consistency so the client + can avoid these issues. Please note that when writing well-formed + Zip files this way, it is usually necessary to keep a + separate <code>ScatterZipOutputStream</code> that receives all directories + and writes this to the target <code>ZipArchiveOutputStream</code> before + the ones created through <code>ParallelScatterZipCreator</code>. This is the responsibility of the client.</p> + + <p>There is no guarantee of order of the entries when writing a Zip + file with <code>ParallelScatterZipCreator</code>.</p> + + See the examples section for a code sample demonstrating how to make a zip file. + </subsection> + + </section> + </body> +</document> diff --git a/src/test/java/org/apache/commons/compress/AbstractTestCase.java b/src/test/java/org/apache/commons/compress/AbstractTestCase.java new file mode 100644 index 000000000..5a52aeaeb --- /dev/null +++ b/src/test/java/org/apache/commons/compress/AbstractTestCase.java @@ -0,0 +1,406 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress; + +import static org.junit.Assert.*; +import java.io.BufferedInputStream; +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.ArchiveStreamFactory; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.After; +import org.junit.Before; + +public abstract class AbstractTestCase { + + protected File dir; + protected File resultDir; + + private File archive; // used to delete the archive in tearDown + protected List<String> archiveList; // Lists the content of the archive as originally created + + protected ArchiveStreamFactory factory = new ArchiveStreamFactory(); + + @Before + public void setUp() throws Exception { + dir = mkdir("dir"); + resultDir = mkdir("dir-result"); + archive = null; + } + + public static File mkdir(final String name) throws IOException { + final File f = File.createTempFile(name, ""); + f.delete(); + f.mkdir(); + return f; + } + + public static File getFile(final String path) throws IOException { + final URL url = AbstractTestCase.class.getClassLoader().getResource(path); + if (url == null) { + throw new FileNotFoundException("couldn't find " + path); + } + URI uri = null; + try { + uri = url.toURI(); + } catch (final java.net.URISyntaxException ex) { + throw new IOException(ex); + } + return new File(uri); + } + + @After + public void tearDown() throws Exception { + rmdir(dir); + rmdir(resultDir); + dir = resultDir = null; + if (!tryHardToDelete(archive)) { + // Note: this exception won't be shown if the test has already failed + throw new Exception("Could not delete "+archive.getPath()); + } + } + + public static void rmdir(final File f) { + final String[] s = f.list(); + if (s != null) { + for (final String element : s) { + final File file = new File(f, element); + if (file.isDirectory()){ + rmdir(file); + } + final boolean ok = tryHardToDelete(file); + if (!ok && file.exists()){ + System.out.println("Failed to delete "+element+" in "+f.getPath()); + } + } + } + tryHardToDelete(f); // safer to delete and check + if (f.exists()){ + throw new Error("Failed to delete "+f.getPath()); + } + } + + private static final boolean ON_WINDOWS = + System.getProperty("os.name").toLowerCase(Locale.ENGLISH).contains("windows"); + + /** + * Accommodate Windows bug encountered in both Sun and IBM JDKs. + * Others possible. If the delete does not work, call System.gc(), + * wait a little and try again. + * + * @return whether deletion was successful + * @since Stolen from FileUtils in Ant 1.8.0 + */ + public static boolean tryHardToDelete(final File f) { + if (f != null && f.exists() && !f.delete()) { + if (ON_WINDOWS) { + System.gc(); + } + try { + Thread.sleep(10); + } catch (final InterruptedException ex) { + // Ignore Exception + } + return f.delete(); + } + return true; + } + + /** + * Creates an archive of textbased files in several directories. The + * archivername is the factory identifier for the archiver, for example zip, + * tar, cpio, jar, ar. 
The archive is created as a temp file. + * + * The archive contains the following files: + * <ul> + * <li>testdata/test1.xml</li> + * <li>testdata/test2.xml</li> + * <li>test/test3.xml</li> + * <li>bla/test4.xml</li> + * <li>bla/test5.xml</li> + * <li>bla/blubber/test6.xml</li> + * <li>test.txt</li> + * <li>something/bla</li> + * <li>test with spaces.txt</li> + * </ul> + * + * @param archivename + * the identifier of this archive + * @return the newly created file + * @throws Exception + * in case something goes wrong + */ + protected File createArchive(final String archivename) throws Exception { + ArchiveOutputStream out = null; + OutputStream stream = null; + try { + archive = File.createTempFile("test", "." + archivename); + archive.deleteOnExit(); + archiveList = new ArrayList<>(); + + stream = new FileOutputStream(archive); + out = factory.createArchiveOutputStream(archivename, stream); + + final File file1 = getFile("test1.xml"); + final File file2 = getFile("test2.xml"); + final File file3 = getFile("test3.xml"); + final File file4 = getFile("test4.xml"); + final File file5 = getFile("test.txt"); + final File file6 = getFile("test with spaces.txt"); + + addArchiveEntry(out, "testdata/test1.xml", file1); + addArchiveEntry(out, "testdata/test2.xml", file2); + addArchiveEntry(out, "test/test3.xml", file3); + addArchiveEntry(out, "bla/test4.xml", file4); + addArchiveEntry(out, "bla/test5.xml", file4); + addArchiveEntry(out, "bla/blubber/test6.xml", file4); + addArchiveEntry(out, "test.txt", file5); + addArchiveEntry(out, "something/bla", file6); + addArchiveEntry(out, "test with spaces.txt", file6); + + out.finish(); + return archive; + } finally { + if (out != null) { + out.close(); + } else if (stream != null) { + stream.close(); + } + } + } + + /** + * Add an entry to the archive, and keep track of the names in archiveList. + * + * @param out + * @param file1 + * @throws IOException + * @throws FileNotFoundException + */ + private void addArchiveEntry(final ArchiveOutputStream out, final String filename, final File infile) + throws IOException, FileNotFoundException { + final ArchiveEntry entry = out.createArchiveEntry(infile, filename); + out.putArchiveEntry(entry); + IOUtils.copy(new FileInputStream(infile), out); + out.closeArchiveEntry(); + archiveList.add(filename); + } + + /** + * Create an empty archive. + * @param archivename + * @return the archive File + * @throws Exception + */ + protected File createEmptyArchive(final String archivename) throws Exception { + ArchiveOutputStream out = null; + OutputStream stream = null; + archiveList = new ArrayList<>(); + try { + archive = File.createTempFile("empty", "." + archivename); + archive.deleteOnExit(); + stream = new FileOutputStream(archive); + out = factory.createArchiveOutputStream(archivename, stream); + out.finish(); + } finally { + if (out != null) { + out.close(); + } else if (stream != null) { + stream.close(); + } + } + return archive; + } + + /** + * Create an archive with a single file "test1.xml". + * + * @param archivename + * @return the archive File + * @throws Exception + */ + protected File createSingleEntryArchive(final String archivename) throws Exception { + ArchiveOutputStream out = null; + OutputStream stream = null; + archiveList = new ArrayList<>(); + try { + archive = File.createTempFile("empty", "." 
+ archivename); + archive.deleteOnExit(); + stream = new FileOutputStream(archive); + out = factory.createArchiveOutputStream(archivename, stream); + // Use short file name so does not cause problems for ar + addArchiveEntry(out, "test1.xml", getFile("test1.xml")); + out.finish(); + } finally { + if (out != null) { + out.close(); + } else if (stream != null) { + stream.close(); + } + } + return archive; + } + + /** + * Checks if an archive contains all expected files. + * + * @param archive + * the archive to check + * @param expected + * a list with expected string filenames + * @throws Exception + */ + protected void checkArchiveContent(final File archive, final List<String> expected) + throws Exception { + try (InputStream is = new FileInputStream(archive)) { + final BufferedInputStream buf = new BufferedInputStream(is); + final ArchiveInputStream in = factory.createArchiveInputStream(buf); + this.checkArchiveContent(in, expected); + } + } + + /** + * Checks that an archive input stream can be read, and that the file data matches file sizes. + * + * @param in + * @param expected list of expected entries or {@code null} if no check of names desired + * @throws Exception + */ + protected void checkArchiveContent(final ArchiveInputStream in, final List<String> expected) + throws Exception { + checkArchiveContent(in, expected, true); + } + + /** + * Checks that an archive input stream can be read, and that the file data matches file sizes. + * + * @param in + * @param expected list of expected entries or {@code null} if no check of names desired + * @param cleanUp Cleans up resources if true + * @return returns the created result file if cleanUp = false, or null otherwise + * @throws Exception + */ + protected File checkArchiveContent(final ArchiveInputStream in, final List<String> expected, final boolean cleanUp) + throws Exception { + final File result = mkdir("dir-result"); + result.deleteOnExit(); + + try { + ArchiveEntry entry = null; + while ((entry = in.getNextEntry()) != null) { + final File outfile = new File(result.getCanonicalPath() + "/result/" + + entry.getName()); + long copied=0; + if (entry.isDirectory()){ + outfile.mkdirs(); + } else { + outfile.getParentFile().mkdirs(); + try (OutputStream out = new FileOutputStream(outfile)) { + copied = IOUtils.copy(in, out); + } + } + final long size = entry.getSize(); + if (size != ArchiveEntry.SIZE_UNKNOWN) { + assertEquals("Entry.size should equal bytes read.",size, copied); + } + + if (!outfile.exists()) { + fail("extraction failed: " + entry.getName()); + } + if (expected != null && !expected.remove(getExpectedString(entry))) { + fail("unexpected entry: " + getExpectedString(entry)); + } + } + in.close(); + if (expected != null && expected.size() > 0) { + fail(expected.size() + " missing entries: " + Arrays.toString(expected.toArray())); + } + if (expected != null) { + assertEquals(0, expected.size()); + } + } finally { + if (cleanUp) { + rmdir(result); + } + } + return result; + } + + /** + * Override this method to change what is to be compared in the List. + * For example, size + name instead of just name. + * + * @param entry + * @return returns the entry name + */ + protected String getExpectedString(final ArchiveEntry entry) { + return entry.getName(); + } + + /** + * Creates a temporary directory and a temporary file inside that + * directory, returns both of them (the directory is the first + * element of the two element array). 
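     * @return a two element array holding the temporary directory
     *         and the temporary file inside it, in that order
     * @throws IOException if the directory or file cannot be created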
+ */ + protected File[] createTempDirAndFile() throws IOException { + final File tmpDir = createTempDir(); + final File tmpFile = File.createTempFile("testfile", "", tmpDir); + tmpFile.deleteOnExit(); + try (FileOutputStream fos = new FileOutputStream(tmpFile)) { + fos.write(new byte[] { 'f', 'o', 'o' }); + return new File[] { tmpDir, tmpFile }; + } + } + + protected File createTempDir() throws IOException { + final File tmpDir = mkdir("testdir"); + tmpDir.deleteOnExit(); + return tmpDir; + } + + protected void closeQuietly(final Closeable closeable){ + if (closeable != null) { + try { + closeable.close(); + } catch (final IOException ignored) { + // ignored + } + } + } + + protected static interface StreamWrapper<I extends InputStream> { + I wrap(InputStream in) throws Exception; + } +} diff --git a/src/test/java/org/apache/commons/compress/ArchiveReadTest.java b/src/test/java/org/apache/commons/compress/ArchiveReadTest.java new file mode 100644 index 000000000..ad016a2b7 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/ArchiveReadTest.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.FilenameFilter; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collection; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveException; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test that can read various archive file examples. + * + * This is a very simple implementation. + * + * Files must be in resources/archives, and there must be a file.txt containing + * the list of files in the archives. 
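 * (The listing file's actual name is files.txt; it is read by setUpFileList() below.)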
+ */ +@RunWith(Parameterized.class) +public class ArchiveReadTest extends AbstractTestCase { + + private static final ClassLoader CLASSLOADER = ArchiveReadTest.class.getClassLoader(); + private static final File ARCDIR; + private static final ArrayList<String> FILELIST = new ArrayList<>(); + + static { + try { + ARCDIR = new File(CLASSLOADER.getResource("archives").toURI()); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + private final File file; + + public ArchiveReadTest(final String file){ + this.file = new File(ARCDIR, file); + } + + @BeforeClass + public static void setUpFileList() throws Exception { + assertTrue(ARCDIR.exists()); + final File listing= new File(ARCDIR,"files.txt"); + assertTrue("files.txt is readable",listing.canRead()); + final BufferedReader br = new BufferedReader(new FileReader(listing)); + String line; + while ((line=br.readLine())!=null){ + if (!line.startsWith("#")){ + FILELIST.add(line); + } + } + br.close(); + } + + @Parameters(name = "file={0}") + public static Collection<Object[]> data() { + assertTrue(ARCDIR.exists()); + final Collection<Object[]> params = new ArrayList<>(); + for (final String f : ARCDIR.list(new FilenameFilter() { + @Override + public boolean accept(final File dir, final String name) { + return !name.endsWith(".txt"); + } + })) + { + params.add(new Object[] { f }); + } + return params; + } + + // files.txt contains size and filename + @Override + protected String getExpectedString(final ArchiveEntry entry) { + return entry.getSize() + " " + entry.getName(); + } + + @Test + public void testArchive() throws Exception{ + @SuppressWarnings("unchecked") // fileList is correct type already + final + ArrayList<String> expected= (ArrayList<String>) FILELIST.clone(); + try { + checkArchiveContent(file, expected); + } catch (final ArchiveException e) { + fail("Problem checking "+file); + } catch (final AssertionError e) { // show error in context + fail("Problem checking " + file + " " +e); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/ArchiveUtilsTest.java b/src/test/java/org/apache/commons/compress/ArchiveUtilsTest.java new file mode 100644 index 000000000..9e4c51bec --- /dev/null +++ b/src/test/java/org/apache/commons/compress/ArchiveUtilsTest.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress; + +import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry; +import org.apache.commons.compress.utils.ArchiveUtils; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class ArchiveUtilsTest extends AbstractTestCase { + + private static final int bytesToTest = 50; + private static final byte[] byteTest = new byte[bytesToTest]; + static { + for(int i=0; i < byteTest.length ;) { + byteTest[i]=(byte) i; + byteTest[i+1]=(byte) -i; + i += 2; + } + } + + @Test + public void testCompareBA(){ + final byte[] buffer1 = {1,2,3}; + final byte[] buffer2 = {1,2,3,0}; + final byte[] buffer3 = {1,2,3}; + assertTrue(ArchiveUtils.isEqual(buffer1, buffer2, true)); + assertFalse(ArchiveUtils.isEqual(buffer1, buffer2, false)); + assertFalse(ArchiveUtils.isEqual(buffer1, buffer2)); + assertTrue(ArchiveUtils.isEqual(buffer2, buffer1, true)); + assertFalse(ArchiveUtils.isEqual(buffer2, buffer1, false)); + assertFalse(ArchiveUtils.isEqual(buffer2, buffer1)); + assertTrue(ArchiveUtils.isEqual(buffer1, buffer3)); + assertTrue(ArchiveUtils.isEqual(buffer3, buffer1)); + } + + @Test + public void testCompareAscii(){ + final byte[] buffer1 = {'a','b','c'}; + final byte[] buffer2 = {'d','e','f',0}; + assertTrue(ArchiveUtils.matchAsciiBuffer("abc", buffer1)); + assertFalse(ArchiveUtils.matchAsciiBuffer("abc\0", buffer1)); + assertTrue(ArchiveUtils.matchAsciiBuffer("def\0", buffer2)); + assertFalse(ArchiveUtils.matchAsciiBuffer("def", buffer2)); + } + + @Test + public void testAsciiConversions() { + asciiToByteAndBackOK(""); + asciiToByteAndBackOK("abcd"); + asciiToByteAndBackFail("\u8025"); + } + + @Test + public void sanitizeShortensString() { + final String input = "012345678901234567890123456789012345678901234567890123456789" + + "012345678901234567890123456789012345678901234567890123456789" + + "012345678901234567890123456789012345678901234567890123456789" + + "012345678901234567890123456789012345678901234567890123456789" + + "012345678901234567890123456789012345678901234567890123456789"; + final String expected = "012345678901234567890123456789012345678901234567890123456789" + + "012345678901234567890123456789012345678901234567890123456789" + + "012345678901234567890123456789012345678901234567890123456789" + + "012345678901234567890123456789012345678901234567890123456789" + + "012345678901..."; + assertEquals(expected, ArchiveUtils.sanitize(input)); + } + + @Test + public void sanitizeLeavesShortStringsAlone() { + final String input = "012345678901234567890123456789012345678901234567890123456789"; + assertEquals(input, ArchiveUtils.sanitize(input)); + } + + @Test + public void sanitizeRemovesUnprintableCharacters() { + final String input = "\b12345678901234567890123456789012345678901234567890123456789"; + final String expected = "?12345678901234567890123456789012345678901234567890123456789"; + assertEquals(expected, ArchiveUtils.sanitize(input)); + } + + @Test + public void testIsEqualWithNullWithPositive() { + + byte[] byteArray = new byte[8]; + byteArray[1] = (byte) (-77); + + assertFalse(ArchiveUtils.isEqualWithNull(byteArray, 0, (byte)0, byteArray, (byte)0, (byte)80)); + + } + + @Test + public void testToAsciiBytes() { + + byte[] byteArray = ArchiveUtils.toAsciiBytes("SOCKET"); + + assertArrayEquals(new byte[] {(byte)83, (byte)79, (byte)67, (byte)75, (byte)69, (byte)84}, byteArray); + + assertFalse(ArchiveUtils.isEqualWithNull(byteArray, 0, 46, byteArray, 63, 0)); + + } + + @Test + public void testToStringWithNonNull() { + + 
SevenZArchiveEntry sevenZArchiveEntry = new SevenZArchiveEntry(); + String string = ArchiveUtils.toString(sevenZArchiveEntry); + + assertEquals("- 0 null", string); + + } + + @Test + public void testIsEqual() { + + assertTrue(ArchiveUtils.isEqual((byte[]) null, 0, 0, (byte[]) null, 0, 0)); + + } + + @Test(expected = StringIndexOutOfBoundsException.class) + public void testToAsciiStringThrowsStringIndexOutOfBoundsException() { + + byte[] byteArray = new byte[3]; + + ArchiveUtils.toAsciiString(byteArray, 940, 2730); + + } + + private void asciiToByteAndBackOK(final String inputString) { + assertEquals(inputString, ArchiveUtils.toAsciiString(ArchiveUtils.toAsciiBytes(inputString))); + } + + private void asciiToByteAndBackFail(final String inputString) { + assertFalse(inputString.equals(ArchiveUtils.toAsciiString(ArchiveUtils.toAsciiBytes(inputString)))); + } +} diff --git a/src/test/java/org/apache/commons/compress/ChainingTestCase.java b/src/test/java/org/apache/commons/compress/ChainingTestCase.java new file mode 100644 index 000000000..62d277970 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/ChainingTestCase.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; + +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.junit.Test; + + +public class ChainingTestCase extends AbstractTestCase { + + @Test + public void testTarGzip() throws Exception { + final File file = getFile("bla.tgz"); + final TarArchiveInputStream is = new TarArchiveInputStream(new GzipCompressorInputStream(new FileInputStream(file))); + final TarArchiveEntry entry = (TarArchiveEntry)is.getNextEntry(); + assertNotNull(entry); + assertEquals("test1.xml", entry.getName()); + is.close(); + } + + @Test + public void testTarBzip2() throws Exception { + final File file = getFile("bla.tar.bz2"); + final TarArchiveInputStream is = new TarArchiveInputStream(new BZip2CompressorInputStream(new FileInputStream(file))); + final TarArchiveEntry entry = (TarArchiveEntry)is.getNextEntry(); + assertNotNull(entry); + assertEquals("test1.xml", entry.getName()); + is.close(); + } +} diff --git a/src/test/java/org/apache/commons/compress/DetectArchiverTestCase.java b/src/test/java/org/apache/commons/compress/DetectArchiverTestCase.java new file mode 100644 index 000000000..07a0b73a0 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/DetectArchiverTestCase.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress; + +import static org.junit.Assert.*; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; +import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; +import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.junit.Test; + +public final class DetectArchiverTestCase extends AbstractTestCase { + + final ClassLoader classLoader = getClass().getClassLoader(); + + @Test + public void testDetectionNotArchive() throws IOException { + try { + getStreamFor("test.txt"); + fail("Expected ArchiveException"); + } catch (final ArchiveException e) { + // expected + } + } + + @Test + public void testCOMPRESS117() throws Exception { + final ArchiveInputStream tar = getStreamFor("COMPRESS-117.tar"); + assertNotNull(tar); + assertTrue(tar instanceof TarArchiveInputStream); + } + + @Test + public void testCOMPRESS335() throws Exception { + final ArchiveInputStream tar = getStreamFor("COMPRESS-335.tar"); + assertNotNull(tar); + assertTrue(tar instanceof TarArchiveInputStream); + } + + @Test + public void testDetection() throws Exception { + + final ArchiveInputStream ar = getStreamFor("bla.ar"); + assertNotNull(ar); + assertTrue(ar instanceof ArArchiveInputStream); + + final ArchiveInputStream tar = getStreamFor("bla.tar"); + assertNotNull(tar); + assertTrue(tar instanceof TarArchiveInputStream); + + final ArchiveInputStream zip = getStreamFor("bla.zip"); + assertNotNull(zip); + assertTrue(zip instanceof ZipArchiveInputStream); + + final ArchiveInputStream jar = getStreamFor("bla.jar"); + assertNotNull(jar); + assertTrue(jar instanceof ZipArchiveInputStream); + + final ArchiveInputStream cpio = getStreamFor("bla.cpio"); + assertNotNull(cpio); + assertTrue(cpio instanceof CpioArchiveInputStream); + + final ArchiveInputStream arj = getStreamFor("bla.arj"); + assertNotNull(arj); + assertTrue(arj instanceof ArjArchiveInputStream); + +// Not yet implemented +// final ArchiveInputStream tgz = getStreamFor("bla.tgz"); +// assertNotNull(tgz); +// assertTrue(tgz instanceof TarArchiveInputStream); + + } + + private ArchiveInputStream getStreamFor(final String resource) + throws ArchiveException, IOException { + return factory.createArchiveInputStream( + new BufferedInputStream(new FileInputStream( + getFile(resource)))); + } + + // Check that the empty archives created by the code are readable + + // Not possible to detect empty "ar" archive as it is completely empty +// public void testEmptyArArchive() throws Exception { +// emptyArchive("ar"); +// } + + @Test + public void testEmptyCpioArchive() throws Exception { + checkEmptyArchive("cpio"); + } + + @Test + public void testEmptyJarArchive() throws Exception { + checkEmptyArchive("jar"); + } + + // empty tar archives just have 512 null bytes +// public void testEmptyTarArchive() throws Exception { +// checkEmptyArchive("tar"); +// } + @Test + public void testEmptyZipArchive() throws Exception { + checkEmptyArchive("zip"); + } + + private void checkEmptyArchive(final String type) throws Exception{ + final File ar = createEmptyArchive(type); // will be deleted by tearDown() + 
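// Editor's note (not part of this commit): DetectArchiverTestCase relies on the factory
// picking the right ArchiveInputStream subclass from the stream signature. A minimal sketch
// of the same detection outside a test, assuming a hypothetical local file "archive.bin";
// detection needs a mark-supported stream, hence the BufferedInputStream.
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;

public class FormatDetectionSketch {
    public static void main(String[] args) throws Exception {
        try (InputStream in = new BufferedInputStream(new FileInputStream("archive.bin"))) {
            // returns a format name such as "ar", "arj", "cpio", "dump", "tar", "zip" or "7z"
            String format = ArchiveStreamFactory.detect(in);
            System.out.println("Detected format: " + format);
        } catch (ArchiveException e) {
            System.out.println("Not a recognized archive: " + e.getMessage());
        }
    }
}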
ar.deleteOnExit(); // Just in case file cannot be deleted + ArchiveInputStream ais = null; + BufferedInputStream in = null; + try { + in = new BufferedInputStream(new FileInputStream(ar)); + ais = factory.createArchiveInputStream(in); + } catch (final ArchiveException ae) { + fail("Should have recognized empty archive for "+type); + } finally { + if (ais != null) { + ais.close(); // will close input as well + } else if (in != null){ + in.close(); + } + } + } +} diff --git a/src/test/java/org/apache/commons/compress/IOMethodsTest.java b/src/test/java/org/apache/commons/compress/IOMethodsTest.java new file mode 100644 index 000000000..481a9d51a --- /dev/null +++ b/src/test/java/org/apache/commons/compress/IOMethodsTest.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress; + +import static org.junit.Assert.*; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.ar.ArArchiveEntry; +import org.apache.commons.compress.archivers.cpio.CpioArchiveEntry; +import org.apache.commons.compress.archivers.jar.JarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.junit.Test; + +/** + * Check that the different write methods create the same output. + * TODO perform the same checks for reads. 
+ */ +public class IOMethodsTest extends AbstractTestCase { + + private static final int bytesToTest = 50; + private static final byte[] byteTest = new byte[bytesToTest]; + static { + for(int i=0; i < byteTest.length ;) { + byteTest[i]=(byte) i; + byteTest[i+1]=(byte) -i; + i += 2; + } + } + + @Test + public void testWriteAr() throws Exception { + final ArchiveEntry entry = new ArArchiveEntry("dummy", bytesToTest); + compareWrites("ar", entry); + } + + @Test + public void testWriteCpio() throws Exception { + final ArchiveEntry entry = new CpioArchiveEntry("dummy", bytesToTest); + compareWrites("cpio", entry); + } + + @Test + public void testWriteJar() throws Exception { + final ArchiveEntry entry = new JarArchiveEntry("dummy"); + compareWrites("jar", entry); + } + + @Test + public void testWriteTar() throws Exception { + final TarArchiveEntry entry = new TarArchiveEntry("dummy"); + entry.setSize(bytesToTest); + compareWrites("tar", entry); + } + + @Test + public void testWriteZip() throws Exception { + final ArchiveEntry entry = new ZipArchiveEntry("dummy"); + compareWrites("zip", entry); + } + + @Test + public void testReadAr() throws Exception { + compareReads("ar"); + } + + @Test + public void testReadCpio() throws Exception { + compareReads("cpio"); + } + + @Test + public void testReadJar() throws Exception { + compareReads("jar"); + } + + @Test + public void testReadTar() throws Exception { + compareReads("tar"); + } + + @Test + public void testReadZip() throws Exception { + compareReads("zip"); + } + + private void compareWrites(final String archiverName, final ArchiveEntry entry) throws Exception { + final OutputStream out1 = new ByteArrayOutputStream(); + final OutputStream out2 = new ByteArrayOutputStream(); + final OutputStream out3 = new ByteArrayOutputStream(); + final ArchiveOutputStream aos1 = factory.createArchiveOutputStream(archiverName, out1); + aos1.putArchiveEntry(entry); + final ArchiveOutputStream aos2 = factory.createArchiveOutputStream(archiverName, out2); + aos2.putArchiveEntry(entry); + final ArchiveOutputStream aos3 = factory.createArchiveOutputStream(archiverName, out3); + aos3.putArchiveEntry(entry); + for (final byte element : byteTest) { + aos1.write(element); + } + aos1.closeArchiveEntry(); + aos1.close(); + + aos2.write(byteTest); + aos2.closeArchiveEntry(); + aos2.close(); + + aos3.write(byteTest, 0, byteTest.length); + aos3.closeArchiveEntry(); + aos3.close(); + assertEquals("aos1Bytes!=aos2Bytes",aos1.getBytesWritten(),aos2.getBytesWritten()); + assertEquals("aos1Bytes!=aos3Bytes",aos1.getBytesWritten(),aos3.getBytesWritten()); + assertEquals("out1Len!=out2Len",out1.toString().length(),out2.toString().length()); + assertEquals("out1Len!=out2Len",out1.toString().length(),out3.toString().length()); + assertEquals("out1!=out2",out1.toString(),out2.toString()); + assertEquals("out1!=out3",out1.toString(),out3.toString()); + } + + private void compareReads(final String archiverName) throws Exception { + final OutputStream out1 = new ByteArrayOutputStream(); + final OutputStream out2 = new ByteArrayOutputStream(); + final OutputStream out3 = new ByteArrayOutputStream(); + final File file = createSingleEntryArchive(archiverName); + file.deleteOnExit(); + + final InputStream is1 = new FileInputStream(file); + final ArchiveInputStream ais1 = factory.createArchiveInputStream(archiverName, is1); + final ArchiveEntry nextEntry = ais1.getNextEntry(); + assertNotNull(nextEntry); + + final byte [] buff = new byte[10]; // small so multiple reads are needed; + final long 
size = nextEntry.getSize(); + if (size != ArchiveEntry.SIZE_UNKNOWN) { + assertTrue("Size should be > 0, found: "+size, size > 0); + } + + final InputStream is2 = new FileInputStream(file); + final ArchiveInputStream ais2 = factory.createArchiveInputStream(archiverName, is2); + final ArchiveEntry nextEntry2 = ais2.getNextEntry(); + assertNotNull(nextEntry2); + assertEquals("Expected same entry size", size, nextEntry2.getSize()); + + final InputStream is3 = new FileInputStream(file); + final ArchiveInputStream ais3 = factory.createArchiveInputStream(archiverName, is3); + final ArchiveEntry nextEntry3 = ais3.getNextEntry(); + assertNotNull(nextEntry3); + assertEquals("Expected same entry size", size, nextEntry3.getSize()); + + int b; + while((b=ais1.read()) != -1){ + out1.write(b); + } + ais1.close(); + + int bytes; + while((bytes = ais2.read(buff)) > 0){ + out2.write(buff, 0, bytes); + } + ais2.close(); + + while((bytes=ais3.read(buff, 0 , buff.length)) > 0){ + out3.write(buff, 0, bytes); + } + ais3.close(); + + assertEquals("out1Len!=out2Len",out1.toString().length(),out2.toString().length()); + assertEquals("out1Len!=out3Len",out1.toString().length(),out3.toString().length()); + assertEquals("out1!=out2",out1.toString(),out2.toString()); + assertEquals("out1!=out3",out1.toString(),out3.toString()); + } +} diff --git a/src/test/java/org/apache/commons/compress/MockEvilInputStream.java b/src/test/java/org/apache/commons/compress/MockEvilInputStream.java new file mode 100644 index 000000000..9c03a2124 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/MockEvilInputStream.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Simple mock InputStream that always throws an IOException + * when {@link #read()} or {@link #read(byte[], int, int)} + * is called. + */ +public class MockEvilInputStream extends InputStream { + + @Override + public int read() throws IOException { + throw new IOException("Evil"); + } + + @Override + public int read(byte[] bytes, int offset, int length) throws IOException { + throw new IOException("Evil"); + } +} + diff --git a/src/test/java/org/apache/commons/compress/OsgiITest.java b/src/test/java/org/apache/commons/compress/OsgiITest.java new file mode 100644 index 000000000..fe13d7119 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/OsgiITest.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
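// Editor's note (not part of this commit): MockEvilInputStream above exists purely to force
// the error paths. A sketch of how such a stream surfaces through ArchiveStreamFactory.detect(),
// which wraps the read failure in an ArchiveException (this mirrors the testDetect case later
// in this change; the snippet itself is illustrative only).
import java.io.BufferedInputStream;
import org.apache.commons.compress.MockEvilInputStream;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;

public class EvilStreamSketch {
    public static void main(String[] args) {
        try {
            ArchiveStreamFactory.detect(new BufferedInputStream(new MockEvilInputStream()));
        } catch (ArchiveException e) {
            // expected: "IOException while reading signature."
            System.out.println(e.getMessage());
        }
    }
}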
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress; + +import static org.ops4j.pax.exam.CoreOptions.bundle; +import static org.ops4j.pax.exam.CoreOptions.composite; +import static org.ops4j.pax.exam.CoreOptions.mavenBundle; +import static org.ops4j.pax.exam.CoreOptions.systemProperty; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.ops4j.pax.exam.Configuration; +import org.ops4j.pax.exam.Option; +import org.ops4j.pax.exam.junit.PaxExam; + +@RunWith(PaxExam.class) +public class OsgiITest { + + @Configuration + public Option[] config() { + return new Option[] { + systemProperty("pax.exam.osgi.unresolved.fail").value("true"), + mavenBundle().groupId("org.apache.felix").artifactId("org.apache.felix.scr") + .version("2.0.14"), + mavenBundle().groupId("org.apache.felix").artifactId("org.apache.felix.configadmin") + .version("1.8.16"), + composite(systemProperty("pax.exam.invoker").value("junit"), + bundle("link:classpath:META-INF/links/org.ops4j.pax.tipi.junit.link"), + bundle("link:classpath:META-INF/links/org.ops4j.pax.exam.invoker.junit.link"), + mavenBundle().groupId("org.apache.servicemix.bundles") + .artifactId("org.apache.servicemix.bundles.hamcrest").version("1.3_1")), + bundle("reference:file:target/classes/").start() + }; + } + + @Test + public void loadBundle() { + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/ArTestCase.java b/src/test/java/org/apache/commons/compress/archivers/ArTestCase.java new file mode 100644 index 000000000..fadb7c3af --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/ArTestCase.java @@ -0,0 +1,373 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers; + +import static org.junit.Assert.*; + +import java.io.BufferedInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ar.ArArchiveEntry; +import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; +import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Ignore; +import org.junit.Test; + +public final class ArTestCase extends AbstractTestCase { + + @Test + public void testArArchiveCreation() throws Exception { + final File output = new File(dir, "bla.ar"); + + final File file1 = getFile("test1.xml"); + final File file2 = getFile("test2.xml"); + + final OutputStream out = new FileOutputStream(output); + final ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream("ar", out); + os.putArchiveEntry(new ArArchiveEntry("test1.xml", file1.length())); + IOUtils.copy(new FileInputStream(file1), os); + os.closeArchiveEntry(); + + os.putArchiveEntry(new ArArchiveEntry("test2.xml", file2.length())); + IOUtils.copy(new FileInputStream(file2), os); + os.closeArchiveEntry(); + + os.close(); + } + + @Test + public void testArUnarchive() throws Exception { + final File output = new File(dir, "bla.ar"); + { + final File file1 = getFile("test1.xml"); + final File file2 = getFile("test2.xml"); + + final OutputStream out = new FileOutputStream(output); + final ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream("ar", out); + os.putArchiveEntry(new ArArchiveEntry("test1.xml", file1.length())); + IOUtils.copy(new FileInputStream(file1), os); + os.closeArchiveEntry(); + + os.putArchiveEntry(new ArArchiveEntry("test2.xml", file2.length())); + IOUtils.copy(new FileInputStream(file2), os); + os.closeArchiveEntry(); + os.close(); + out.close(); + } + + // UnArArchive Operation + final File input = output; + try (final InputStream is = new FileInputStream(input); + final ArchiveInputStream in = new ArchiveStreamFactory() + .createArchiveInputStream(new BufferedInputStream(is))) { + final ArArchiveEntry entry = (ArArchiveEntry) in.getNextEntry(); + + final File target = new File(dir, entry.getName()); + try (final OutputStream out = new FileOutputStream(target)) { + IOUtils.copy(in, out); + } + } + } + + @Test + public void testArDelete() throws Exception { + final File output = new File(dir, "bla.ar"); + + final File file1 = getFile("test1.xml"); + final File file2 = getFile("test2.xml"); + { + // create + + final OutputStream out = new FileOutputStream(output); + final ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream("ar", out); + os.putArchiveEntry(new ArArchiveEntry("test1.xml", file1.length())); + IOUtils.copy(new FileInputStream(file1), os); + os.closeArchiveEntry(); + + os.putArchiveEntry(new ArArchiveEntry("test2.xml", file2.length())); + IOUtils.copy(new FileInputStream(file2), os); + os.closeArchiveEntry(); + os.close(); + out.close(); + } + + assertEquals(8 + + 60 + file1.length() + (file1.length() % 2) + + 60 + file2.length() + (file2.length() % 2), + output.length()); + + final File output2 = new File(dir, "bla2.ar"); + + int copied = 0; + int deleted = 0; + + { + // remove all but one file + + final InputStream is = new 
FileInputStream(output); + final OutputStream os = new FileOutputStream(output2); + final ArchiveOutputStream aos = new ArchiveStreamFactory().createArchiveOutputStream("ar", os); + final ArchiveInputStream ais = new ArchiveStreamFactory().createArchiveInputStream(new BufferedInputStream(is)); + while(true) { + final ArArchiveEntry entry = (ArArchiveEntry)ais.getNextEntry(); + if (entry == null) { + break; + } + + if ("test1.xml".equals(entry.getName())) { + aos.putArchiveEntry(entry); + IOUtils.copy(ais, aos); + aos.closeArchiveEntry(); + copied++; + } else { + IOUtils.copy(ais, new ByteArrayOutputStream()); + deleted++; + } + + } + ais.close(); + aos.close(); + is.close(); + os.close(); + } + + assertEquals(1, copied); + assertEquals(1, deleted); + assertEquals(8 + + 60 + file1.length() + (file1.length() % 2), + output2.length()); + + long files = 0; + long sum = 0; + + { + final InputStream is = new FileInputStream(output2); + final ArchiveInputStream ais = new ArchiveStreamFactory().createArchiveInputStream(new BufferedInputStream(is)); + while(true) { + final ArArchiveEntry entry = (ArArchiveEntry)ais.getNextEntry(); + if (entry == null) { + break; + } + + IOUtils.copy(ais, new ByteArrayOutputStream()); + + sum += entry.getLength(); + files++; + } + ais.close(); + is.close(); + } + + assertEquals(1, files); + assertEquals(file1.length(), sum); + + } + + // TODO: revisit - does AR not support storing directories? + @Ignore + @Test + public void XtestDirectoryEntryFromFile() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + ArArchiveOutputStream aos = null; + ArArchiveInputStream ais = null; + try { + archive = File.createTempFile("test.", ".ar", tmp[0]); + archive.deleteOnExit(); + aos = new ArArchiveOutputStream(new FileOutputStream(archive)); + final long beforeArchiveWrite = tmp[0].lastModified(); + final ArArchiveEntry in = new ArArchiveEntry(tmp[0], "foo"); + aos.putArchiveEntry(in); + aos.closeArchiveEntry(); + aos.close(); + aos = null; + ais = new ArArchiveInputStream(new FileInputStream(archive)); + final ArArchiveEntry out = ais.getNextArEntry(); + ais.close(); + ais = null; + assertNotNull(out); + assertEquals("foo/", out.getName()); + assertEquals(0, out.getSize()); + // AR stores time with a granularity of 1 second + assertEquals(beforeArchiveWrite / 1000, + out.getLastModifiedDate().getTime() / 1000); + assertTrue(out.isDirectory()); + } finally { + if (ais != null) { + ais.close(); + } + if (aos != null) { + aos.close(); + } + tryHardToDelete(archive); + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + // TODO: revisit - does AR not support storing directories? 
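// Editor's note (not part of this commit): testArDelete above checks the resulting archive
// length against the ar layout: an 8-byte global header ("!<arch>\n"), a 60-byte header per
// entry, the entry data, and one padding byte whenever the data length is odd. A tiny helper
// expressing that arithmetic; the example lengths are arbitrary.
public class ArSizeSketch {
    static long expectedArSize(long... entryDataLengths) {
        long size = 8;                       // global "!<arch>\n" magic
        for (long len : entryDataLengths) {
            size += 60 + len + (len % 2);    // per-entry header + data + odd-length padding
        }
        return size;
    }

    public static void main(String[] args) {
        // e.g. two entries of 610 and 82 bytes of data
        System.out.println(expectedArSize(610, 82));
    }
}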
+ @Ignore + @Test + public void XtestExplicitDirectoryEntry() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + ArArchiveOutputStream aos = null; + ArArchiveInputStream ais = null; + try { + archive = File.createTempFile("test.", ".ar", tmp[0]); + archive.deleteOnExit(); + aos = new ArArchiveOutputStream(new FileOutputStream(archive)); + final long beforeArchiveWrite = tmp[0].lastModified(); + final ArArchiveEntry in = new ArArchiveEntry("foo", 0, 0, 0, 0, + tmp[1].lastModified() / 1000); + aos.putArchiveEntry(in); + aos.closeArchiveEntry(); + aos.close(); + aos = null; + ais = new ArArchiveInputStream(new FileInputStream(archive)); + final ArArchiveEntry out = ais.getNextArEntry(); + ais.close(); + ais = null; + assertNotNull(out); + assertEquals("foo/", out.getName()); + assertEquals(0, out.getSize()); + assertEquals(beforeArchiveWrite / 1000, + out.getLastModifiedDate().getTime() / 1000); + assertTrue(out.isDirectory()); + } finally { + if (ais != null) { + ais.close(); + } + if (aos != null) { + aos.close(); + } + tryHardToDelete(archive); + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void testFileEntryFromFile() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + ArArchiveOutputStream aos = null; + ArArchiveInputStream ais = null; + FileInputStream fis = null; + try { + archive = File.createTempFile("test.", ".ar", tmp[0]); + archive.deleteOnExit(); + aos = new ArArchiveOutputStream(new FileOutputStream(archive)); + final ArArchiveEntry in = new ArArchiveEntry(tmp[1], "foo"); + aos.putArchiveEntry(in); + final byte[] b = new byte[(int) tmp[1].length()]; + fis = new FileInputStream(tmp[1]); + while (fis.read(b) > 0) { + aos.write(b); + } + fis.close(); + fis = null; + aos.closeArchiveEntry(); + aos.close(); + aos = null; + ais = new ArArchiveInputStream(new FileInputStream(archive)); + final ArArchiveEntry out = ais.getNextArEntry(); + ais.close(); + ais = null; + assertNotNull(out); + assertEquals("foo", out.getName()); + assertEquals(tmp[1].length(), out.getSize()); + // AR stores time with a granularity of 1 second + assertEquals(tmp[1].lastModified() / 1000, + out.getLastModifiedDate().getTime() / 1000); + assertFalse(out.isDirectory()); + } finally { + if (ais != null) { + ais.close(); + } + if (aos != null) { + aos.close(); + } + tryHardToDelete(archive); + if (fis != null) { + fis.close(); + } + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void testExplicitFileEntry() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + ArArchiveOutputStream aos = null; + ArArchiveInputStream ais = null; + FileInputStream fis = null; + try { + archive = File.createTempFile("test.", ".ar", tmp[0]); + archive.deleteOnExit(); + aos = new ArArchiveOutputStream(new FileOutputStream(archive)); + final ArArchiveEntry in = new ArArchiveEntry("foo", tmp[1].length(), + 0, 0, 0, + tmp[1].lastModified() / 1000); + aos.putArchiveEntry(in); + final byte[] b = new byte[(int) tmp[1].length()]; + fis = new FileInputStream(tmp[1]); + while (fis.read(b) > 0) { + aos.write(b); + } + fis.close(); + fis = null; + aos.closeArchiveEntry(); + aos.close(); + aos = null; + ais = new ArArchiveInputStream(new FileInputStream(archive)); + final ArArchiveEntry out = ais.getNextArEntry(); + ais.close(); + ais = null; + assertNotNull(out); + assertEquals("foo", out.getName()); + assertEquals(tmp[1].length(), out.getSize()); + assertEquals(tmp[1].lastModified() / 1000, + 
out.getLastModifiedDate().getTime() / 1000); + assertFalse(out.isDirectory()); + } finally { + if (ais != null) { + ais.close(); + } + if (aos != null) { + aos.close(); + } + tryHardToDelete(archive); + if (fis != null) { + fis.close(); + } + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/ArchiveOutputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/ArchiveOutputStreamTest.java new file mode 100644 index 000000000..0ed878b4d --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/ArchiveOutputStreamTest.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import static org.junit.Assert.*; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ar.ArArchiveEntry; +import org.apache.commons.compress.archivers.cpio.CpioArchiveEntry; +import org.apache.commons.compress.archivers.jar.JarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public class ArchiveOutputStreamTest extends AbstractTestCase { + + @Test + public void testFinish() throws Exception { + final OutputStream out1 = new ByteArrayOutputStream(); + + ArchiveOutputStream aos1 = factory.createArchiveOutputStream("zip", out1); + aos1.putArchiveEntry(new ZipArchiveEntry("dummy")); + try { + aos1.finish(); + fail("After putArchive should follow closeArchive"); + } catch (final IOException io) { + // Exception expected + } + + aos1 = factory.createArchiveOutputStream("jar", out1); + aos1.putArchiveEntry(new JarArchiveEntry("dummy")); + try { + aos1.finish(); + fail("After putArchive should follow closeArchive"); + } catch (final IOException io) { + // Exception expected + } + + aos1 = factory.createArchiveOutputStream("ar", out1); + aos1.putArchiveEntry(new ArArchiveEntry("dummy", 100)); + try { + aos1.finish(); + fail("After putArchive should follow closeArchive"); + } catch (final IOException io) { + // Exception expected + } + + aos1 = factory.createArchiveOutputStream("cpio", out1); + aos1.putArchiveEntry(new CpioArchiveEntry("dummy")); + try { + aos1.finish(); + fail("After putArchive should follow closeArchive"); + } catch (final IOException io) { + // Exception expected + } + + aos1 = factory.createArchiveOutputStream("tar", out1); + aos1.putArchiveEntry(new TarArchiveEntry("dummy")); + try { + 
aos1.finish(); + fail("After putArchive should follow closeArchive"); + } catch (final IOException io) { + // Exception expected + } + } + + @Test + public void testOptionalFinish() throws Exception { + final OutputStream out1 = new ByteArrayOutputStream(); + + try (ArchiveOutputStream aos1 = factory.createArchiveOutputStream("zip", out1)) { + aos1.putArchiveEntry(new ZipArchiveEntry("dummy")); + aos1.closeArchiveEntry(); + } + + final ArchiveOutputStream finishTest; + try (ArchiveOutputStream aos1 = factory.createArchiveOutputStream("jar", out1)) { + finishTest = aos1; + aos1.putArchiveEntry(new JarArchiveEntry("dummy")); + aos1.closeArchiveEntry(); + } + try { + finishTest.finish(); + fail("finish() cannot follow close()"); + } catch (final IOException io) { + // Exception expected + } + finishTest.close(); + } + + @Test + public void testCallSequenceAr() throws Exception{ + doCallSequence("Ar"); + } + + @Test + public void testCallSequenceCpio() throws Exception{ + doCallSequence("Cpio"); + } + + @Test + public void testCallSequenceJar() throws Exception{ + doCallSequence("Jar"); + } + + @Test + public void testCallSequenceTar() throws Exception{ + doCallSequence("Tar"); + } + + @Test + public void testCallSequenceZip() throws Exception{ + doCallSequence("Zip"); + } + + private void doCallSequence(final String archiveType) throws Exception { + final OutputStream out1 = new ByteArrayOutputStream(); + final File dummy = getFile("test1.xml"); // need a real file + + ArchiveOutputStream aos1; + aos1 = factory.createArchiveOutputStream(archiveType, out1); + aos1.putArchiveEntry(aos1.createArchiveEntry(dummy, "dummy")); + try (InputStream is = new FileInputStream(dummy)) { + IOUtils.copy(is, aos1); + } + aos1.closeArchiveEntry(); + aos1.close(); // omitted finish + + // TODO - check if archives ensure that data has been written to the stream? + + aos1 = factory.createArchiveOutputStream(archiveType, out1); + try { + aos1.closeArchiveEntry(); + fail("Should have raised IOException - closeArchiveEntry() called before putArchiveEntry()"); + } catch (final IOException expected) { + } + + aos1.putArchiveEntry(aos1.createArchiveEntry(dummy, "dummy")); + try (InputStream is = new FileInputStream(dummy)) { + IOUtils.copy(is, aos1); + } + + // TODO check if second putArchiveEntry() can follow without closeAE? 
+ + try { + aos1.finish(); + fail("Should have raised IOException - finish() called before closeArchiveEntry()"); + } catch (final IOException expected) { + } + try { + aos1.close(); + fail("Should have raised IOException - close() called before closeArchiveEntry()"); + } catch (final IOException expected) { + } + + aos1 = createArchiveWithDummyEntry(archiveType, out1, dummy); + aos1.closeArchiveEntry(); + try { + aos1.closeArchiveEntry(); + fail("Should have raised IOException - closeArchiveEntry() called with no open entry"); + } catch (final IOException expected) { + } + + aos1 = createArchiveWithDummyEntry(archiveType, out1, dummy); + aos1.closeArchiveEntry(); + aos1.finish(); + aos1.close(); + try { + aos1.finish(); + fail("Should have raised IOException - finish() called after close()"); + } catch (final IOException expected) { + } + } + + private ArchiveOutputStream createArchiveWithDummyEntry(String archiveType, OutputStream out1, File dummy) + throws Exception { + ArchiveOutputStream aos1 = factory.createArchiveOutputStream(archiveType, out1); + aos1.putArchiveEntry(aos1.createArchiveEntry(dummy, "dummy")); + try (InputStream is = new FileInputStream(dummy)) { + IOUtils.copy(is, aos1); + } + return aos1; + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/ArchiveServiceLoaderTest.java b/src/test/java/org/apache/commons/compress/archivers/ArchiveServiceLoaderTest.java new file mode 100644 index 000000000..910797dac --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/ArchiveServiceLoaderTest.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
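// Editor's note (not part of this commit): the call-sequence tests above pin down the expected
// lifecycle of an ArchiveOutputStream. A sketch of the happy path for a single entry, assuming
// a hypothetical input file "test1.xml" and output "out.tar".
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.commons.compress.archivers.ArchiveOutputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.utils.IOUtils;

public class CallSequenceSketch {
    public static void main(String[] args) throws Exception {
        File source = new File("test1.xml");
        try (OutputStream out = new FileOutputStream("out.tar");
             ArchiveOutputStream aos = new ArchiveStreamFactory()
                     .createArchiveOutputStream(ArchiveStreamFactory.TAR, out)) {
            aos.putArchiveEntry(aos.createArchiveEntry(source, source.getName())); // 1. open entry
            try (InputStream in = new FileInputStream(source)) {
                IOUtils.copy(in, aos);                                             // 2. write data
            }
            aos.closeArchiveEntry();                                               // 3. close entry
            aos.finish();                                                          // 4. finish archive
        }                                                                          // 5. close stream
    }
}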
+ */ + +package org.apache.commons.compress.archivers; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; + +import org.apache.commons.compress.archivers.TestArchiveStreamProvider.ArchiveInvocationConfirmationException; +import org.junit.Test; + +public class ArchiveServiceLoaderTest { + + @Test(expected = ArchiveInvocationConfirmationException.class) + public void testInputStream() throws ArchiveException { + new ArchiveStreamFactory().createArchiveInputStream("ArchiveTestInput1", new ByteArrayInputStream(new byte[] {})); + } + + @Test(expected = ArchiveInvocationConfirmationException.class) + public void testOutputStream() throws ArchiveException { + new ArchiveStreamFactory().createArchiveOutputStream("ArchiveTestOutput1", new ByteArrayOutputStream()); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/ArchiveStreamFactoryTest.java b/src/test/java/org/apache/commons/compress/archivers/ArchiveStreamFactoryTest.java new file mode 100644 index 000000000..f7113ac04 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/ArchiveStreamFactoryTest.java @@ -0,0 +1,433 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.Field; + +import org.apache.commons.compress.MockEvilInputStream; +import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; +import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; +import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; +import org.apache.commons.compress.archivers.jar.JarArchiveInputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.junit.Test; + +public class ArchiveStreamFactoryTest { + + private static final String UNKNOWN = "??"; + + /** + * see https://issues.apache.org/jira/browse/COMPRESS-171 + */ + @Test + public void shortTextFilesAreNoTARs() throws Exception { + try { + new ArchiveStreamFactory() + .createArchiveInputStream(new ByteArrayInputStream("This certainly is not a tar archive, really, no kidding".getBytes())); + fail("created an input stream for a non-archive"); + } catch (final ArchiveException ae) { + assertTrue(ae.getMessage().startsWith("No Archiver found")); + } + } + + /** + * see https://issues.apache.org/jira/browse/COMPRESS-191 + */ + @Test + public void aiffFilesAreNoTARs() throws Exception { + try (FileInputStream fis = new FileInputStream("src/test/resources/testAIFF.aif")) { + try (InputStream is = new BufferedInputStream(fis)) { + new ArchiveStreamFactory().createArchiveInputStream(is); + fail("created an input stream for a non-archive"); + } catch (final ArchiveException ae) { + assertTrue(ae.getMessage().startsWith("No Archiver found")); + } + } + } + + @Test + public void testCOMPRESS209() throws Exception { + try (FileInputStream fis = new FileInputStream("src/test/resources/testCompress209.doc")) { + try (InputStream bis = new BufferedInputStream(fis)) { + new ArchiveStreamFactory().createArchiveInputStream(bis); + fail("created an input stream for a non-archive"); + } catch (final ArchiveException ae) { + assertTrue(ae.getMessage().startsWith("No Archiver found")); + } + } + } + + @Test(expected = StreamingNotSupportedException.class) + public void cantRead7zFromStream() throws Exception { + new ArchiveStreamFactory() + .createArchiveInputStream(ArchiveStreamFactory.SEVEN_Z, + new ByteArrayInputStream(new byte[0])); + } + + @Test(expected = StreamingNotSupportedException.class) + public void cantWrite7zToStream() throws Exception { + new ArchiveStreamFactory() + .createArchiveOutputStream(ArchiveStreamFactory.SEVEN_Z, + new ByteArrayOutputStream()); + } + + /** + * Test case for + * <a href="https://issues.apache.org/jira/browse/COMPRESS-267" + * >COMPRESS-267</a>. 
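// Editor's note (not part of this commit): cantRead7zFromStream/cantWrite7zToStream above
// document that 7z cannot be handled through the streaming factory API. A sketch of the
// file-based alternative the library provides instead, assuming a local "bla.7z".
import java.io.File;
import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
import org.apache.commons.compress.archivers.sevenz.SevenZFile;

public class SevenZSketch {
    public static void main(String[] args) throws Exception {
        try (SevenZFile sevenZ = new SevenZFile(new File("bla.7z"))) {
            SevenZArchiveEntry entry;
            while ((entry = sevenZ.getNextEntry()) != null) {
                System.out.println(entry.getName() + " (" + entry.getSize() + " bytes)");
            }
        }
    }
}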
+ */ + @Test + public void detectsAndThrowsFor7z() throws Exception { + try (FileInputStream fis = new FileInputStream("src/test/resources/bla.7z")) { + try (InputStream bis = new BufferedInputStream(fis)) { + new ArchiveStreamFactory().createArchiveInputStream(bis); + fail("Expected a StreamingNotSupportedException"); + } catch (final StreamingNotSupportedException ex) { + assertEquals(ArchiveStreamFactory.SEVEN_Z, ex.getFormat()); + } + } + } + + /** + * Test case for + * <a href="https://issues.apache.org/jira/browse/COMPRESS-208" + * >COMPRESS-208</a>. + */ + @Test + public void skipsPK00Prefix() throws Exception { + try (FileInputStream fis = new FileInputStream("src/test/resources/COMPRESS-208.zip")) { + try (InputStream bis = new BufferedInputStream(fis)) { + try (ArchiveInputStream ais = new ArchiveStreamFactory().createArchiveInputStream(bis)) { + assertTrue(ais instanceof ZipArchiveInputStream); + } + } + } + } + + @Test + public void testEncodingCtor() { + ArchiveStreamFactory fac = new ArchiveStreamFactory(); + assertNull(fac.getEntryEncoding()); + fac = new ArchiveStreamFactory(null); + assertNull(fac.getEntryEncoding()); + fac = new ArchiveStreamFactory("UTF-8"); + assertEquals("UTF-8", fac.getEntryEncoding()); + } + + @Test + @SuppressWarnings("deprecation") + public void testEncodingDeprecated() { + ArchiveStreamFactory fac = new ArchiveStreamFactory(); + assertNull(fac.getEntryEncoding()); + fac.setEntryEncoding("UTF-8"); + assertEquals("UTF-8", fac.getEntryEncoding()); + fac.setEntryEncoding("US_ASCII"); + assertEquals("US_ASCII", fac.getEntryEncoding()); + fac = new ArchiveStreamFactory("UTF-8"); + assertEquals("UTF-8", fac.getEntryEncoding()); + try { + fac.setEntryEncoding("US_ASCII"); + fail("Expected IllegalStateException"); + } catch (final IllegalStateException ise) { + // expected + } + } + + static class TestData { + final String testFile; + final String expectedEncoding; + final ArchiveStreamFactory fac; + final String fieldName; + final String type; + final boolean hasOutputStream; + + TestData(final String testFile, final String type, final boolean hasOut, final String expectedEncoding, final ArchiveStreamFactory fac, final String fieldName) { + this.testFile = testFile; + this.expectedEncoding = expectedEncoding; + this.fac = fac; + this.fieldName = fieldName; + this.type = type; + this.hasOutputStream = hasOut; + } + + @Override + public String toString() { + return "TestData [testFile=" + testFile + ", expectedEncoding=" + expectedEncoding + ", fac=" + fac + + ", fieldName=" + fieldName + ", type=" + type + ", hasOutputStream=" + hasOutputStream + "]"; + } + } + + @SuppressWarnings("deprecation") // test of deprecated method + static ArchiveStreamFactory getFactory(final String entryEncoding) { + final ArchiveStreamFactory fac = new ArchiveStreamFactory(); + fac.setEntryEncoding(entryEncoding); + return fac; + } + // The different factory types + private static final ArchiveStreamFactory FACTORY = new ArchiveStreamFactory(); + private static final ArchiveStreamFactory FACTORY_UTF8 = new ArchiveStreamFactory("UTF-8"); + private static final ArchiveStreamFactory FACTORY_ASCII = new ArchiveStreamFactory("ASCII"); + private static final ArchiveStreamFactory FACTORY_SET_UTF8 = getFactory("UTF-8"); + private static final ArchiveStreamFactory FACTORY_SET_ASCII = getFactory("ASCII"); + + // Default encoding if none is provided (not even null) + // The test currently assumes that the output default is the same as the input default + private static final String 
ARJ_DEFAULT; + private static final String DUMP_DEFAULT; + + private static final String ZIP_DEFAULT = getField(new ZipArchiveInputStream(null),"encoding"); + private static final String CPIO_DEFAULT = getField(new CpioArchiveInputStream(null),"encoding"); + private static final String TAR_DEFAULT = getField(new TarArchiveInputStream(null),"encoding"); + private static final String JAR_DEFAULT = getField(new JarArchiveInputStream(null),"encoding"); + + static { + String dflt; + dflt = UNKNOWN; + try { + dflt = getField(new ArjArchiveInputStream(new FileInputStream(getFile("bla.arj"))), "charsetName"); + } catch (final Exception e) { + e.printStackTrace(); + } + ARJ_DEFAULT = dflt; + dflt = UNKNOWN; + try { + dflt = getField(new DumpArchiveInputStream(new FileInputStream(getFile("bla.dump"))), "encoding"); + } catch (final Exception e) { + e.printStackTrace(); + } + DUMP_DEFAULT = dflt; + } + + @Test + public void testDetect() throws Exception { + for (String extension : new String[]{ + ArchiveStreamFactory.AR, + ArchiveStreamFactory.ARJ, + ArchiveStreamFactory.CPIO, + ArchiveStreamFactory.DUMP, + // Compress doesn't know how to detect JARs, see COMPRESS-91 + // ArchiveStreamFactory.JAR, + ArchiveStreamFactory.SEVEN_Z, + ArchiveStreamFactory.TAR, + ArchiveStreamFactory.ZIP + }) { + assertEquals(extension, detect("bla."+extension)); + } + + try { + ArchiveStreamFactory.detect(new BufferedInputStream(new ByteArrayInputStream(new byte[0]))); + fail("shouldn't be able to detect empty stream"); + } catch (ArchiveException e) { + assertEquals("No Archiver found for the stream signature", e.getMessage()); + } + + try { + ArchiveStreamFactory.detect(null); + fail("shouldn't be able to detect null stream"); + } catch (IllegalArgumentException e) { + assertEquals("Stream must not be null.", e.getMessage()); + } + + try { + ArchiveStreamFactory.detect(new BufferedInputStream(new MockEvilInputStream())); + fail("Expected ArchiveException"); + } catch (ArchiveException e) { + assertEquals("IOException while reading signature.", e.getMessage()); + } + } + + private String detect(String resource) throws IOException, ArchiveException { + try(InputStream in = new BufferedInputStream(new FileInputStream( + getFile(resource)))) { + return ArchiveStreamFactory.detect(in); + } + } + + static final TestData[] TESTS = { + new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, ARJ_DEFAULT, FACTORY, "charsetName"), + new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, "UTF-8", FACTORY_UTF8, "charsetName"), + new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, "ASCII", FACTORY_ASCII, "charsetName"), + new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, "UTF-8", FACTORY_SET_UTF8, "charsetName"), + new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, "ASCII", FACTORY_SET_ASCII, "charsetName"), + + new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, CPIO_DEFAULT, FACTORY, "encoding"), + new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, "UTF-8", FACTORY_UTF8, "encoding"), + new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, "ASCII", FACTORY_ASCII, "encoding"), + new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, "UTF-8", FACTORY_SET_UTF8, "encoding"), + new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, "ASCII", FACTORY_SET_ASCII, "encoding"), + + new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, DUMP_DEFAULT, FACTORY, "encoding"), + new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, "UTF-8", FACTORY_UTF8, "encoding"), + new TestData("bla.dump", 
ArchiveStreamFactory.DUMP, false, "ASCII", FACTORY_ASCII, "encoding"), + new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, "UTF-8", FACTORY_SET_UTF8, "encoding"), + new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, "ASCII", FACTORY_SET_ASCII, "encoding"), + + new TestData("bla.tar", ArchiveStreamFactory.TAR, true, TAR_DEFAULT, FACTORY, "encoding"), + new TestData("bla.tar", ArchiveStreamFactory.TAR, true, "UTF-8", FACTORY_UTF8, "encoding"), + new TestData("bla.tar", ArchiveStreamFactory.TAR, true, "ASCII", FACTORY_ASCII, "encoding"), + new TestData("bla.tar", ArchiveStreamFactory.TAR, true, "UTF-8", FACTORY_SET_UTF8, "encoding"), + new TestData("bla.tar", ArchiveStreamFactory.TAR, true, "ASCII", FACTORY_SET_ASCII, "encoding"), + + new TestData("bla.jar", ArchiveStreamFactory.JAR, true, JAR_DEFAULT, FACTORY, "encoding"), + new TestData("bla.jar", ArchiveStreamFactory.JAR, true, "UTF-8", FACTORY_UTF8, "encoding"), + new TestData("bla.jar", ArchiveStreamFactory.JAR, true, "ASCII", FACTORY_ASCII, "encoding"), + new TestData("bla.jar", ArchiveStreamFactory.JAR, true, "UTF-8", FACTORY_SET_UTF8, "encoding"), + new TestData("bla.jar", ArchiveStreamFactory.JAR, true, "ASCII", FACTORY_SET_ASCII, "encoding"), + + new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, ZIP_DEFAULT, FACTORY, "encoding"), + new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, "UTF-8", FACTORY_UTF8, "encoding"), + new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, "ASCII", FACTORY_ASCII, "encoding"), + new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, "UTF-8", FACTORY_SET_UTF8, "encoding"), + new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, "ASCII", FACTORY_SET_ASCII, "encoding"), + }; + + @Test + public void testEncodingInputStreamAutodetect() throws Exception { + int failed = 0; + for (int i = 1; i <= TESTS.length; i++) { + final TestData test = TESTS[i - 1]; + try (final ArchiveInputStream ais = getInputStreamFor(test.testFile, test.fac)) { + final String field = getField(ais, test.fieldName); + if (!eq(test.expectedEncoding, field)) { + System.out.println("Failed test " + i + ". expected: " + test.expectedEncoding + " actual: " + field + + " type: " + test.type); + failed++; + } + } + } + if (failed > 0) { + fail("Tests failed: " + failed + " out of " + TESTS.length); + } + } + + @Test + public void testEncodingInputStream() throws Exception { + int failed = 0; + for (int i = 1; i <= TESTS.length; i++) { + final TestData test = TESTS[i - 1]; + try (final ArchiveInputStream ais = getInputStreamFor(test.type, test.testFile, test.fac)) { + final String field = getField(ais, test.fieldName); + if (!eq(test.expectedEncoding, field)) { + System.out.println("Failed test " + i + ". expected: " + test.expectedEncoding + " actual: " + field + + " type: " + test.type); + failed++; + } + } + } + if (failed > 0) { + fail("Tests failed: " + failed + " out of " + TESTS.length); + } + } + + @Test + public void testEncodingOutputStream() throws Exception { + int failed = 0; + for(int i = 1; i <= TESTS.length; i++) { + final TestData test = TESTS[i-1]; + if (test.hasOutputStream) { + try (final ArchiveOutputStream ais = getOutputStreamFor(test.type, test.fac)) { + final String field = getField(ais, test.fieldName); + if (!eq(test.expectedEncoding, field)) { + System.out.println("Failed test " + i + ". 
expected: " + test.expectedEncoding + " actual: " + + field + " type: " + test.type); + failed++; + } + } + } + } + if (failed > 0) { + fail("Tests failed: " + failed + " out of " + TESTS.length); + } + } + + // equals allowing null + private static boolean eq(final String exp, final String act) { + if (exp == null) { + return act == null; + } + return exp.equals(act); + } + + private static String getField(final Object instance, final String name) { + final Class<?> cls = instance.getClass(); + Field fld; + try { + fld = cls.getDeclaredField(name); + } catch (final NoSuchFieldException nsfe) { + try { + fld = cls.getSuperclass().getDeclaredField(name); + } catch (final NoSuchFieldException e) { + System.out.println("Cannot find " + name + " in class " + instance.getClass().getSimpleName()); + return UNKNOWN; + } + } + final boolean isAccessible = fld.isAccessible(); + try { + if (!isAccessible) { + fld.setAccessible(true); + } + final Object object = fld.get(instance); + if (object instanceof String || object == null) { + return (String) object; + } + System.out.println("Wrong type: " + object.getClass().getCanonicalName() + " for " + name + " in class " + instance.getClass().getSimpleName()); + return UNKNOWN; + } catch (final Exception e) { + e.printStackTrace(); + return UNKNOWN; + } finally { + if (!isAccessible) { + fld.setAccessible(isAccessible); + } + } + } + + private ArchiveInputStream getInputStreamFor(final String resource, final ArchiveStreamFactory factory) + throws IOException, ArchiveException { + return factory.createArchiveInputStream( + new BufferedInputStream(new FileInputStream( + getFile(resource)))); + } + + private ArchiveInputStream getInputStreamFor(final String type, final String resource, final ArchiveStreamFactory factory) + throws IOException, ArchiveException { + return factory.createArchiveInputStream( + type, + new BufferedInputStream(new FileInputStream( + getFile(resource)))); + } + + private ArchiveOutputStream getOutputStreamFor(final String type, final ArchiveStreamFactory factory) + throws IOException, ArchiveException { + return factory.createArchiveOutputStream(type, new ByteArrayOutputStream()); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/CpioTestCase.java b/src/test/java/org/apache/commons/compress/archivers/CpioTestCase.java new file mode 100644 index 000000000..bad016187 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/CpioTestCase.java @@ -0,0 +1,297 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.cpio.CpioArchiveEntry; +import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; +import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream; +import org.apache.commons.compress.archivers.cpio.CpioConstants; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class CpioTestCase extends AbstractTestCase { + + @Test + public void testCpioArchiveCreation() throws Exception { + final File output = new File(dir, "bla.cpio"); + + final File file1 = getFile("test1.xml"); + final File file2 = getFile("test2.xml"); + + final OutputStream out = new FileOutputStream(output); + final ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream("cpio", out); + os.putArchiveEntry(new CpioArchiveEntry("test1.xml", file1.length())); + IOUtils.copy(new FileInputStream(file1), os); + os.closeArchiveEntry(); + + os.putArchiveEntry(new CpioArchiveEntry("test2.xml", file2.length())); + IOUtils.copy(new FileInputStream(file2), os); + os.closeArchiveEntry(); + + os.close(); + out.close(); + } + + @Test + public void testCpioUnarchive() throws Exception { + final File output = new File(dir, "bla.cpio"); + final File file1 = getFile("test1.xml"); + final File file2 = getFile("test2.xml"); + final long file1Length = file1.length(); + final long file2Length = file2.length(); + + { + final OutputStream out = new FileOutputStream(output); + final ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream("cpio", out); + CpioArchiveEntry entry = new CpioArchiveEntry("test1.xml", file1Length); + entry.setMode(CpioConstants.C_ISREG); + os.putArchiveEntry(entry); + IOUtils.copy(new FileInputStream(file1), os); + os.closeArchiveEntry(); + + entry = new CpioArchiveEntry("test2.xml", file2Length); + entry.setMode(CpioConstants.C_ISREG); + os.putArchiveEntry(entry); + IOUtils.copy(new FileInputStream(file2), os); + os.closeArchiveEntry(); + os.finish(); + os.close(); + out.close(); + } + + // Unarchive Operation + final File input = output; + final InputStream is = new FileInputStream(input); + final ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream("cpio", is); + + + final Map<String, File> result = new HashMap<>(); + ArchiveEntry entry = null; + while ((entry = in.getNextEntry()) != null) { + final File cpioget = new File(dir, entry.getName()); + final OutputStream out = new FileOutputStream(cpioget); + IOUtils.copy(in, out); + out.close(); + result.put(entry.getName(), cpioget); + } + in.close(); + is.close(); + + File t = result.get("test1.xml"); + assertTrue("Expected " + t.getAbsolutePath() + " to exist", t.exists()); + assertEquals("length of " + t.getAbsolutePath(), file1Length, t.length()); + + t = result.get("test2.xml"); + assertTrue("Expected " + t.getAbsolutePath() + " to exist", t.exists()); + assertEquals("length of " + t.getAbsolutePath(), file2Length, t.length()); + } + + @Test + public void testDirectoryEntryFromFile() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + CpioArchiveOutputStream tos = null; + CpioArchiveInputStream tis = null; + try { + 
archive = File.createTempFile("test.", ".cpio", tmp[0]); + archive.deleteOnExit(); + tos = new CpioArchiveOutputStream(new FileOutputStream(archive)); + final long beforeArchiveWrite = tmp[0].lastModified(); + final CpioArchiveEntry in = new CpioArchiveEntry(tmp[0], "foo"); + tos.putArchiveEntry(in); + tos.closeArchiveEntry(); + tos.close(); + tos = null; + tis = new CpioArchiveInputStream(new FileInputStream(archive)); + final CpioArchiveEntry out = tis.getNextCPIOEntry(); + tis.close(); + tis = null; + assertNotNull(out); + assertEquals("foo", out.getName()); + assertEquals(0, out.getSize()); + // CPIO stores time with a granularity of 1 second + assertEquals(beforeArchiveWrite / 1000, + out.getLastModifiedDate().getTime() / 1000); + assertTrue(out.isDirectory()); + } finally { + if (tis != null) { + tis.close(); + } + if (tos != null) { + tos.close(); + } + tryHardToDelete(archive); + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void testExplicitDirectoryEntry() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + CpioArchiveOutputStream tos = null; + CpioArchiveInputStream tis = null; + try { + archive = File.createTempFile("test.", ".cpio", tmp[0]); + archive.deleteOnExit(); + tos = new CpioArchiveOutputStream(new FileOutputStream(archive)); + final long beforeArchiveWrite = tmp[0].lastModified(); + final CpioArchiveEntry in = new CpioArchiveEntry("foo/"); + in.setTime(beforeArchiveWrite / 1000); + in.setMode(CpioConstants.C_ISDIR); + tos.putArchiveEntry(in); + tos.closeArchiveEntry(); + tos.close(); + tos = null; + tis = new CpioArchiveInputStream(new FileInputStream(archive)); + final CpioArchiveEntry out = tis.getNextCPIOEntry(); + tis.close(); + tis = null; + assertNotNull(out); + assertEquals("foo/", out.getName()); + assertEquals(0, out.getSize()); + assertEquals(beforeArchiveWrite / 1000, + out.getLastModifiedDate().getTime() / 1000); + assertTrue(out.isDirectory()); + } finally { + if (tis != null) { + tis.close(); + } + if (tos != null) { + tos.close(); + } + tryHardToDelete(archive); + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void testFileEntryFromFile() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + CpioArchiveOutputStream tos = null; + CpioArchiveInputStream tis = null; + FileInputStream fis = null; + try { + archive = File.createTempFile("test.", ".cpio", tmp[0]); + archive.deleteOnExit(); + tos = new CpioArchiveOutputStream(new FileOutputStream(archive)); + final CpioArchiveEntry in = new CpioArchiveEntry(tmp[1], "foo"); + tos.putArchiveEntry(in); + final byte[] b = new byte[(int) tmp[1].length()]; + fis = new FileInputStream(tmp[1]); + while (fis.read(b) > 0) { + tos.write(b); + } + fis.close(); + fis = null; + tos.closeArchiveEntry(); + tos.close(); + tos = null; + tis = new CpioArchiveInputStream(new FileInputStream(archive)); + final CpioArchiveEntry out = tis.getNextCPIOEntry(); + tis.close(); + tis = null; + assertNotNull(out); + assertEquals("foo", out.getName()); + assertEquals(tmp[1].length(), out.getSize()); + assertEquals(tmp[1].lastModified() / 1000, + out.getLastModifiedDate().getTime() / 1000); + assertFalse(out.isDirectory()); + } finally { + if (tis != null) { + tis.close(); + } + if (tos != null) { + tos.close(); + } + tryHardToDelete(archive); + if (fis != null) { + fis.close(); + } + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void testExplicitFileEntry() throws Exception { + final File[] tmp = 
createTempDirAndFile(); + File archive = null; + CpioArchiveOutputStream tos = null; + CpioArchiveInputStream tis = null; + FileInputStream fis = null; + try { + archive = File.createTempFile("test.", ".cpio", tmp[0]); + archive.deleteOnExit(); + tos = new CpioArchiveOutputStream(new FileOutputStream(archive)); + final CpioArchiveEntry in = new CpioArchiveEntry("foo"); + in.setTime(tmp[1].lastModified() / 1000); + in.setSize(tmp[1].length()); + in.setMode(CpioConstants.C_ISREG); + tos.putArchiveEntry(in); + final byte[] b = new byte[(int) tmp[1].length()]; + fis = new FileInputStream(tmp[1]); + while (fis.read(b) > 0) { + tos.write(b); + } + fis.close(); + fis = null; + tos.closeArchiveEntry(); + tos.close(); + tos = null; + tis = new CpioArchiveInputStream(new FileInputStream(archive)); + final CpioArchiveEntry out = tis.getNextCPIOEntry(); + tis.close(); + tis = null; + assertNotNull(out); + assertEquals("foo", out.getName()); + assertEquals(tmp[1].length(), out.getSize()); + assertEquals(tmp[1].lastModified() / 1000, + out.getLastModifiedDate().getTime() / 1000); + assertFalse(out.isDirectory()); + } finally { + if (tis != null) { + tis.close(); + } + if (tos != null) { + tos.close(); + } + tryHardToDelete(archive); + if (fis != null) { + fis.close(); + } + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/DumpTestCase.java b/src/test/java/org/apache/commons/compress/archivers/DumpTestCase.java new file mode 100644 index 000000000..86759027d --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/DumpTestCase.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers; + +import static org.junit.Assert.*; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class DumpTestCase extends AbstractTestCase { + + @Test + public void testDumpUnarchiveAll() throws Exception { + unarchiveAll(getFile("bla.dump")); + } + + @Test + public void testCompressedDumpUnarchiveAll() throws Exception { + unarchiveAll(getFile("bla.z.dump")); + } + + private void unarchiveAll(final File input) throws Exception { + final InputStream is = new FileInputStream(input); + ArchiveInputStream in = null; + OutputStream out = null; + try { + in = new ArchiveStreamFactory() + .createArchiveInputStream("dump", is); + + ArchiveEntry entry = in.getNextEntry(); + while (entry != null) { + final File archiveEntry = new File(dir, entry.getName()); + archiveEntry.getParentFile().mkdirs(); + if (entry.isDirectory()) { + archiveEntry.mkdir(); + entry = in.getNextEntry(); + continue; + } + out = new FileOutputStream(archiveEntry); + IOUtils.copy(in, out); + out.close(); + out = null; + entry = in.getNextEntry(); + } + } finally { + if (out != null) { + out.close(); + } + if (in != null) { + in.close(); + } + is.close(); + } + } + + @Test + public void testArchiveDetection() throws Exception { + archiveDetection(getFile("bla.dump")); + } + + @Test + public void testCompressedArchiveDetection() throws Exception { + archiveDetection(getFile("bla.z.dump")); + } + + private void archiveDetection(final File f) throws Exception { + try (InputStream is = new FileInputStream(f)) { + assertEquals(DumpArchiveInputStream.class, + new ArchiveStreamFactory() + .createArchiveInputStream(new BufferedInputStream(is)) + .getClass()); + } + } + + @Test + public void testCheckArchive() throws Exception { + checkDumpArchive(getFile("bla.dump")); + } + + @Test + public void testCheckCompressedArchive() throws Exception { + checkDumpArchive(getFile("bla.z.dump")); + } + + private void checkDumpArchive(final File f) throws Exception { + final ArrayList<String> expected = new ArrayList<>(); + expected.add(""); + expected.add("lost+found/"); + expected.add("test1.xml"); + expected.add("test2.xml"); + try (InputStream is = new FileInputStream(f)) { + checkArchiveContent(new DumpArchiveInputStream(is), + expected); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/ExceptionMessageTest.java b/src/test/java/org/apache/commons/compress/archivers/ExceptionMessageTest.java new file mode 100644 index 000000000..e64d79265 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/ExceptionMessageTest.java @@ -0,0 +1,81 @@ +package org.apache.commons.compress.archivers; + +import static org.junit.Assert.*; +import org.junit.Test; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +public class ExceptionMessageTest { + + private static final String ARCHIVER_NULL_MESSAGE = "Archivername must not be null."; + + private static final String INPUTSTREAM_NULL_MESSAGE = "InputStream must not be null."; + + private static final String OUTPUTSTREAM_NULL_MESSAGE = "OutputStream must not be null."; + + + @Test + public void testMessageWhenArchiverNameIsNull_1(){ + try{ + new ArchiveStreamFactory().createArchiveInputStream(null, System.in); + fail("Should raise an IllegalArgumentException."); + }catch (final IllegalArgumentException e) { + assertEquals(ARCHIVER_NULL_MESSAGE, e.getMessage()); + } catch (final ArchiveException e) { + fail("ArchiveException not expected"); + } + } + + @Test + public void testMessageWhenInputStreamIsNull(){ + try{ + new ArchiveStreamFactory().createArchiveInputStream("zip", null); + fail("Should raise an IllegalArgumentException."); + }catch (final IllegalArgumentException e) { + assertEquals(INPUTSTREAM_NULL_MESSAGE, e.getMessage()); + } catch (final ArchiveException e) { + fail("ArchiveException not expected"); + } + } + + @Test + public void testMessageWhenArchiverNameIsNull_2(){ + try{ + new ArchiveStreamFactory().createArchiveOutputStream(null, System.out); + fail("Should raise an IllegalArgumentException."); + } catch (final IllegalArgumentException e) { + assertEquals(ARCHIVER_NULL_MESSAGE, e.getMessage()); + } catch (final ArchiveException e){ + fail("ArchiveException not expected"); + } + } + + @Test + public void testMessageWhenOutputStreamIsNull(){ + try{ + new ArchiveStreamFactory().createArchiveOutputStream("zip", null); + fail("Should raise an IllegalArgumentException."); + } catch (final IllegalArgumentException e) { + assertEquals(OUTPUTSTREAM_NULL_MESSAGE, e.getMessage()); + } catch (final ArchiveException e) { + fail("ArchiveException not expected"); + } + } + +}
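The factory API exercised by these tests (ArchiveStreamFactory, ArchiveInputStream, IOUtils) is the same one client code would drive to unpack an archive. As a rough orientation sketch only — the file names bla.zip and out are placeholders, not paths defined by this commit — extraction with try-with-resources might look like this:

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.InputStream;
    import java.io.OutputStream;

    import org.apache.commons.compress.archivers.ArchiveEntry;
    import org.apache.commons.compress.archivers.ArchiveInputStream;
    import org.apache.commons.compress.archivers.ArchiveStreamFactory;
    import org.apache.commons.compress.utils.IOUtils;

    public class UnpackSketch {
        public static void main(final String[] args) throws Exception {
            final File archive = new File("bla.zip"); // placeholder input archive
            final File targetDir = new File("out");   // placeholder extraction directory
            try (InputStream is = new FileInputStream(archive);
                 ArchiveInputStream in = new ArchiveStreamFactory()
                         .createArchiveInputStream("zip", is)) {
                ArchiveEntry entry;
                while ((entry = in.getNextEntry()) != null) {
                    final File target = new File(targetDir, entry.getName());
                    if (entry.isDirectory()) {
                        target.mkdirs();
                        continue;
                    }
                    target.getParentFile().mkdirs();
                    // stream the current entry's bytes to disk
                    try (OutputStream out = new FileOutputStream(target)) {
                        IOUtils.copy(in, out);
                    }
                }
            }
        }
    }

The try-with-resources form is equivalent to the explicit close()/finally handling used throughout the test cases below; createArchiveInputStream throws ArchiveException (a checked exception) when the format name is unknown or an argument is null, which is exactly what ExceptionMessageTest above asserts.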
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/archivers/JarTestCase.java b/src/test/java/org/apache/commons/compress/archivers/JarTestCase.java new file mode 100644 index 000000000..3717e7a53 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/JarTestCase.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class JarTestCase extends AbstractTestCase { + + @Test + public void testJarArchiveCreation() throws Exception { + final File output = new File(dir, "bla.jar"); + + final File file1 = getFile("test1.xml"); + final File file2 = getFile("test2.xml"); + + final OutputStream out = new FileOutputStream(output); + + final ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream("jar", out); + + os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); + IOUtils.copy(new FileInputStream(file1), os); + os.closeArchiveEntry(); + + os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); + IOUtils.copy(new FileInputStream(file2), os); + os.closeArchiveEntry(); + + os.close(); + } + + + @Test + public void testJarUnarchive() throws Exception { + final File input = getFile("bla.jar"); + final InputStream is = new FileInputStream(input); + final ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream("jar", is); + + ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry(); + File o = new File(dir, entry.getName()); + o.getParentFile().mkdirs(); + OutputStream out = new FileOutputStream(o); + IOUtils.copy(in, out); + out.close(); + + entry = (ZipArchiveEntry)in.getNextEntry(); + o = new File(dir, entry.getName()); + o.getParentFile().mkdirs(); + out = new FileOutputStream(o); + IOUtils.copy(in, out); + out.close(); + + entry = (ZipArchiveEntry)in.getNextEntry(); + o = new File(dir, entry.getName()); + o.getParentFile().mkdirs(); + out = new FileOutputStream(o); + IOUtils.copy(in, out); + out.close(); + + in.close(); + is.close(); + } + + @Test + public void testJarUnarchiveAll() throws Exception { + final File input = getFile("bla.jar"); + final InputStream is = new FileInputStream(input); + final ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream("jar", is); + + ArchiveEntry entry = in.getNextEntry(); + while (entry != null) { + final File archiveEntry = new File(dir, entry.getName()); + 
archiveEntry.getParentFile().mkdirs(); + if(entry.isDirectory()){ + archiveEntry.mkdir(); + entry = in.getNextEntry(); + continue; + } + final OutputStream out = new FileOutputStream(archiveEntry); + IOUtils.copy(in, out); + out.close(); + entry = in.getNextEntry(); + } + + in.close(); + is.close(); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/LongPathTest.java b/src/test/java/org/apache/commons/compress/archivers/LongPathTest.java new file mode 100644 index 000000000..a2f47ba6b --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/LongPathTest.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FilenameFilter; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collection; + +import junit.framework.AssertionFailedError; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; +import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test that can read various tar file examples. + * + * Files must be in resources/longpath, and there must be a file.txt containing + * the list of files in the archives. 
+*/ +@RunWith(Parameterized.class) +public class LongPathTest extends AbstractTestCase { + + private static final ClassLoader CLASSLOADER = LongPathTest.class.getClassLoader(); + private static final File ARCDIR; + private static final ArrayList<String> FILELIST = new ArrayList<>(); + + static { + try { + ARCDIR = new File(CLASSLOADER.getResource("longpath").toURI()); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + private final File file; + + + + public LongPathTest(final String file){ + this.file = new File(ARCDIR, file); + } + + @BeforeClass + public static void setUpFileList() throws Exception { + assertTrue(ARCDIR.exists()); + final File listing= new File(ARCDIR,"files.txt"); + assertTrue("files.txt is readable",listing.canRead()); + final BufferedReader br = new BufferedReader(new FileReader(listing)); + String line; + while ((line=br.readLine())!=null){ + if (!line.startsWith("#")){ + FILELIST.add(line); + } + } + br.close(); + } + + @Parameters(name = "file={0}") + public static Collection<Object[]> data() { + final Collection<Object[]> params = new ArrayList<>(); + for (final String f : ARCDIR.list(new FilenameFilter() { + @Override + public boolean accept(final File dir, final String name) { + return !name.endsWith(".txt"); + } + })) + { + params.add(new Object[] { f }); + } + return params; + } + + @Override + protected String getExpectedString(final ArchiveEntry entry) { + if (entry instanceof TarArchiveEntry) { + final TarArchiveEntry tarEntry = (TarArchiveEntry) entry; + if (tarEntry.isSymbolicLink()) { + return tarEntry.getName() + " -> " + tarEntry.getLinkName(); + } + } + return entry.getName(); + } + + @Test + public void testArchive() throws Exception { + @SuppressWarnings("unchecked") // fileList is of correct type + final + ArrayList<String> expected = (ArrayList<String>) FILELIST.clone(); + final String name = file.getName(); + if ("minotaur.jar".equals(name) || "minotaur-0.jar".equals(name)){ + expected.add("META-INF/"); + expected.add("META-INF/MANIFEST.MF"); + } + final ArchiveInputStream ais = factory.createArchiveInputStream(new BufferedInputStream(new FileInputStream(file))); + // check if expected type recognized + if (name.endsWith(".tar")){ + assertTrue(ais instanceof TarArchiveInputStream); + } else if (name.endsWith(".jar") || name.endsWith(".zip")){ + assertTrue(ais instanceof ZipArchiveInputStream); + } else if (name.endsWith(".cpio")){ + assertTrue(ais instanceof CpioArchiveInputStream); + // Hack: cpio does not add trailing "/" to directory names + for(int i=0; i < expected.size(); i++){ + final String ent = expected.get(i); + if (ent.endsWith("/")){ + expected.set(i, ent.substring(0, ent.length()-1)); + } + } + } else if (name.endsWith(".ar")){ + assertTrue(ais instanceof ArArchiveInputStream); + // CPIO does not store directories or directory names + expected.clear(); + for (final String ent : FILELIST) { + if (!ent.endsWith("/")) {// not a directory + final int lastSlash = ent.lastIndexOf('/'); + if (lastSlash >= 0) { // extract path name + expected.add(ent.substring(lastSlash + 1, ent.length())); + } else { + expected.add(ent); + } + } + } + } else { + fail("Unexpected file type: "+name); + } + try { + checkArchiveContent(ais, expected); + } catch (final AssertionFailedError e) { + fail("Error processing "+file.getName()+" "+e); + } finally { + ais.close(); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/LongSymLinkTest.java 
b/src/test/java/org/apache/commons/compress/archivers/LongSymLinkTest.java new file mode 100644 index 000000000..b4d0cf464 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/LongSymLinkTest.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FilenameFilter; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collection; + +import junit.framework.AssertionFailedError; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; +import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test that can read various tar file examples. + * + * Files must be in resources/longsymlink, and there must be a file.txt containing + * the list of files in the archives. 
+*/ +@RunWith(Parameterized.class) +public class LongSymLinkTest extends AbstractTestCase { + + private static final ClassLoader CLASSLOADER = LongSymLinkTest.class.getClassLoader(); + private static final File ARCDIR; + private static final ArrayList<String> FILELIST = new ArrayList<>(); + + static { + try { + ARCDIR = new File(CLASSLOADER.getResource("longsymlink").toURI()); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + private final File file; + + public LongSymLinkTest(final String file){ + this.file = new File(ARCDIR, file); + } + + @BeforeClass + public static void setUpFileList() throws Exception { + assertTrue(ARCDIR.exists()); + final File listing= new File(ARCDIR,"files.txt"); + assertTrue("files.txt is readable",listing.canRead()); + final BufferedReader br = new BufferedReader(new FileReader(listing)); + String line; + while ((line=br.readLine())!=null){ + if (!line.startsWith("#")){ + FILELIST.add(line); + } + } + br.close(); + } + + @Parameters(name = "file={0}") + public static Collection<Object[]> data() { + final Collection<Object[]> params = new ArrayList<>(); + for (final String f : ARCDIR.list(new FilenameFilter() { + @Override + public boolean accept(final File dir, final String name) { + return !name.endsWith(".txt"); + } + })) + { + params.add(new Object[] { f }); + } + return params; + } + + + @Override + protected String getExpectedString(final ArchiveEntry entry) { + if (entry instanceof TarArchiveEntry) { + final TarArchiveEntry tarEntry = (TarArchiveEntry) entry; + if (tarEntry.isSymbolicLink()) { + return tarEntry.getName() + " -> " + tarEntry.getLinkName(); + } + } + return entry.getName(); + } + + @Test + public void testArchive() throws Exception { + @SuppressWarnings("unchecked") // fileList is of correct type + final + ArrayList<String> expected = (ArrayList<String>) FILELIST.clone(); + final String name = file.getName(); + if ("minotaur.jar".equals(name) || "minotaur-0.jar".equals(name)){ + expected.add("META-INF/"); + expected.add("META-INF/MANIFEST.MF"); + } + final ArchiveInputStream ais = factory.createArchiveInputStream(new BufferedInputStream(new FileInputStream(file))); + // check if expected type recognized + if (name.endsWith(".tar")){ + assertTrue(ais instanceof TarArchiveInputStream); + } else if (name.endsWith(".jar") || name.endsWith(".zip")){ + assertTrue(ais instanceof ZipArchiveInputStream); + } else if (name.endsWith(".cpio")){ + assertTrue(ais instanceof CpioArchiveInputStream); + // Hack: cpio does not add trailing "/" to directory names + for(int i=0; i < expected.size(); i++){ + final String ent = expected.get(i); + if (ent.endsWith("/")){ + expected.set(i, ent.substring(0, ent.length()-1)); + } + } + } else if (name.endsWith(".ar")){ + assertTrue(ais instanceof ArArchiveInputStream); + // CPIO does not store directories or directory names + expected.clear(); + for (final String ent : FILELIST) { + if (!ent.endsWith("/")) {// not a directory + final int lastSlash = ent.lastIndexOf('/'); + if (lastSlash >= 0) { // extract path name + expected.add(ent.substring(lastSlash + 1, ent.length())); + } else { + expected.add(ent); + } + } + } + } else { + fail("Unexpected file type: "+name); + } + try { + checkArchiveContent(ais, expected); + } catch (final AssertionFailedError e) { + fail("Error processing "+file.getName()+" "+e); + } finally { + ais.close(); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/SevenZTestCase.java 
b/src/test/java/org/apache/commons/compress/archivers/SevenZTestCase.java new file mode 100644 index 000000000..0b5bd95f9 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/SevenZTestCase.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.security.NoSuchAlgorithmException; +import javax.crypto.Cipher; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry; +import org.apache.commons.compress.archivers.sevenz.SevenZFile; +import org.apache.commons.compress.archivers.sevenz.SevenZMethod; +import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile; +import org.junit.Assume; +import org.junit.Before; +import org.junit.Test; + +public class SevenZTestCase extends AbstractTestCase { + + private File output; + private final File file1, file2; + + public SevenZTestCase() throws IOException { + file1 = getFile("test1.xml"); + file2 = getFile("test2.xml"); + } + + @Before + public void setUp() throws Exception { + super.setUp(); + output = new File(dir, "bla.7z"); + } + + @Test + public void testSevenZArchiveCreationUsingCopy() throws Exception { + testSevenZArchiveCreation(SevenZMethod.COPY); + } + + @Test + public void testSevenZArchiveCreationUsingLZMA() throws Exception { + testSevenZArchiveCreation(SevenZMethod.LZMA); + } + + @Test + public void testSevenZArchiveCreationUsingLZMA2() throws Exception { + testSevenZArchiveCreation(SevenZMethod.LZMA2); + } + + @Test + public void testSevenZArchiveCreationUsingBZIP2() throws Exception { + testSevenZArchiveCreation(SevenZMethod.BZIP2); + } + + @Test + public void testSevenZArchiveCreationUsingDeflate() throws Exception { + testSevenZArchiveCreation(SevenZMethod.DEFLATE); + } + + private void testSevenZArchiveCreation(final SevenZMethod method) throws Exception { + createArchive(method); + try (SevenZFile archive = new SevenZFile(output)) { + SevenZArchiveEntry entry; + + entry = archive.getNextEntry(); + assert (entry != null); + assertEquals(entry.getName(), file1.getName()); + + entry = archive.getNextEntry(); + assert (entry != null); + assertEquals(entry.getName(), file2.getName()); + + assert (archive.getNextEntry() == null); + } + } + + private void createArchive(final SevenZMethod method) throws Exception { + final SevenZOutputFile outArchive = new SevenZOutputFile(output); + outArchive.setContentCompression(method); + try { + SevenZArchiveEntry entry; + + entry = outArchive.createArchiveEntry(file1, file1.getName()); + outArchive.putArchiveEntry(entry); + copy(file1, outArchive); + 
outArchive.closeArchiveEntry(); + + entry = outArchive.createArchiveEntry(file2, file2.getName()); + outArchive.putArchiveEntry(entry); + copy(file2, outArchive); + outArchive.closeArchiveEntry(); + } finally { + outArchive.close(); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingCopy() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(SevenZMethod.COPY); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingLZMA() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(SevenZMethod.LZMA); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingLZMA2() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(SevenZMethod.LZMA2); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingBZIP2() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(SevenZMethod.BZIP2); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingDeflate() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(SevenZMethod.DEFLATE); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingAES() throws Exception { + assumeStrongCryptoIsAvailable(); + try (SevenZFile archive = new SevenZFile(getFile("bla.encrypted.7z"), "foo".toCharArray())) { + singleByteReadConsistentlyReturnsMinusOneAtEof(archive); + } + } + + private void singleByteReadConsistentlyReturnsMinusOneAtEof(final SevenZMethod method) throws Exception { + createArchive(method); + try (SevenZFile archive = new SevenZFile(output)) { + singleByteReadConsistentlyReturnsMinusOneAtEof(archive); + } + } + + private void singleByteReadConsistentlyReturnsMinusOneAtEof(SevenZFile archive) throws Exception { + SevenZArchiveEntry entry = archive.getNextEntry(); + entry = archive.getNextEntry(); + readFully(archive); + assertEquals(-1, archive.read()); + assertEquals(-1, archive.read()); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingLZMA() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(SevenZMethod.LZMA); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingLZMA2() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(SevenZMethod.LZMA2); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingBZIP2() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(SevenZMethod.BZIP2); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingDeflate() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(SevenZMethod.DEFLATE); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingAES() throws Exception { + assumeStrongCryptoIsAvailable(); + try (SevenZFile archive = new SevenZFile(getFile("bla.encrypted.7z"), "foo".toCharArray())) { + multiByteReadConsistentlyReturnsMinusOneAtEof(archive); + } + } + + private void multiByteReadConsistentlyReturnsMinusOneAtEof(final SevenZMethod method) throws Exception { + createArchive(method); + try (SevenZFile archive = new SevenZFile(output)) { + multiByteReadConsistentlyReturnsMinusOneAtEof(archive); + } + } + + private void multiByteReadConsistentlyReturnsMinusOneAtEof(SevenZFile archive) throws Exception { + final byte[] buf = new byte[2]; + SevenZArchiveEntry entry = archive.getNextEntry(); + entry = archive.getNextEntry(); + readFully(archive); + assertEquals(-1, archive.read(buf)); + assertEquals(-1, archive.read(buf)); + } + + private void 
copy(final File src, final SevenZOutputFile dst) throws IOException { + FileInputStream fis = null; + try { + fis = new FileInputStream(src); + final byte[] buffer = new byte[8*1024]; + int bytesRead; + while ((bytesRead = fis.read(buffer)) >= 0) { + dst.write(buffer, 0, bytesRead); + } + } finally { + if (fis != null) { + fis.close(); + } + } + } + + private void readFully(final SevenZFile archive) throws IOException { + final byte[] buf = new byte[1024]; + int x = 0; + while (0 <= (x = archive.read(buf))) { + ; + } + } + + private static void assumeStrongCryptoIsAvailable() throws NoSuchAlgorithmException { + Assume.assumeTrue("test requires strong crypto", Cipher.getMaxAllowedKeyLength("AES/ECB/PKCS5Padding") >= 256); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java b/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java new file mode 100644 index 000000000..4792583c5 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java @@ -0,0 +1,333 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; +import org.apache.commons.compress.utils.CharsetNames; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class TarTestCase extends AbstractTestCase { + + @Test + public void testTarArchiveCreation() throws Exception { + final File output = new File(dir, "bla.tar"); + final File file1 = getFile("test1.xml"); + final OutputStream out = new FileOutputStream(output); + final ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream("tar", out); + final TarArchiveEntry entry = new TarArchiveEntry("testdata/test1.xml"); + entry.setModTime(0); + entry.setSize(file1.length()); + entry.setUserId(0); + entry.setGroupId(0); + entry.setUserName("avalon"); + entry.setGroupName("excalibur"); + entry.setMode(0100000); + os.putArchiveEntry(entry); + IOUtils.copy(new FileInputStream(file1), os); + os.closeArchiveEntry(); + os.close(); + } + + @Test + public void testTarArchiveLongNameCreation() throws Exception { + final String name = "testdata/12345678901234567890123456789012345678901234567890123456789012345678901234567890123456.xml"; + final byte[] bytes = name.getBytes(CharsetNames.UTF_8); + assertEquals(bytes.length, 99); + + final File output = new File(dir, "bla.tar"); + final File file1 = getFile("test1.xml"); + final OutputStream out = new FileOutputStream(output); + final ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream("tar", out); + final TarArchiveEntry entry = new TarArchiveEntry(name); + entry.setModTime(0); + entry.setSize(file1.length()); + entry.setUserId(0); + entry.setGroupId(0); + entry.setUserName("avalon"); + entry.setGroupName("excalibur"); + entry.setMode(0100000); + os.putArchiveEntry(entry); + final FileInputStream in = new FileInputStream(file1); + IOUtils.copy(in, os); + os.closeArchiveEntry(); + os.close(); + out.close(); + in.close(); + + + ArchiveOutputStream os2 = null; + try { + final String toLongName = "testdata/123456789012345678901234567890123456789012345678901234567890123456789012345678901234567.xml"; + final File output2 = new File(dir, "bla.tar"); + final OutputStream out2 = new FileOutputStream(output2); + os2 = new ArchiveStreamFactory().createArchiveOutputStream("tar", out2); + final TarArchiveEntry entry2 = new TarArchiveEntry(toLongName); + entry2.setModTime(0); + entry2.setSize(file1.length()); + entry2.setUserId(0); + entry2.setGroupId(0); + entry2.setUserName("avalon"); + entry2.setGroupName("excalibur"); + entry2.setMode(0100000); + os2.putArchiveEntry(entry); + IOUtils.copy(new FileInputStream(file1), os2); + os2.closeArchiveEntry(); + } catch(final IOException e) { + assertTrue(true); + } finally { + if (os2 != null){ + os2.close(); + } + } + } + + @Test + public void testTarUnarchive() throws Exception { + final File input = getFile("bla.tar"); + final InputStream is = new FileInputStream(input); + final ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream("tar", is); + final TarArchiveEntry entry = 
(TarArchiveEntry)in.getNextEntry(); + final OutputStream out = new FileOutputStream(new File(dir, entry.getName())); + IOUtils.copy(in, out); + in.close(); + out.close(); + } + + @Test + public void testCOMPRESS114() throws Exception { + final File input = getFile("COMPRESS-114.tar"); + final InputStream is = new FileInputStream(input); + final ArchiveInputStream in = new TarArchiveInputStream(is, + CharsetNames.ISO_8859_1); + TarArchiveEntry entry = (TarArchiveEntry)in.getNextEntry(); + assertEquals("3\u00b1\u00b1\u00b1F06\u00b1W2345\u00b1ZB\u00b1la\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1BLA", entry.getName()); + entry = (TarArchiveEntry)in.getNextEntry(); + assertEquals("0302-0601-3\u00b1\u00b1\u00b1F06\u00b1W2345\u00b1ZB\u00b1la\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1BLA",entry.getName()); + in.close(); + } + + @Test + public void testDirectoryEntryFromFile() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + TarArchiveOutputStream tos = null; + TarArchiveInputStream tis = null; + try { + archive = File.createTempFile("test.", ".tar", tmp[0]); + archive.deleteOnExit(); + tos = new TarArchiveOutputStream(new FileOutputStream(archive)); + final long beforeArchiveWrite = tmp[0].lastModified(); + final TarArchiveEntry in = new TarArchiveEntry(tmp[0], "foo"); + tos.putArchiveEntry(in); + tos.closeArchiveEntry(); + tos.close(); + tos = null; + tis = new TarArchiveInputStream(new FileInputStream(archive)); + final TarArchiveEntry out = tis.getNextTarEntry(); + tis.close(); + tis = null; + assertNotNull(out); + assertEquals("foo/", out.getName()); + assertEquals(0, out.getSize()); + // TAR stores time with a granularity of 1 second + assertEquals(beforeArchiveWrite / 1000, + out.getLastModifiedDate().getTime() / 1000); + assertTrue(out.isDirectory()); + } finally { + if (tis != null) { + tis.close(); + } + if (tos != null) { + tos.close(); + } + tryHardToDelete(archive); + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void testExplicitDirectoryEntry() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + TarArchiveOutputStream tos = null; + TarArchiveInputStream tis = null; + try { + archive = File.createTempFile("test.", ".tar", tmp[0]); + archive.deleteOnExit(); + tos = new TarArchiveOutputStream(new FileOutputStream(archive)); + final long beforeArchiveWrite = tmp[0].lastModified(); + final TarArchiveEntry in = new TarArchiveEntry("foo/"); + in.setModTime(beforeArchiveWrite); + tos.putArchiveEntry(in); + tos.closeArchiveEntry(); + tos.close(); + tos = null; + tis = new TarArchiveInputStream(new FileInputStream(archive)); + final TarArchiveEntry out = tis.getNextTarEntry(); + tis.close(); + tis = null; + assertNotNull(out); + assertEquals("foo/", out.getName()); + assertEquals(0, out.getSize()); + assertEquals(beforeArchiveWrite / 1000, + out.getLastModifiedDate().getTime() / 1000); + assertTrue(out.isDirectory()); + } finally { + if (tis != null) { + tis.close(); + } + if (tos != null) { + tos.close(); + } + tryHardToDelete(archive); + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void testFileEntryFromFile() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + TarArchiveOutputStream tos = null; + TarArchiveInputStream tis = null; + FileInputStream fis = null; + try { + archive = File.createTempFile("test.", ".tar", tmp[0]); + archive.deleteOnExit(); + tos = new TarArchiveOutputStream(new FileOutputStream(archive)); + final 
TarArchiveEntry in = new TarArchiveEntry(tmp[1], "foo"); + tos.putArchiveEntry(in); + final byte[] b = new byte[(int) tmp[1].length()]; + fis = new FileInputStream(tmp[1]); + while (fis.read(b) > 0) { + tos.write(b); + } + fis.close(); + fis = null; + tos.closeArchiveEntry(); + tos.close(); + tos = null; + tis = new TarArchiveInputStream(new FileInputStream(archive)); + final TarArchiveEntry out = tis.getNextTarEntry(); + tis.close(); + tis = null; + assertNotNull(out); + assertEquals("foo", out.getName()); + assertEquals(tmp[1].length(), out.getSize()); + assertEquals(tmp[1].lastModified() / 1000, + out.getLastModifiedDate().getTime() / 1000); + assertFalse(out.isDirectory()); + } finally { + if (tis != null) { + tis.close(); + } + if (tos != null) { + tos.close(); + } + tryHardToDelete(archive); + if (fis != null) { + fis.close(); + } + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void testExplicitFileEntry() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + TarArchiveOutputStream tos = null; + TarArchiveInputStream tis = null; + FileInputStream fis = null; + try { + archive = File.createTempFile("test.", ".tar", tmp[0]); + archive.deleteOnExit(); + tos = new TarArchiveOutputStream(new FileOutputStream(archive)); + final TarArchiveEntry in = new TarArchiveEntry("foo"); + in.setModTime(tmp[1].lastModified()); + in.setSize(tmp[1].length()); + tos.putArchiveEntry(in); + final byte[] b = new byte[(int) tmp[1].length()]; + fis = new FileInputStream(tmp[1]); + while (fis.read(b) > 0) { + tos.write(b); + } + fis.close(); + fis = null; + tos.closeArchiveEntry(); + tos.close(); + tos = null; + tis = new TarArchiveInputStream(new FileInputStream(archive)); + final TarArchiveEntry out = tis.getNextTarEntry(); + tis.close(); + tis = null; + assertNotNull(out); + assertEquals("foo", out.getName()); + assertEquals(tmp[1].length(), out.getSize()); + assertEquals(tmp[1].lastModified() / 1000, + out.getLastModifiedDate().getTime() / 1000); + assertFalse(out.isDirectory()); + } finally { + if (tis != null) { + tis.close(); + } + if (tos != null) { + tos.close(); + } + tryHardToDelete(archive); + if (fis != null) { + fis.close(); + } + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void testCOMPRESS178() throws Exception { + final File input = getFile("COMPRESS-178.tar"); + final InputStream is = new FileInputStream(input); + final ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream("tar", is); + try { + in.getNextEntry(); + fail("Expected IOException"); + } catch (final IOException e) { + final Throwable t = e.getCause(); + assertTrue("Expected cause = IllegalArgumentException", t instanceof IllegalArgumentException); + } + in.close(); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/TestArchiveStreamProvider.java b/src/test/java/org/apache/commons/compress/archivers/TestArchiveStreamProvider.java new file mode 100644 index 000000000..66b7f5eb2 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/TestArchiveStreamProvider.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers; + +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +public class TestArchiveStreamProvider implements ArchiveStreamProvider { + + public static final class ArchiveInvocationConfirmationException extends ArchiveException { + + private static final long serialVersionUID = 1L; + + public ArchiveInvocationConfirmationException(final String message) { + super(message); + } + } + + @Override + public ArchiveInputStream createArchiveInputStream(final String name, final InputStream in, final String encoding) throws ArchiveException { + throw new ArchiveInvocationConfirmationException(name); + } + + @Override + public ArchiveOutputStream createArchiveOutputStream(final String name, final OutputStream out, final String encoding) throws ArchiveException { + throw new ArchiveInvocationConfirmationException(name); + } + + @Override + public Set<String> getInputStreamArchiveNames() { + final HashSet<String> set = new HashSet<>(); + Collections.addAll(set, "ArchiveTestInput1"); + return set; + } + + @Override + public Set<String> getOutputStreamArchiveNames() { + final HashSet<String> set = new HashSet<>(); + Collections.addAll(set, "ArchiveTestOutput1"); + return set; + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/ZipTestCase.java b/src/test/java/org/apache/commons/compress/archivers/ZipTestCase.java new file mode 100644 index 000000000..1d0b1510d --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/ZipTestCase.java @@ -0,0 +1,710 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.zip.ZipEntry; +import java.util.zip.ZipException; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.zip.Zip64Mode; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntryPredicate; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.compress.archivers.zip.ZipMethod; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; +import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; +import org.junit.Assert; +import org.junit.Test; + +public final class ZipTestCase extends AbstractTestCase { + /** + * Archives 2 files and unarchives it again. If the file length of result + * and source is the same, it looks like the operations have worked + * @throws Exception + */ + @Test + public void testZipArchiveCreation() throws Exception { + // Archive + final File output = new File(dir, "bla.zip"); + final File file1 = getFile("test1.xml"); + final File file2 = getFile("test2.xml"); + + final OutputStream out = new FileOutputStream(output); + ArchiveOutputStream os = null; + try { + os = new ArchiveStreamFactory() + .createArchiveOutputStream("zip", out); + os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); + IOUtils.copy(new FileInputStream(file1), os); + os.closeArchiveEntry(); + + os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); + IOUtils.copy(new FileInputStream(file2), os); + os.closeArchiveEntry(); + } finally { + if (os != null) { + os.close(); + } + } + out.close(); + + // Unarchive the same + final List<File> results = new ArrayList<>(); + + final InputStream is = new FileInputStream(output); + ArchiveInputStream in = null; + try { + in = new ArchiveStreamFactory() + .createArchiveInputStream("zip", is); + + ZipArchiveEntry entry = null; + while((entry = (ZipArchiveEntry)in.getNextEntry()) != null) { + final File outfile = new File(resultDir.getCanonicalPath() + "/result/" + entry.getName()); + outfile.getParentFile().mkdirs(); + try (OutputStream o = new FileOutputStream(outfile)) { + IOUtils.copy(in, o); + } + results.add(outfile); + } + } finally { + if (in != null) { + in.close(); + } + } + is.close(); + + assertEquals(results.size(), 2); + File result = results.get(0); + assertEquals(file1.length(), result.length()); + result = results.get(1); + assertEquals(file2.length(), result.length()); + } + + /** + * Archives 2 files and unarchives it again. 
If the file contents of result + * and source is the same, it looks like the operations have worked + * @throws Exception + */ + @Test + public void testZipArchiveCreationInMemory() throws Exception { + final File file1 = getFile("test1.xml"); + final File file2 = getFile("test2.xml"); + final byte[] file1Contents = new byte[(int) file1.length()]; + final byte[] file2Contents = new byte[(int) file2.length()]; + IOUtils.readFully(new FileInputStream(file1), file1Contents); + IOUtils.readFully(new FileInputStream(file2), file2Contents); + + SeekableInMemoryByteChannel channel = new SeekableInMemoryByteChannel(); + try (ZipArchiveOutputStream os = new ZipArchiveOutputStream(channel)) { + os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); + os.write(file1Contents); + os.closeArchiveEntry(); + + os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); + os.write(file2Contents); + os.closeArchiveEntry(); + } + + // Unarchive the same + final List<byte[]> results = new ArrayList<>(); + + try (ArchiveInputStream in = new ArchiveStreamFactory() + .createArchiveInputStream("zip", new ByteArrayInputStream(channel.array()))) { + + ZipArchiveEntry entry; + while((entry = (ZipArchiveEntry)in.getNextEntry()) != null) { + byte[] result = new byte[(int) entry.getSize()]; + IOUtils.readFully(in, result); + results.add(result); + } + } + + assertArrayEquals(results.get(0), file1Contents); + assertArrayEquals(results.get(1), file2Contents); + } + + /** + * Simple unarchive test. Asserts nothing. + * @throws Exception + */ + @Test + public void testZipUnarchive() throws Exception { + final File input = getFile("bla.zip"); + try (final InputStream is = new FileInputStream(input); + final ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream("zip", is)) { + final ZipArchiveEntry entry = (ZipArchiveEntry) in.getNextEntry(); + try (final OutputStream out = new FileOutputStream(new File(dir, entry.getName()))) { + IOUtils.copy(in, out); + } + } + } + + /** + * Test case for + * <a href="https://issues.apache.org/jira/browse/COMPRESS-208" + * >COMPRESS-208</a>. + */ + @Test + public void testSkipsPK00Prefix() throws Exception { + final File input = getFile("COMPRESS-208.zip"); + final ArrayList<String> al = new ArrayList<>(); + al.add("test1.xml"); + al.add("test2.xml"); + try (InputStream is = new FileInputStream(input)) { + checkArchiveContent(new ZipArchiveInputStream(is), al); + } + } + + /** + * Test case for + * <a href="https://issues.apache.org/jira/browse/COMPRESS-93" + * >COMPRESS-93</a>. + */ + @Test + public void testSupportedCompressionMethod() throws IOException { + /* + ZipFile bla = new ZipFile(getFile("bla.zip")); + assertTrue(bla.canReadEntryData(bla.getEntry("test1.xml"))); + bla.close(); + */ + + final ZipFile moby = new ZipFile(getFile("moby.zip")); + final ZipArchiveEntry entry = moby.getEntry("README"); + assertEquals("method", ZipMethod.TOKENIZATION.getCode(), entry.getMethod()); + assertFalse(moby.canReadEntryData(entry)); + moby.close(); + } + + /** + * Test case for being able to skip an entry in an + * {@link ZipArchiveInputStream} even if the compression method of that + * entry is unsupported. 
+ * + * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-93" + * >COMPRESS-93</a> + */ + @Test + public void testSkipEntryWithUnsupportedCompressionMethod() + throws IOException { + try (ZipArchiveInputStream zip = new ZipArchiveInputStream(new FileInputStream(getFile("moby.zip")))) { + final ZipArchiveEntry entry = zip.getNextZipEntry(); + assertEquals("method", ZipMethod.TOKENIZATION.getCode(), entry.getMethod()); + assertEquals("README", entry.getName()); + assertFalse(zip.canReadEntryData(entry)); + try { + assertNull(zip.getNextZipEntry()); + } catch (final IOException e) { + e.printStackTrace(); + fail("COMPRESS-93: Unable to skip an unsupported zip entry"); + } + } + } + + /** + * Checks if all entries from a nested archive can be read. + * The archive: OSX_ArchiveWithNestedArchive.zip contains: + * NestedArchiv.zip and test.xml3. + * + * The nested archive: NestedArchive.zip contains test1.xml and test2.xml + * + * @throws Exception + */ + @Test + public void testListAllFilesWithNestedArchive() throws Exception { + final File input = getFile("OSX_ArchiveWithNestedArchive.zip"); + + final List<String> results = new ArrayList<>(); + final List<ZipException> expectedExceptions = new ArrayList<>(); + + final InputStream is = new FileInputStream(input); + ArchiveInputStream in = null; + try { + in = new ArchiveStreamFactory().createArchiveInputStream("zip", is); + + ZipArchiveEntry entry = null; + while ((entry = (ZipArchiveEntry) in.getNextEntry()) != null) { + results.add(entry.getName()); + + final ArchiveInputStream nestedIn = new ArchiveStreamFactory().createArchiveInputStream("zip", in); + try { + ZipArchiveEntry nestedEntry = null; + while ((nestedEntry = (ZipArchiveEntry) nestedIn.getNextEntry()) != null) { + results.add(nestedEntry.getName()); + } + } catch (ZipException ex) { + // expected since you cannot create a final ArchiveInputStream from test3.xml + expectedExceptions.add(ex); + } + // nested stream must not be closed here + } + } finally { + if (in != null) { + in.close(); + } + } + is.close(); + + assertTrue(results.contains("NestedArchiv.zip")); + assertTrue(results.contains("test1.xml")); + assertTrue(results.contains("test2.xml")); + assertTrue(results.contains("test3.xml")); + assertEquals(1, expectedExceptions.size()); + } + + @Test + public void testDirectoryEntryFromFile() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + ZipArchiveOutputStream zos = null; + ZipFile zf = null; + try { + archive = File.createTempFile("test.", ".zip", tmp[0]); + archive.deleteOnExit(); + zos = new ZipArchiveOutputStream(archive); + final long beforeArchiveWrite = tmp[0].lastModified(); + final ZipArchiveEntry in = new ZipArchiveEntry(tmp[0], "foo"); + zos.putArchiveEntry(in); + zos.closeArchiveEntry(); + zos.close(); + zos = null; + zf = new ZipFile(archive); + final ZipArchiveEntry out = zf.getEntry("foo/"); + assertNotNull(out); + assertEquals("foo/", out.getName()); + assertEquals(0, out.getSize()); + // ZIP stores time with a granularity of 2 seconds + assertEquals(beforeArchiveWrite / 2000, + out.getLastModifiedDate().getTime() / 2000); + assertTrue(out.isDirectory()); + } finally { + ZipFile.closeQuietly(zf); + if (zos != null) { + zos.close(); + } + tryHardToDelete(archive); + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void testExplicitDirectoryEntry() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + ZipArchiveOutputStream zos = null; + ZipFile zf = null; + 
try { + archive = File.createTempFile("test.", ".zip", tmp[0]); + archive.deleteOnExit(); + zos = new ZipArchiveOutputStream(archive); + final long beforeArchiveWrite = tmp[0].lastModified(); + final ZipArchiveEntry in = new ZipArchiveEntry("foo/"); + in.setTime(beforeArchiveWrite); + zos.putArchiveEntry(in); + zos.closeArchiveEntry(); + zos.close(); + zos = null; + zf = new ZipFile(archive); + final ZipArchiveEntry out = zf.getEntry("foo/"); + assertNotNull(out); + assertEquals("foo/", out.getName()); + assertEquals(0, out.getSize()); + assertEquals(beforeArchiveWrite / 2000, + out.getLastModifiedDate().getTime() / 2000); + assertTrue(out.isDirectory()); + } finally { + ZipFile.closeQuietly(zf); + if (zos != null) { + zos.close(); + } + tryHardToDelete(archive); + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + String first_payload = "ABBA"; + String second_payload = "AAAAAAAAAAAA"; + ZipArchiveEntryPredicate allFilesPredicate = new ZipArchiveEntryPredicate() { + @Override + public boolean test(final ZipArchiveEntry zipArchiveEntry) { + return true; + } + }; + + @Test + public void testCopyRawEntriesFromFile() + throws IOException { + + final File[] tmp = createTempDirAndFile(); + final File reference = createReferenceFile(tmp[0], Zip64Mode.Never, "expected."); + + final File a1 = File.createTempFile("src1.", ".zip", tmp[0]); + try (final ZipArchiveOutputStream zos = new ZipArchiveOutputStream(a1)) { + zos.setUseZip64(Zip64Mode.Never); + createFirstEntry(zos).close(); + } + + final File a2 = File.createTempFile("src2.", ".zip", tmp[0]); + try (final ZipArchiveOutputStream zos1 = new ZipArchiveOutputStream(a2)) { + zos1.setUseZip64(Zip64Mode.Never); + createSecondEntry(zos1).close(); + } + + try (final ZipFile zf1 = new ZipFile(a1); final ZipFile zf2 = new ZipFile(a2)) { + final File fileResult = File.createTempFile("file-actual.", ".zip", tmp[0]); + try (final ZipArchiveOutputStream zos2 = new ZipArchiveOutputStream(fileResult)) { + zf1.copyRawEntries(zos2, allFilesPredicate); + zf2.copyRawEntries(zos2, allFilesPredicate); + } + // copyRawEntries does not add superfluous zip64 header like regular zip output stream + // does when using Zip64Mode.AsNeeded so all the source material has to be Zip64Mode.Never, + // if exact binary equality is to be achieved + assertSameFileContents(reference, fileResult); + } + } + + @Test + public void testCopyRawZip64EntryFromFile() + throws IOException { + + final File[] tmp = createTempDirAndFile(); + final File reference = File.createTempFile("z64reference.", ".zip", tmp[0]); + try (final ZipArchiveOutputStream zos1 = new ZipArchiveOutputStream(reference)) { + zos1.setUseZip64(Zip64Mode.Always); + createFirstEntry(zos1); + } + + final File a1 = File.createTempFile("zip64src.", ".zip", tmp[0]); + try (final ZipArchiveOutputStream zos = new ZipArchiveOutputStream(a1)) { + zos.setUseZip64(Zip64Mode.Always); + createFirstEntry(zos).close(); + } + + final File fileResult = File.createTempFile("file-actual.", ".zip", tmp[0]); + try (final ZipFile zf1 = new ZipFile(a1)) { + try (final ZipArchiveOutputStream zos2 = new ZipArchiveOutputStream(fileResult)) { + zos2.setUseZip64(Zip64Mode.Always); + zf1.copyRawEntries(zos2, allFilesPredicate); + } + assertSameFileContents(reference, fileResult); + } + } + + @Test + public void testUnixModeInAddRaw() throws IOException { + + final File[] tmp = createTempDirAndFile(); + + final File a1 = File.createTempFile("unixModeBits.", ".zip", tmp[0]); + try (final ZipArchiveOutputStream zos = new ZipArchiveOutputStream(a1)) 
{ + + final ZipArchiveEntry archiveEntry = new ZipArchiveEntry("fred"); + archiveEntry.setUnixMode(0664); + archiveEntry.setMethod(ZipEntry.DEFLATED); + zos.addRawArchiveEntry(archiveEntry, new ByteArrayInputStream("fud".getBytes())); + } + + try (final ZipFile zf1 = new ZipFile(a1)) { + final ZipArchiveEntry fred = zf1.getEntry("fred"); + assertEquals(0664, fred.getUnixMode()); + } + } + + private File createReferenceFile(final File directory, final Zip64Mode zipMode, final String prefix) + throws IOException { + final File reference = File.createTempFile(prefix, ".zip", directory); + try (final ZipArchiveOutputStream zos = new ZipArchiveOutputStream(reference)) { + zos.setUseZip64(zipMode); + createFirstEntry(zos); + createSecondEntry(zos); + } + return reference; + } + + private ZipArchiveOutputStream createFirstEntry(final ZipArchiveOutputStream zos) throws IOException { + createArchiveEntry(first_payload, zos, "file1.txt"); + return zos; + } + + private ZipArchiveOutputStream createSecondEntry(final ZipArchiveOutputStream zos) throws IOException { + createArchiveEntry(second_payload, zos, "file2.txt"); + return zos; + } + + + private void assertSameFileContents(final File expectedFile, final File actualFile) throws IOException { + final int size = (int) Math.max(expectedFile.length(), actualFile.length()); + try (final ZipFile expected = new ZipFile(expectedFile); final ZipFile actual = new ZipFile(actualFile)) { + final byte[] expectedBuf = new byte[size]; + final byte[] actualBuf = new byte[size]; + + final Enumeration<ZipArchiveEntry> actualInOrder = actual.getEntriesInPhysicalOrder(); + final Enumeration<ZipArchiveEntry> expectedInOrder = expected.getEntriesInPhysicalOrder(); + + while (actualInOrder.hasMoreElements()) { + final ZipArchiveEntry actualElement = actualInOrder.nextElement(); + final ZipArchiveEntry expectedElement = expectedInOrder.nextElement(); + assertEquals(expectedElement.getName(), actualElement.getName()); + // Don't compare timestamps since they may vary; + // there's no support for stubbed out clock (TimeSource) in ZipArchiveOutputStream + assertEquals(expectedElement.getMethod(), actualElement.getMethod()); + assertEquals(expectedElement.getGeneralPurposeBit(), actualElement.getGeneralPurposeBit()); + assertEquals(expectedElement.getCrc(), actualElement.getCrc()); + assertEquals(expectedElement.getCompressedSize(), actualElement.getCompressedSize()); + assertEquals(expectedElement.getSize(), actualElement.getSize()); + assertEquals(expectedElement.getExternalAttributes(), actualElement.getExternalAttributes()); + assertEquals(expectedElement.getInternalAttributes(), actualElement.getInternalAttributes()); + + final InputStream actualIs = actual.getInputStream(actualElement); + final InputStream expectedIs = expected.getInputStream(expectedElement); + IOUtils.readFully(expectedIs, expectedBuf); + IOUtils.readFully(actualIs, actualBuf); + expectedIs.close(); + actualIs.close(); + Assert.assertArrayEquals(expectedBuf, actualBuf); // Buffers are larger than payload. 
dont care + } + + } + } + + + private void createArchiveEntry(final String payload, final ZipArchiveOutputStream zos, final String name) + throws IOException { + final ZipArchiveEntry in = new ZipArchiveEntry(name); + zos.putArchiveEntry(in); + + zos.write(payload.getBytes()); + zos.closeArchiveEntry(); + } + + @Test + public void testFileEntryFromFile() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + ZipArchiveOutputStream zos = null; + ZipFile zf = null; + FileInputStream fis = null; + try { + archive = File.createTempFile("test.", ".zip", tmp[0]); + archive.deleteOnExit(); + zos = new ZipArchiveOutputStream(archive); + final ZipArchiveEntry in = new ZipArchiveEntry(tmp[1], "foo"); + zos.putArchiveEntry(in); + final byte[] b = new byte[(int) tmp[1].length()]; + fis = new FileInputStream(tmp[1]); + while (fis.read(b) > 0) { + zos.write(b); + } + fis.close(); + fis = null; + zos.closeArchiveEntry(); + zos.close(); + zos = null; + zf = new ZipFile(archive); + final ZipArchiveEntry out = zf.getEntry("foo"); + assertNotNull(out); + assertEquals("foo", out.getName()); + assertEquals(tmp[1].length(), out.getSize()); + assertEquals(tmp[1].lastModified() / 2000, + out.getLastModifiedDate().getTime() / 2000); + assertFalse(out.isDirectory()); + } finally { + ZipFile.closeQuietly(zf); + if (zos != null) { + zos.close(); + } + tryHardToDelete(archive); + if (fis != null) { + fis.close(); + } + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void testExplicitFileEntry() throws Exception { + final File[] tmp = createTempDirAndFile(); + File archive = null; + ZipArchiveOutputStream zos = null; + ZipFile zf = null; + FileInputStream fis = null; + try { + archive = File.createTempFile("test.", ".zip", tmp[0]); + archive.deleteOnExit(); + zos = new ZipArchiveOutputStream(archive); + final ZipArchiveEntry in = new ZipArchiveEntry("foo"); + in.setTime(tmp[1].lastModified()); + in.setSize(tmp[1].length()); + zos.putArchiveEntry(in); + final byte[] b = new byte[(int) tmp[1].length()]; + fis = new FileInputStream(tmp[1]); + while (fis.read(b) > 0) { + zos.write(b); + } + fis.close(); + fis = null; + zos.closeArchiveEntry(); + zos.close(); + zos = null; + zf = new ZipFile(archive); + final ZipArchiveEntry out = zf.getEntry("foo"); + assertNotNull(out); + assertEquals("foo", out.getName()); + assertEquals(tmp[1].length(), out.getSize()); + assertEquals(tmp[1].lastModified() / 2000, + out.getLastModifiedDate().getTime() / 2000); + assertFalse(out.isDirectory()); + } finally { + ZipFile.closeQuietly(zf); + if (zos != null) { + zos.close(); + } + tryHardToDelete(archive); + if (fis != null) { + fis.close(); + } + tryHardToDelete(tmp[1]); + rmdir(tmp[0]); + } + } + + @Test + public void inputStreamStatisticsOfZipBombExcel() throws IOException, ArchiveException { + Map<String, List<Long>> expected = new HashMap<String, List<Long>>() {{ + put("[Content_Types].xml", Arrays.asList(8390036L, 8600L)); + put("xl/worksheets/sheet1.xml", Arrays.asList(1348L, 508L)); + }}; + testInputStreamStatistics("zipbomb.xlsx", expected); + } + + @Test + public void inputStreamStatisticsForImplodedEntry() throws IOException, ArchiveException { + Map<String, List<Long>> expected = new HashMap<String, List<Long>>() {{ + put("LICENSE.TXT", Arrays.asList(11560L, 4131L)); + }}; + testInputStreamStatistics("imploding-8Kdict-3trees.zip", expected); + } + + @Test + public void inputStreamStatisticsForShrunkEntry() throws IOException, ArchiveException { + Map<String, List<Long>> 
expected = new HashMap<String, List<Long>>() {{ + put("TEST1.XML", Arrays.asList(76L, 66L)); + put("TEST2.XML", Arrays.asList(81L, 76L)); + }}; + testInputStreamStatistics("SHRUNK.ZIP", expected); + } + + @Test + public void inputStreamStatisticsForStoredEntry() throws IOException, ArchiveException { + Map<String, List<Long>> expected = new HashMap<String, List<Long>>() {{ + put("test.txt", Arrays.asList(5L, 5L)); + }}; + testInputStreamStatistics("COMPRESS-264.zip", expected); + } + + @Test + public void inputStreamStatisticsForBzip2Entry() throws IOException, ArchiveException { + Map<String, List<Long>> expected = new HashMap<String, List<Long>>() {{ + put("lots-of-as", Arrays.asList(42L, 39L)); + }}; + testInputStreamStatistics("bzip2-zip.zip", expected); + } + + @Test + public void inputStreamStatisticsForDeflate64Entry() throws IOException, ArchiveException { + Map<String, List<Long>> expected = new HashMap<String, List<Long>>() {{ + put("input2", Arrays.asList(3072L, 2111L)); + }}; + testInputStreamStatistics("COMPRESS-380/COMPRESS-380.zip", expected); + } + + private void testInputStreamStatistics(String fileName, Map<String, List<Long>> expectedStatistics) + throws IOException, ArchiveException { + final File input = getFile(fileName); + + final Map<String,List<List<Long>>> actualStatistics = new HashMap<>(); + + // stream access + try (final FileInputStream fis = new FileInputStream(input); + final ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream("zip", fis)) { + for (ArchiveEntry entry; (entry = in.getNextEntry()) != null; ) { + readStream(in, entry, actualStatistics); + } + } + + // file access + try (final ZipFile zf = new ZipFile(input)) { + final Enumeration<ZipArchiveEntry> entries = zf.getEntries(); + while (entries.hasMoreElements()) { + final ZipArchiveEntry zae = entries.nextElement(); + try (InputStream in = zf.getInputStream(zae)) { + readStream(in, zae, actualStatistics); + } + } + } + + // compare statistics of stream / file access + for (Map.Entry<String,List<List<Long>>> me : actualStatistics.entrySet()) { + assertEquals("Mismatch of stats for: " + me.getKey(), + me.getValue().get(0), me.getValue().get(1)); + } + + for (Map.Entry<String, List<Long>> me : expectedStatistics.entrySet()) { + assertEquals("Mismatch of stats with expected value for: " + me.getKey(), + me.getValue(), actualStatistics.get(me.getKey()).get(0)); + } + } + + private void readStream(final InputStream in, final ArchiveEntry entry, final Map<String,List<List<Long>>> map) throws IOException { + final byte[] buf = new byte[4096]; + final InputStreamStatistics stats = (InputStreamStatistics) in; + while (in.read(buf) != -1); + + final String name = entry.getName(); + final List<List<Long>> l; + if (map.containsKey(name)) { + l = map.get(name); + } else { + map.put(name, l = new ArrayList<>()); + } + + final long t = stats.getUncompressedCount(); + final long b = stats.getCompressedCount(); + l.add(Arrays.asList(t, b)); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/ar/ArArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/ar/ArArchiveInputStreamTest.java new file mode 100644 index 000000000..e665ff952 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/ar/ArArchiveInputStreamTest.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.ar; + +import static org.junit.Assert.*; + +import java.io.BufferedInputStream; +import java.io.FileInputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.utils.ArchiveUtils; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public class ArArchiveInputStreamTest extends AbstractTestCase { + + @Test + public void testReadLongNamesGNU() throws Exception { + checkLongNameEntry("longfile_gnu.ar"); + } + + @Test + public void testReadLongNamesBSD() throws Exception { + checkLongNameEntry("longfile_bsd.ar"); + } + + private void checkLongNameEntry(final String archive) throws Exception { + try (final FileInputStream fis = new FileInputStream(getFile(archive)); + final ArArchiveInputStream s = new ArArchiveInputStream(new BufferedInputStream(fis))) { + ArchiveEntry e = s.getNextEntry(); + assertEquals("this_is_a_long_file_name.txt", e.getName()); + assertEquals(14, e.getSize()); + final byte[] hello = new byte[14]; + s.read(hello); + assertEquals("Hello, world!\n", ArchiveUtils.toAsciiString(hello)); + e = s.getNextEntry(); + assertEquals("this_is_a_long_file_name_as_well.txt", e.getName()); + assertEquals(4, e.getSize()); + final byte[] bye = new byte[4]; + s.read(bye); + assertEquals("Bye\n", ArchiveUtils.toAsciiString(bye)); + assertNull(s.getNextEntry()); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + try (FileInputStream in = new FileInputStream(getFile("bla.ar")); + ArArchiveInputStream archive = new ArArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read()); + assertEquals(-1, archive.read()); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + byte[] buf = new byte[2]; + try (FileInputStream in = new FileInputStream(getFile("bla.ar")); + ArArchiveInputStream archive = new ArArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read(buf)); + assertEquals(-1, archive.read(buf)); + } + } + + @Test(expected=IllegalStateException.class) + public void cantReadWithoutOpeningAnEntry() throws Exception { + try (FileInputStream in = new FileInputStream(getFile("bla.ar")); + ArArchiveInputStream archive = new ArArchiveInputStream(in)) { + archive.read(); + } + } + + @Test(expected=IllegalStateException.class) + public void cantReadAfterClose() throws Exception { + try (FileInputStream in = new FileInputStream(getFile("bla.ar")); + ArArchiveInputStream archive = new ArArchiveInputStream(in)) { + archive.close(); + archive.read(); + } + } +} diff --git 
a/src/test/java/org/apache/commons/compress/archivers/ar/ArArchiveOutputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/ar/ArArchiveOutputStreamTest.java new file mode 100644 index 000000000..0c309aab9 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/ar/ArArchiveOutputStreamTest.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.ar; + +import static org.junit.Assert.*; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.compress.AbstractTestCase; +import org.junit.Test; + +public class ArArchiveOutputStreamTest extends AbstractTestCase { + + @Test + public void testLongFileNamesCauseExceptionByDefault() { + try (ArArchiveOutputStream os = new ArArchiveOutputStream(new ByteArrayOutputStream())) { + final ArArchiveEntry ae = new ArArchiveEntry("this_is_a_long_name.txt", 0); + os.putArchiveEntry(ae); + fail("Expected an exception"); + } catch (final IOException ex) { + assertTrue(ex.getMessage().startsWith("filename too long")); + } + } + + @Test + public void testLongFileNamesWorkUsingBSDDialect() throws Exception { + final File[] df = createTempDirAndFile(); + try (FileOutputStream fos = new FileOutputStream(df[1]); + ArArchiveOutputStream os = new ArArchiveOutputStream(fos)) { + os.setLongFileMode(ArArchiveOutputStream.LONGFILE_BSD); + final ArArchiveEntry ae = new ArArchiveEntry("this_is_a_long_name.txt", 14); + os.putArchiveEntry(ae); + os.write(new byte[] { 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', '\n' }); + os.closeArchiveEntry(); + + final List<String> expected = new ArrayList<>(); + expected.add("this_is_a_long_name.txt"); + checkArchiveContent(df[1], expected); + } finally { + rmdir(df[0]); + } + } +}
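The ArArchiveOutputStreamTest above shows that ar entry names longer than 16 characters are rejected by default and only work once the BSD long-file dialect is enabled. A minimal standalone sketch of the same round trip, using only the API the test exercises; the class name and the in-memory buffers are illustrative and not part of the patch:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.commons.compress.archivers.ar.ArArchiveEntry;
    import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
    import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
    import org.apache.commons.compress.utils.IOUtils;

    public class ArLongNameRoundTrip {
        public static void main(String[] args) throws Exception {
            byte[] payload = "Hello, world!\n".getBytes(StandardCharsets.US_ASCII);
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            try (ArArchiveOutputStream os = new ArArchiveOutputStream(bos)) {
                // Names longer than 16 characters throw unless the BSD dialect is selected.
                os.setLongFileMode(ArArchiveOutputStream.LONGFILE_BSD);
                os.putArchiveEntry(new ArArchiveEntry("this_is_a_long_file_name.txt", payload.length));
                os.write(payload);
                os.closeArchiveEntry();
            }
            try (ArArchiveInputStream is = new ArArchiveInputStream(new ByteArrayInputStream(bos.toByteArray()))) {
                ArArchiveEntry entry = is.getNextArEntry();
                // Prints the long name and the payload read back from the archive.
                System.out.println(entry.getName() + " -> "
                        + new String(IOUtils.toByteArray(is), StandardCharsets.US_ASCII));
            }
        }
    }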
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/archivers/arj/ArjArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/arj/ArjArchiveInputStreamTest.java new file mode 100644 index 000000000..70a8b1f5b --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/arj/ArjArchiveInputStreamTest.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers.arj; + +import static org.junit.Assert.*; + +import java.io.FileInputStream; +import java.util.Calendar; +import java.util.TimeZone; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public class ArjArchiveInputStreamTest extends AbstractTestCase { + + @Test + public void testArjUnarchive() throws Exception { + final StringBuilder expected = new StringBuilder(); + expected.append("test1.xml<?xml version=\"1.0\"?>\n"); + expected.append("<empty/>test2.xml<?xml version=\"1.0\"?>\n"); + expected.append("<empty/>\n"); + + + final ArjArchiveInputStream in = new ArjArchiveInputStream(new FileInputStream(getFile("bla.arj"))); + ArjArchiveEntry entry; + + final StringBuilder result = new StringBuilder(); + while ((entry = in.getNextEntry()) != null) { + result.append(entry.getName()); + int tmp; + while ((tmp = in.read()) != -1) { + result.append((char) tmp); + } + assertFalse(entry.isDirectory()); + } + in.close(); + assertEquals(result.toString(), expected.toString()); + } + + @Test + public void testReadingOfAttributesDosVersion() throws Exception { + final ArjArchiveInputStream in = new ArjArchiveInputStream(new FileInputStream(getFile("bla.arj"))); + final ArjArchiveEntry entry = in.getNextEntry(); + assertEquals("test1.xml", entry.getName()); + assertEquals(30, entry.getSize()); + assertEquals(0, entry.getUnixMode()); + final Calendar cal = Calendar.getInstance(); + cal.set(2008, 9, 6, 23, 50, 52); + cal.set(Calendar.MILLISECOND, 0); + assertEquals(cal.getTime(), entry.getLastModifiedDate()); + in.close(); + } + + @Test + public void testReadingOfAttributesUnixVersion() throws Exception { + final ArjArchiveInputStream in = new ArjArchiveInputStream(new FileInputStream(getFile("bla.unix.arj"))); + final ArjArchiveEntry entry = in.getNextEntry(); + assertEquals("test1.xml", entry.getName()); + assertEquals(30, entry.getSize()); + assertEquals(0664, entry.getUnixMode() & 07777 /* UnixStat.PERM_MASK */); + final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT+0000")); + cal.set(2008, 9, 6, 21, 50, 52); + cal.set(Calendar.MILLISECOND, 0); + assertEquals(cal.getTime(), entry.getLastModifiedDate()); + 
in.close(); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + try (FileInputStream in = new FileInputStream(getFile("bla.arj")); + ArjArchiveInputStream archive = new ArjArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read()); + assertEquals(-1, archive.read()); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + byte[] buf = new byte[2]; + try (FileInputStream in = new FileInputStream(getFile("bla.arj")); + ArjArchiveInputStream archive = new ArjArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read(buf)); + assertEquals(-1, archive.read(buf)); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/arj/CoverageTest.java b/src/test/java/org/apache/commons/compress/archivers/arj/CoverageTest.java new file mode 100644 index 000000000..6dd9f70dc --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/arj/CoverageTest.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.arj; + +import static org.junit.Assert.assertNotNull; + +import org.apache.commons.compress.archivers.arj.ArjArchiveEntry.HostOs; +import org.junit.Test; + +public class CoverageTest { + + @Test + public void testHostOsInstance() { + HostOs hostOs = new HostOs(); + assertNotNull(hostOs); + } + @Test + public void testHeaderInstances() { + assertNotNull(new LocalFileHeader.FileTypes()); + assertNotNull(new LocalFileHeader.Methods()); + assertNotNull(new LocalFileHeader.Flags()); + assertNotNull(new MainHeader.Flags()); + } + @Test + public void testCallLFHToString() { + LocalFileHeader lfh = new LocalFileHeader(); + assertNotNull(lfh.toString()); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java new file mode 100644 index 000000000..55a9f753f --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.cpio; + +import static org.junit.Assert.*; + +import java.io.FileInputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public class CpioArchiveInputStreamTest extends AbstractTestCase { + + @Test + public void testCpioUnarchive() throws Exception { + final StringBuilder expected = new StringBuilder(); + expected.append("./test1.xml<?xml version=\"1.0\"?>\n"); + expected.append("<empty/>./test2.xml<?xml version=\"1.0\"?>\n"); + expected.append("<empty/>\n"); + + + final CpioArchiveInputStream in = new CpioArchiveInputStream(new FileInputStream(getFile("bla.cpio"))); + CpioArchiveEntry entry; + + final StringBuilder result = new StringBuilder(); + while ((entry = (CpioArchiveEntry) in.getNextEntry()) != null) { + result.append(entry.getName()); + int tmp; + while ((tmp = in.read()) != -1) { + result.append((char) tmp); + } + } + in.close(); + assertEquals(result.toString(), expected.toString()); + } + + @Test + public void testCpioUnarchiveCreatedByRedlineRpm() throws Exception { + final CpioArchiveInputStream in = + new CpioArchiveInputStream(new FileInputStream(getFile("redline.cpio"))); + CpioArchiveEntry entry= null; + + int count = 0; + while ((entry = (CpioArchiveEntry) in.getNextEntry()) != null) { + count++; + assertNotNull(entry); + } + in.close(); + + assertEquals(count, 1); + } + + @Test + public void testCpioUnarchiveMultibyteCharName() throws Exception { + final CpioArchiveInputStream in = + new CpioArchiveInputStream(new FileInputStream(getFile("COMPRESS-459.cpio")), "UTF-8"); + CpioArchiveEntry entry= null; + + int count = 0; + while ((entry = (CpioArchiveEntry) in.getNextEntry()) != null) { + count++; + assertNotNull(entry); + } + in.close(); + + assertEquals(2, count); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + try (FileInputStream in = new FileInputStream(getFile("bla.cpio")); + CpioArchiveInputStream archive = new CpioArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read()); + assertEquals(-1, archive.read()); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + byte[] buf = new byte[2]; + try (FileInputStream in = new FileInputStream(getFile("bla.cpio")); + CpioArchiveInputStream archive = new CpioArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read(buf)); + assertEquals(-1, archive.read(buf)); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveOutputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveOutputStreamTest.java new file mode 100644 index 000000000..ba83771a3 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveOutputStreamTest.java @@ -0,0 +1,71 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.cpio; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public class CpioArchiveOutputStreamTest extends AbstractTestCase { + + @Test + public void testWriteOldBinary() throws Exception { + final File f = getFile("test1.xml"); + final File output = new File(dir, "test.cpio"); + final FileOutputStream out = new FileOutputStream(output); + InputStream in = null; + try { + final CpioArchiveOutputStream os = + new CpioArchiveOutputStream(out, CpioConstants + .FORMAT_OLD_BINARY); + os.putArchiveEntry(new CpioArchiveEntry(CpioConstants + .FORMAT_OLD_BINARY, + f, "test1.xml")); + IOUtils.copy(in = new FileInputStream(f), os); + in.close(); + in = null; + os.closeArchiveEntry(); + os.close(); + } finally { + if (in != null) { + in.close(); + } + out.close(); + } + + try { + in = new CpioArchiveInputStream(new FileInputStream(output)); + final CpioArchiveEntry e = ((CpioArchiveInputStream) in) + .getNextCPIOEntry(); + assertEquals("test1.xml", e.getName()); + assertNull(((CpioArchiveInputStream) in).getNextEntry()); + } finally { + if (in != null) { + in.close(); + } + } + } +}
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveTest.java b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveTest.java new file mode 100644 index 000000000..0dac090f9 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.cpio; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.util.Arrays; +import java.util.Collection; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized.Parameters; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class CpioArchiveTest { + + @Parameters(name = "using {0}") + public static Collection<Object[]> factory() { + return Arrays.asList(new Object[][] { + new Object[] { CpioConstants.FORMAT_NEW }, + new Object[] { CpioConstants.FORMAT_NEW_CRC }, + new Object[] { CpioConstants.FORMAT_OLD_ASCII }, + new Object[] { CpioConstants.FORMAT_OLD_BINARY }, + }); + } + + private final short format; + + public CpioArchiveTest(short format) { + this.format = format; + } + + @Test + public void utf18RoundtripTest() throws Exception { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + try (CpioArchiveOutputStream os = new CpioArchiveOutputStream(baos, format, CpioConstants.BLOCK_SIZE, + "UTF-16LE")) { + CpioArchiveEntry entry = new CpioArchiveEntry(format, "T\u00e4st.txt", 4); + if (format == CpioConstants.FORMAT_NEW_CRC) { + entry.setChksum(10); + } + os.putArchiveEntry(entry); + os.write(new byte[] { 1, 2, 3, 4 }); + os.closeArchiveEntry(); + } + baos.close(); + try (ByteArrayInputStream bin = new ByteArrayInputStream(baos.toByteArray()); + CpioArchiveInputStream in = new CpioArchiveInputStream(bin, "UTF-16LE")) { + CpioArchiveEntry entry = (CpioArchiveEntry) in.getNextEntry(); + Assert.assertNotNull(entry); + Assert.assertEquals("T\u00e4st.txt", entry.getName()); + Assert.assertArrayEquals(new byte[] { 1, 2, 3, 4 }, IOUtils.toByteArray(in)); + } + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/cpio/CpioUtilTest.java b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioUtilTest.java new file mode 100644 index 000000000..5b1edb517 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioUtilTest.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.cpio; + +import org.junit.Test; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class CpioUtilTest { + + @Test + public void oldBinMagic2ByteArrayNotSwapped() { + assertArrayEquals(new byte[] { (byte) 0xc7, 0x71 }, + CpioUtil.long2byteArray(CpioConstants.MAGIC_OLD_BINARY, + 2, false)); + } + + @Test + public void oldBinMagic2ByteArraySwapped() { + assertArrayEquals(new byte[] { 0x71, (byte) 0xc7, }, + CpioUtil.long2byteArray(CpioConstants.MAGIC_OLD_BINARY, + 2, true)); + } + + @Test + public void oldBinMagicFromByteArrayNotSwapped() { + assertEquals(CpioConstants.MAGIC_OLD_BINARY, + CpioUtil.byteArray2long(new byte[] { (byte) 0xc7, 0x71 }, + false)); + } + + @Test + public void oldBinMagicFromByteArraySwapped() { + assertEquals(CpioConstants.MAGIC_OLD_BINARY, + CpioUtil.byteArray2long(new byte[] { 0x71, (byte) 0xc7 }, + true)); + } + + + @Test(expected = UnsupportedOperationException.class) + public void testLong2byteArrayWithZeroThrowsUnsupportedOperationException() { + + CpioUtil.long2byteArray(0L, 0, false); + + } + + + @Test(expected = UnsupportedOperationException.class) + public void testLong2byteArrayWithPositiveThrowsUnsupportedOperationException() { + + CpioUtil.long2byteArray(0L, 1021, false); + + } + + + @Test(expected = UnsupportedOperationException.class) + public void testByteArray2longThrowsUnsupportedOperationException() { + + byte[] byteArray = new byte[1]; + + CpioUtil.byteArray2long(byteArray, true); + + } + + +}
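The cpio tests above cover the old binary format, multibyte and UTF-16LE entry names, and the low-level byte-order helpers. A minimal in-memory round trip in the "new" ASCII format, using only constructors and constants that appear in those tests; the class name and payload are illustrative only:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.commons.compress.archivers.cpio.CpioArchiveEntry;
    import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
    import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
    import org.apache.commons.compress.archivers.cpio.CpioConstants;
    import org.apache.commons.compress.utils.IOUtils;

    public class CpioRoundTrip {
        public static void main(String[] args) throws Exception {
            byte[] payload = "hello".getBytes(StandardCharsets.US_ASCII);
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            // FORMAT_NEW is the "new" ASCII format; unlike FORMAT_NEW_CRC it needs no checksum.
            try (CpioArchiveOutputStream os = new CpioArchiveOutputStream(bos, CpioConstants.FORMAT_NEW)) {
                CpioArchiveEntry entry = new CpioArchiveEntry(CpioConstants.FORMAT_NEW, "hello.txt", payload.length);
                os.putArchiveEntry(entry);
                os.write(payload);
                os.closeArchiveEntry();
            }
            try (CpioArchiveInputStream is = new CpioArchiveInputStream(new ByteArrayInputStream(bos.toByteArray()))) {
                CpioArchiveEntry entry = is.getNextCPIOEntry();
                // Prints the entry name and the payload read back from the archive.
                System.out.println(entry.getName() + " -> "
                        + new String(IOUtils.toByteArray(is), StandardCharsets.US_ASCII));
            }
        }
    }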
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/archivers/dump/DumpArchiveEntryTest.java b/src/test/java/org/apache/commons/compress/archivers/dump/DumpArchiveEntryTest.java new file mode 100644 index 000000000..8cc8fd253 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/dump/DumpArchiveEntryTest.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +public class DumpArchiveEntryTest { + @Test + public void publicNameAddsTrailingSlashForDirectories() { + final DumpArchiveEntry ent = new DumpArchiveEntry("foo", "bar", -1, + DumpArchiveEntry.TYPE + .DIRECTORY); + assertEquals("bar", ent.getSimpleName()); + assertEquals("foo", ent.getOriginalName()); + assertEquals("foo/", ent.getName()); + } + + @Test + public void publicNameRemovesLeadingDotSlash() { + final DumpArchiveEntry ent = new DumpArchiveEntry("./foo", "bar"); + assertEquals("bar", ent.getSimpleName()); + assertEquals("./foo", ent.getOriginalName()); + assertEquals("foo", ent.getName()); + } + +}
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStreamTest.java new file mode 100644 index 000000000..89f6f15df --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStreamTest.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + +import static org.junit.Assert.*; + +import java.io.FileInputStream; +import java.io.InputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public class DumpArchiveInputStreamTest extends AbstractTestCase { + + @Test + public void testNotADumpArchive() throws Exception { + try (FileInputStream is = new FileInputStream(getFile("bla.zip"))) { + new DumpArchiveInputStream(is).close(); + fail("expected an exception"); + } catch (final ArchiveException ex) { + // expected + assertTrue(ex.getCause() instanceof ShortFileException); + } + } + + @Test + public void testNotADumpArchiveButBigEnough() throws Exception { + try (FileInputStream is = new FileInputStream(getFile("zip64support.tar.bz2"))) { + new DumpArchiveInputStream(is).close(); + fail("expected an exception"); + } catch (final ArchiveException ex) { + // expected + assertTrue(ex.getCause() instanceof UnrecognizedFormatException); + } + } + + @Test + public void testConsumesArchiveCompletely() throws Exception { + final InputStream is = DumpArchiveInputStreamTest.class + .getResourceAsStream("/archive_with_trailer.dump"); + final DumpArchiveInputStream dump = new DumpArchiveInputStream(is); + while (dump.getNextDumpEntry() != null) { + // just consume the archive + } + final byte[] expected = new byte[] { + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', '\n' + }; + final byte[] actual = new byte[expected.length]; + is.read(actual); + assertArrayEquals(expected, actual); + dump.close(); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + try (FileInputStream in = new FileInputStream(getFile("bla.dump")); + DumpArchiveInputStream archive = new DumpArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read()); + assertEquals(-1, archive.read()); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + byte[] buf = new byte[2]; + try (FileInputStream in = new FileInputStream(getFile("bla.dump")); + 
DumpArchiveInputStream archive = new DumpArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read(buf)); + assertEquals(-1, archive.read(buf)); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtilTest.java b/src/test/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtilTest.java new file mode 100644 index 000000000..2aceca301 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtilTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.dump; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +public class DumpArchiveUtilTest { + + @Test + public void convert64() { + assertEquals(0xABCDEF0123456780L, + DumpArchiveUtil.convert64(new byte[] { + (byte) 0x80, 0x67, 0x45, 0x23, 1, (byte) 0xEF, + (byte) 0xCD, (byte) 0xAB + }, 0)); + } + + @Test + public void convert32() { + assertEquals(0xABCDEF01, + DumpArchiveUtil.convert32(new byte[] { + 1, (byte) 0xEF, (byte) 0xCD, (byte) 0xAB + }, 0)); + } + + @Test + public void convert16() { + assertEquals(0xABCD, + DumpArchiveUtil.convert16(new byte[] { + (byte) 0xCD, (byte) 0xAB + }, 0)); + } +}
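The dump tests above read 4.4BSD dump archives through DumpArchiveInputStream and iterate entries with getNextDumpEntry(). A minimal sketch that lists the entries of such an archive, assuming the archive path is supplied on the command line; the class name is illustrative only:

    import java.io.BufferedInputStream;
    import java.io.FileInputStream;
    import java.io.InputStream;

    import org.apache.commons.compress.archivers.dump.DumpArchiveEntry;
    import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;

    public class DumpLister {
        public static void main(String[] args) throws Exception {
            // args[0] is a placeholder path to any 4.4BSD-style dump file.
            try (InputStream in = new BufferedInputStream(new FileInputStream(args[0]));
                 DumpArchiveInputStream dump = new DumpArchiveInputStream(in)) {
                DumpArchiveEntry entry;
                while ((entry = dump.getNextDumpEntry()) != null) {
                    // Directories and files are both reported; sizes come from the dump inode data.
                    System.out.println((entry.isDirectory() ? "dir  " : "file ")
                            + entry.getName() + " (" + entry.getSize() + " bytes)");
                }
            }
        }
    }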
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/archivers/examples/ExpanderTest.java b/src/test/java/org/apache/commons/compress/archivers/examples/ExpanderTest.java new file mode 100644 index 000000000..d14a27375 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/examples/ExpanderTest.java @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.examples; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.channels.FileChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; +import java.util.Collection; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.ArchiveStreamFactory; +import org.apache.commons.compress.archivers.StreamingNotSupportedException; +import org.apache.commons.compress.archivers.sevenz.SevenZFile; +import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class ExpanderTest extends AbstractTestCase { + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + private File archive; + + @Test + public void sevenZTwoFileVersion() throws IOException, ArchiveException { + setup7z(); + new Expander().expand("7z", archive, resultDir); + verifyTargetDir(); + } + + @Test + public void sevenZTwoFileVersionWithAutoDetection() throws IOException, ArchiveException { + setup7z(); + new Expander().expand(archive, resultDir); + verifyTargetDir(); + } + + @Test(expected = StreamingNotSupportedException.class) + public void sevenZInputStreamVersion() throws IOException, ArchiveException { + setup7z(); + try (InputStream i = new BufferedInputStream(Files.newInputStream(archive.toPath()))) { + new Expander().expand("7z", i, resultDir); + } + } + + @Test(expected = StreamingNotSupportedException.class) + public void sevenZInputStreamVersionWithAutoDetection() throws IOException, ArchiveException { + setup7z(); + try (InputStream i = new 
BufferedInputStream(Files.newInputStream(archive.toPath()))) { + new Expander().expand(i, resultDir); + } + } + + @Test + public void sevenZChannelVersion() throws IOException, ArchiveException { + setup7z(); + try (SeekableByteChannel c = FileChannel.open(archive.toPath(), StandardOpenOption.READ)) { + new Expander().expand("7z", c, resultDir); + } + verifyTargetDir(); + } + + @Test + public void sevenZFileVersion() throws IOException, ArchiveException { + setup7z(); + try (SevenZFile f = new SevenZFile(archive)) { + new Expander().expand(f, resultDir); + } + verifyTargetDir(); + } + + @Test + public void zipFileVersion() throws IOException, ArchiveException { + setupZip(); + try (ZipFile f = new ZipFile(archive)) { + new Expander().expand(f, resultDir); + } + verifyTargetDir(); + } + + @Test + public void fileCantEscapeViaAbsolutePath() throws IOException, ArchiveException { + setupZip("/tmp/foo"); + try (ZipFile f = new ZipFile(archive)) { + new Expander().expand(f, resultDir); + } + assertHelloWorld("tmp/foo", "1"); + } + + @Test + public void fileCantEscapeDoubleDotPath() throws IOException, ArchiveException { + thrown.expect(IOException.class); + thrown.expectMessage("expanding ../foo would create file outside of"); + setupZip("../foo"); + try (ZipFile f = new ZipFile(archive)) { + new Expander().expand(f, resultDir); + } + } + + @Test + public void fileCantEscapeDoubleDotPathWithSimilarSibling() throws IOException, ArchiveException { + String sibling = resultDir.getName() + "x"; + File s = new File(resultDir.getParentFile(), sibling); + Assume.assumeFalse(s.exists()); + s.mkdirs(); + Assume.assumeTrue(s.exists()); + s.deleteOnExit(); + try { + thrown.expect(IOException.class); + thrown.expectMessage("expanding ../" + sibling + "/a would create file outside of"); + setupZip("../" + sibling + "/a"); + try (ZipFile f = new ZipFile(archive)) { + new Expander().expand(f, resultDir); + } + } finally { + tryHardToDelete(s); + } + } + + private void setup7z() throws IOException, ArchiveException { + archive = new File(dir, "test.7z"); + File dummy = new File(dir, "x"); + try (OutputStream o = Files.newOutputStream(dummy.toPath())) { + o.write(new byte[14]); + } + try (SevenZOutputFile aos = new SevenZOutputFile(archive)) { + aos.putArchiveEntry(aos.createArchiveEntry(dir, "a")); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dir, "a/b")); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dir, "a/b/c")); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dummy, "a/b/d.txt")); + aos.write("Hello, world 1".getBytes(StandardCharsets.UTF_8)); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dummy, "a/b/c/e.txt")); + aos.write("Hello, world 2".getBytes(StandardCharsets.UTF_8)); + aos.closeArchiveEntry(); + aos.finish(); + } + } + + private void setupZip() throws IOException, ArchiveException { + archive = new File(dir, "test.zip"); + File dummy = new File(dir, "x"); + try (OutputStream o = Files.newOutputStream(dummy.toPath())) { + o.write(new byte[14]); + } + try (ArchiveOutputStream aos = new ArchiveStreamFactory() + .createArchiveOutputStream("zip", Files.newOutputStream(archive.toPath()))) { + aos.putArchiveEntry(aos.createArchiveEntry(dir, "a")); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dir, "a/b")); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dir, "a/b/c")); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dummy, 
"a/b/d.txt")); + aos.write("Hello, world 1".getBytes(StandardCharsets.UTF_8)); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dummy, "a/b/c/e.txt")); + aos.write("Hello, world 2".getBytes(StandardCharsets.UTF_8)); + aos.closeArchiveEntry(); + aos.finish(); + } + } + + private void setupZip(String entry) throws IOException, ArchiveException { + archive = new File(dir, "test.zip"); + File dummy = new File(dir, "x"); + try (OutputStream o = Files.newOutputStream(dummy.toPath())) { + o.write(new byte[14]); + } + try (ArchiveOutputStream aos = new ArchiveStreamFactory() + .createArchiveOutputStream("zip", Files.newOutputStream(archive.toPath()))) { + aos.putArchiveEntry(aos.createArchiveEntry(dummy, entry)); + aos.write("Hello, world 1".getBytes(StandardCharsets.UTF_8)); + aos.closeArchiveEntry(); + aos.finish(); + } + } + + private void verifyTargetDir() throws IOException { + Assert.assertTrue("a has not been created", new File(resultDir, "a").isDirectory()); + Assert.assertTrue("a/b has not been created", new File(resultDir, "a/b").isDirectory()); + Assert.assertTrue("a/b/c has not been created", new File(resultDir, "a/b/c").isDirectory()); + assertHelloWorld("a/b/d.txt", "1"); + assertHelloWorld("a/b/c/e.txt", "2"); + } + + private void assertHelloWorld(String fileName, String suffix) throws IOException { + Assert.assertTrue(fileName + " does not exist", new File(resultDir, fileName).isFile()); + byte[] expected = ("Hello, world " + suffix).getBytes(StandardCharsets.UTF_8); + try (InputStream is = Files.newInputStream(new File(resultDir, fileName).toPath())) { + byte[] actual = IOUtils.toByteArray(is); + Assert.assertArrayEquals(expected, actual); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/examples/ParameterizedArchiverTest.java b/src/test/java/org/apache/commons/compress/archivers/examples/ParameterizedArchiverTest.java new file mode 100644 index 000000000..22676109a --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/examples/ParameterizedArchiverTest.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers.examples; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.channels.FileChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; +import java.util.Collection; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.ArchiveStreamFactory; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized.Parameters; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class ParameterizedArchiverTest extends AbstractTestCase { + + // can't test 7z here as 7z cannot write to non-seekable streams + // and reading logic would be different as well - see + // SevenZArchiverTest class + @Parameters(name = "format={0}") + public static Collection<Object[]> data() { + return Arrays.asList( + new Object[] { "tar" }, + new Object[] { "cpio" }, + new Object[] { "zip" } + ); + } + + private final String format; + private File target; + + public ParameterizedArchiverTest(String format) { + this.format = format; + } + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + File c = new File(dir, "a/b/c"); + c.mkdirs(); + try (OutputStream os = Files.newOutputStream(new File(dir, "a/b/d.txt").toPath())) { + os.write("Hello, world 1".getBytes(StandardCharsets.UTF_8)); + } + try (OutputStream os = Files.newOutputStream(new File(dir, "a/b/c/e.txt").toPath())) { + os.write("Hello, world 2".getBytes(StandardCharsets.UTF_8)); + } + target = new File(resultDir, "test." 
+ format); + } + + @Test + public void fileVersion() throws IOException, ArchiveException { + new Archiver().create(format, target, dir); + verifyContent(); + } + + @Test + public void outputStreamVersion() throws IOException, ArchiveException { + try (OutputStream os = Files.newOutputStream(target.toPath())) { + new Archiver().create(format, os, dir); + } + verifyContent(); + } + + @Test + public void channelVersion() throws IOException, ArchiveException { + try (SeekableByteChannel c = FileChannel.open(target.toPath(), StandardOpenOption.WRITE, + StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) { + new Archiver().create(format, c, dir); + } + verifyContent(); + } + + @Test + public void archiveStreamVersion() throws IOException, ArchiveException { + try (OutputStream os = Files.newOutputStream(target.toPath()); + ArchiveOutputStream aos = new ArchiveStreamFactory().createArchiveOutputStream(format, os)) { + new Archiver().create(aos, dir); + } + verifyContent(); + } + + private void verifyContent() throws IOException, ArchiveException { + try (InputStream is = Files.newInputStream(target.toPath()); + BufferedInputStream bis = new BufferedInputStream(is); + ArchiveInputStream ais = new ArchiveStreamFactory().createArchiveInputStream(format, bis)) { + assertDir("a", ais.getNextEntry()); + assertDir("a/b", ais.getNextEntry()); + ArchiveEntry n = ais.getNextEntry(); + Assert.assertNotNull(n); + // File.list may return a/b/c or a/b/d.txt first + if (n.getName().endsWith("/")) { + assertDir("a/b/c", n); + assertHelloWorld("a/b/c/e.txt", "2", ais.getNextEntry(), ais); + assertHelloWorld("a/b/d.txt", "1", ais.getNextEntry(), ais); + } else { + assertHelloWorld("a/b/d.txt", "1", n, ais); + assertDir("a/b/c", ais.getNextEntry()); + assertHelloWorld("a/b/c/e.txt", "2", ais.getNextEntry(), ais); + } + } + } + + private void assertDir(String expectedName, ArchiveEntry entry) { + Assert.assertNotNull(expectedName + " does not exists", entry); + Assert.assertEquals(expectedName + "/", entry.getName()); + Assert.assertTrue(expectedName + " is not a directory", entry.isDirectory()); + } + + private void assertHelloWorld(String expectedName, String suffix, ArchiveEntry entry, InputStream is) + throws IOException { + Assert.assertNotNull(expectedName + " does not exists", entry); + Assert.assertEquals(expectedName, entry.getName()); + Assert.assertFalse(expectedName + " is a directory", entry.isDirectory()); + byte[] expected = ("Hello, world " + suffix).getBytes(StandardCharsets.UTF_8); + byte[] actual = IOUtils.toByteArray(is); + Assert.assertArrayEquals(expected, actual); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/examples/ParameterizedExpanderTest.java b/src/test/java/org/apache/commons/compress/archivers/examples/ParameterizedExpanderTest.java new file mode 100644 index 000000000..5d6da4b8e --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/examples/ParameterizedExpanderTest.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.examples; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.channels.FileChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; +import java.util.Collection; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.ArchiveStreamFactory; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized.Parameters; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class ParameterizedExpanderTest extends AbstractTestCase { + + // 7z and ZIP using ZipFile is in ExpanderTest + @Parameters(name = "format={0}") + public static Collection<Object[]> data() { + return Arrays.asList( + new Object[] { "tar" }, + new Object[] { "cpio" }, + new Object[] { "zip" } + ); + } + + private final String format; + private File archive; + + public ParameterizedExpanderTest(String format) { + this.format = format; + } + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + archive = new File(dir, "test." 
+ format); + File dummy = new File(dir, "x"); + try (OutputStream o = Files.newOutputStream(dummy.toPath())) { + o.write(new byte[14]); + } + try (ArchiveOutputStream aos = new ArchiveStreamFactory() + .createArchiveOutputStream(format, Files.newOutputStream(archive.toPath()))) { + aos.putArchiveEntry(aos.createArchiveEntry(dir, "a")); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dir, "a/b")); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dir, "a/b/c")); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dummy, "a/b/d.txt")); + aos.write("Hello, world 1".getBytes(StandardCharsets.UTF_8)); + aos.closeArchiveEntry(); + aos.putArchiveEntry(aos.createArchiveEntry(dummy, "a/b/c/e.txt")); + aos.write("Hello, world 2".getBytes(StandardCharsets.UTF_8)); + aos.closeArchiveEntry(); + aos.finish(); + } + } + + @Test + public void fileVersion() throws IOException, ArchiveException { + new Expander().expand(format, archive, resultDir); + verifyTargetDir(); + } + + @Test + public void fileVersionWithAutoDetection() throws IOException, ArchiveException { + new Expander().expand(archive, resultDir); + verifyTargetDir(); + } + + @Test + public void inputStreamVersion() throws IOException, ArchiveException { + try (InputStream i = new BufferedInputStream(Files.newInputStream(archive.toPath()))) { + new Expander().expand(format, i, resultDir); + } + verifyTargetDir(); + } + + @Test + public void inputStreamVersionWithAutoDetection() throws IOException, ArchiveException { + try (InputStream i = new BufferedInputStream(Files.newInputStream(archive.toPath()))) { + new Expander().expand(i, resultDir); + } + verifyTargetDir(); + } + + @Test + public void channelVersion() throws IOException, ArchiveException { + try (SeekableByteChannel c = FileChannel.open(archive.toPath(), StandardOpenOption.READ)) { + new Expander().expand(format, c, resultDir); + } + verifyTargetDir(); + } + + @Test + public void archiveInputStreamVersion() throws IOException, ArchiveException { + try (InputStream i = new BufferedInputStream(Files.newInputStream(archive.toPath())); + ArchiveInputStream ais = new ArchiveStreamFactory().createArchiveInputStream(format, i)) { + new Expander().expand(ais, resultDir); + } + verifyTargetDir(); + } + + private void verifyTargetDir() throws IOException { + Assert.assertTrue("a has not been created", new File(resultDir, "a").isDirectory()); + Assert.assertTrue("a/b has not been created", new File(resultDir, "a/b").isDirectory()); + Assert.assertTrue("a/b/c has not been created", new File(resultDir, "a/b/c").isDirectory()); + assertHelloWorld("a/b/d.txt", "1"); + assertHelloWorld("a/b/c/e.txt", "2"); + } + + private void assertHelloWorld(String fileName, String suffix) throws IOException { + Assert.assertTrue(fileName + " does not exist", new File(resultDir, fileName).isFile()); + byte[] expected = ("Hello, world " + suffix).getBytes(StandardCharsets.UTF_8); + try (InputStream is = Files.newInputStream(new File(resultDir, fileName).toPath())) { + byte[] actual = IOUtils.toByteArray(is); + Assert.assertArrayEquals(expected, actual); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/examples/SevenZArchiverTest.java b/src/test/java/org/apache/commons/compress/archivers/examples/SevenZArchiverTest.java new file mode 100644 index 000000000..635c89990 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/examples/SevenZArchiverTest.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.examples; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.channels.FileChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.StreamingNotSupportedException; +import org.apache.commons.compress.archivers.sevenz.SevenZFile; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class SevenZArchiverTest extends AbstractTestCase { + private File target; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + File c = new File(dir, "a/b/c"); + c.mkdirs(); + try (OutputStream os = Files.newOutputStream(new File(dir, "a/b/d.txt").toPath())) { + os.write("Hello, world 1".getBytes(StandardCharsets.UTF_8)); + } + try (OutputStream os = Files.newOutputStream(new File(dir, "a/b/c/e.txt").toPath())) { + os.write("Hello, world 2".getBytes(StandardCharsets.UTF_8)); + } + target = new File(resultDir, "test.7z"); + } + + @Test + public void fileVersion() throws IOException, ArchiveException { + new Archiver().create("7z", target, dir); + verifyContent(); + } + + @Test(expected = StreamingNotSupportedException.class) + public void outputStreamVersion() throws IOException, ArchiveException { + try (OutputStream os = Files.newOutputStream(target.toPath())) { + new Archiver().create("7z", os, dir); + } + } + + @Test + public void channelVersion() throws IOException, ArchiveException { + try (SeekableByteChannel c = FileChannel.open(target.toPath(), StandardOpenOption.WRITE, + StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) { + new Archiver().create("7z", c, dir); + } + verifyContent(); + } + + // not really a 7z test but I didn't feel like adding a new test just for this + @Test(expected = ArchiveException.class) + public void unknownFormat() throws IOException, ArchiveException { + try (SeekableByteChannel c = FileChannel.open(target.toPath(), StandardOpenOption.WRITE, + StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) { + new Archiver().create("unknown format", c, dir); + } + } + + private void verifyContent() throws IOException, ArchiveException { + try (SevenZFile z = new SevenZFile(target)) { + assertDir("a", z.getNextEntry()); + assertDir("a/b", z.getNextEntry()); + ArchiveEntry n = z.getNextEntry(); + Assert.assertNotNull(n); + // File.list may return a/b/c or a/b/d.txt first + if 
(n.getName().endsWith("/")) { + assertDir("a/b/c", n); + assertHelloWorld("a/b/c/e.txt", "2", z.getNextEntry(), z); + assertHelloWorld("a/b/d.txt", "1", z.getNextEntry(), z); + } else { + assertHelloWorld("a/b/d.txt", "1", n, z); + assertDir("a/b/c", z.getNextEntry()); + assertHelloWorld("a/b/c/e.txt", "2", z.getNextEntry(), z); + } + } + } + + private void assertDir(String expectedName, ArchiveEntry entry) { + Assert.assertNotNull(expectedName + " does not exists", entry); + Assert.assertEquals(expectedName + "/", entry.getName()); + Assert.assertTrue(expectedName + " is not a directory", entry.isDirectory()); + } + + private void assertHelloWorld(String expectedName, String suffix, ArchiveEntry entry, SevenZFile z) + throws IOException { + Assert.assertNotNull(expectedName + " does not exists", entry); + Assert.assertEquals(expectedName, entry.getName()); + Assert.assertFalse(expectedName + " is a directory", entry.isDirectory()); + byte[] expected = ("Hello, world " + suffix).getBytes(StandardCharsets.UTF_8); + byte[] actual = new byte[expected.length]; + Assert.assertEquals(actual.length, z.read(actual)); + Assert.assertEquals(-1, z.read()); + Assert.assertArrayEquals(expected, actual); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/jar/JarArchiveOutputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/jar/JarArchiveOutputStreamTest.java new file mode 100644 index 000000000..3008395cd --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/jar/JarArchiveOutputStreamTest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers.jar; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; + +import org.junit.Test; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.zip.JarMarker; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipExtraField; +import org.apache.commons.compress.archivers.zip.ZipFile; + +public class JarArchiveOutputStreamTest { + + @Test + public void testJarMarker() throws IOException { + final File testArchive = File.createTempFile("jar-aostest", ".jar"); + testArchive.deleteOnExit(); + JarArchiveOutputStream out = null; + ZipFile zf = null; + try { + + out = new JarArchiveOutputStream(new FileOutputStream(testArchive)); + out.putArchiveEntry(new ZipArchiveEntry("foo/")); + out.closeArchiveEntry(); + out.putArchiveEntry(new ZipArchiveEntry("bar/")); + out.closeArchiveEntry(); + out.finish(); + out.close(); + out = null; + + zf = new ZipFile(testArchive); + ZipArchiveEntry ze = zf.getEntry("foo/"); + assertNotNull(ze); + ZipExtraField[] fes = ze.getExtraFields(); + assertEquals(1, fes.length); + assertTrue(fes[0] instanceof JarMarker); + + ze = zf.getEntry("bar/"); + assertNotNull(ze); + fes = ze.getExtraFields(); + assertEquals(0, fes.length); + } finally { + if (out != null) { + try { + out.close(); + } catch (final IOException e) { /* swallow */ } + } + ZipFile.closeQuietly(zf); + AbstractTestCase.tryHardToDelete(testArchive); + } + } + +}
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/archivers/jar/JarMarkerTest.java b/src/test/java/org/apache/commons/compress/archivers/jar/JarMarkerTest.java new file mode 100644 index 000000000..507a814e9 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/jar/JarMarkerTest.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.jar; + +import static org.junit.Assert.fail; + +import java.util.zip.ZipException; +import org.apache.commons.compress.archivers.zip.JarMarker; +import org.junit.Test; + +public class JarMarkerTest { + + @Test public void testJarMarkerLengthCheck() { + JarMarker jarMarker = JarMarker.getInstance(); + try { + jarMarker.parseFromLocalFileData(null,0,1); + fail("should have thrown exception due to length of 1"); + } catch (ZipException e) { + + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/memory/MemoryArchiveEntry.java b/src/test/java/org/apache/commons/compress/archivers/memory/MemoryArchiveEntry.java new file mode 100644 index 000000000..86efb097c --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/memory/MemoryArchiveEntry.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers.memory; + +import java.util.Date; + +import org.apache.commons.compress.archivers.ArchiveEntry; + +public final class MemoryArchiveEntry implements ArchiveEntry { + + private final String name; + + public MemoryArchiveEntry(final String pName) { + name = pName; + } + + @Override + public String getName() { + return name; + } + + @Override + public long getSize() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean isDirectory() { + // TODO Auto-generated method stub + return false; + } + + @Override + public Date getLastModifiedDate() { + return new Date(); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/memory/MemoryArchiveInputStream.java b/src/test/java/org/apache/commons/compress/archivers/memory/MemoryArchiveInputStream.java new file mode 100644 index 000000000..a3b834790 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/memory/MemoryArchiveInputStream.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.memory; + +import java.io.IOException; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; + +public final class MemoryArchiveInputStream extends ArchiveInputStream { + + private final String[] filenames; + private final String[] content; + private int p; + + public MemoryArchiveInputStream( final String[][] pFiles ) { + filenames = new String[pFiles.length]; + content = new String[pFiles.length]; + + for (int i = 0; i < pFiles.length; i++) { + final String[] nameAndContent = pFiles[i]; + filenames[i] = nameAndContent[0]; + content[i] = nameAndContent[1]; + } + p = 0; + } + + @Override + public ArchiveEntry getNextEntry() throws IOException { + if (p >= filenames.length) { + return null; + } + + return new MemoryArchiveEntry(filenames[p]); + } + + public String readString() { + return content[p++]; + } + + @Override + public int read() throws IOException { + return 0; + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/memory/MemoryArchiveTestCase.java b/src/test/java/org/apache/commons/compress/archivers/memory/MemoryArchiveTestCase.java new file mode 100644 index 000000000..238675d0c --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/memory/MemoryArchiveTestCase.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.memory; + +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.junit.Test; + +import org.apache.commons.compress.archivers.ArchiveEntry; + +public final class MemoryArchiveTestCase { + + @Test + public void testReading() throws IOException { + + final MemoryArchiveInputStream is = new MemoryArchiveInputStream(new String[][] { + { "test1", "content1" }, + { "test2", "content2" }, + }); + + final ArchiveEntry entry1 = is.getNextEntry(); + assertNotNull(entry1); + assertEquals("test1", entry1.getName()); + final String content1 = is.readString(); + assertEquals("content1", content1); + + final ArchiveEntry entry2 = is.getNextEntry(); + assertNotNull(entry2); + assertEquals("test2", entry2.getName()); + final String content2 = is.readString(); + assertEquals("content2", content2); + + final ArchiveEntry entry3 = is.getNextEntry(); + assertNull(entry3); + + is.close(); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/AES256SHA256DecoderTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/AES256SHA256DecoderTest.java new file mode 100644 index 000000000..b3b68b1df --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/AES256SHA256DecoderTest.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.sevenz; + +import org.junit.Test; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectInputStream; + +import static org.junit.Assert.*; + + +/** + * Unit tests for class {@link AES256SHA256Decoder}. 
+ * + * @date 26.06.2017 + * @see AES256SHA256Decoder + **/ +public class AES256SHA256DecoderTest { + + + @Test + public void testDecodeWithNonEmptyString() throws IOException { + + AES256SHA256Decoder aES256SHA256Decoder = new AES256SHA256Decoder(); + BufferedInputStream bufferedInputStream = new BufferedInputStream(null, 3138); + Coder coder = new Coder(); + byte[] byteArray = new byte[8]; + byteArray[1] = (byte) (-72); + coder.properties = byteArray; + InputStream inputStream = aES256SHA256Decoder.decode("x", bufferedInputStream, 3138, coder, coder.properties); + + ObjectInputStream objectInputStream = null; + + try { + objectInputStream = new ObjectInputStream(inputStream); + fail("Expecting exception: IOException"); + } catch(Throwable e) { + assertEquals("Salt size + IV size too long in x",e.getMessage()); + assertEquals("org.apache.commons.compress.archivers.sevenz.AES256SHA256Decoder$1", e.getStackTrace()[0].getClassName()); + } + + } + + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/CoverageTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/CoverageTest.java new file mode 100644 index 000000000..4bfe3c500 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/CoverageTest.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.sevenz; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; + +import org.junit.Test; + +public class CoverageTest { + + @Test public void testNidInstance() { + assertNotNull(new NID()); + } + + @Test public void testCLIInstance() { + CLI foo = new CLI(); + assertNotNull(foo); + try { + CLI.main(new String[]{"/dev/null/not-there"}); + fail("shouldn't be able to list contents of a file that isn't there"); + } catch (Exception ignored) { + + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/FolderTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/FolderTest.java new file mode 100644 index 000000000..1150b892a --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/FolderTest.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.sevenz; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + + +/** + * Unit tests for class {@link Folder}. + * + * @date 26.06.2017 + * @see Folder + **/ +public class FolderTest { + + + @Test + public void testGetUnpackSizeForCoderOne() { + + Folder folder = new Folder(); + Coder[] coderArray = new Coder[5]; + Coder coder = new Coder(); + folder.coders = coderArray; + + assertEquals(0L, folder.getUnpackSizeForCoder(coder)); + + } + + + @Test + public void testGetUnpackSizeOne() { + + Folder folder = new Folder(); + folder.totalOutputStreams = 266L; + BindPair[] bindPairArray = new BindPair[1]; + BindPair bindPair = new BindPair(); + bindPairArray[0] = bindPair; + folder.bindPairs = bindPairArray; + folder.totalOutputStreams = 1L; + + assertEquals(0L, folder.getUnpackSize()); + + } + + + @Test + public void testGetUnpackSizeTwo() { + + Folder folder = new Folder(); + + assertEquals(0L, folder.getUnpackSize()); + + } + + + @Test + public void testFindBindPairForInStream() { + + Folder folder = new Folder(); + BindPair[] bindPairArray = new BindPair[1]; + BindPair bindPair = new BindPair(); + bindPairArray[0] = bindPair; + folder.bindPairs = bindPairArray; + + assertEquals(0, folder.findBindPairForInStream(0)); + + } + + +}
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntryTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntryTest.java new file mode 100644 index 000000000..daf618655 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntryTest.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import org.junit.Test; + +public class SevenZArchiveEntryTest { + + @Test(expected=UnsupportedOperationException.class) + public void shouldThrowIfNoLastModifiedDateIsSet() { + new SevenZArchiveEntry().getLastModifiedDate(); + } + + @Test(expected=UnsupportedOperationException.class) + public void shouldThrowIfNoCreationDateIsSet() { + new SevenZArchiveEntry().getCreationDate(); + } + + @Test(expected=UnsupportedOperationException.class) + public void shouldThrowIfNoAccessDateIsSet() { + new SevenZArchiveEntry().getAccessDate(); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java new file mode 100644 index 000000000..776ad4d3d --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java @@ -0,0 +1,346 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import static org.junit.Assert.*; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Random; + +import javax.crypto.Cipher; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.PasswordRequiredException; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; +import org.junit.Test; + +public class SevenZFileTest extends AbstractTestCase { + private static final String TEST2_CONTENT = "<?xml version = '1.0'?>\r\n<!DOCTYPE" + + " connections>\r\n<meinxml>\r\n\t<leer />\r\n</meinxml>\n"; + + // https://issues.apache.org/jira/browse/COMPRESS-320 + @Test + public void testRandomlySkippingEntries() throws Exception { + // Read sequential reference. + final Map<String, byte[]> entriesByName = new HashMap<>(); + SevenZFile archive = new SevenZFile(getFile("COMPRESS-320/Copy.7z")); + SevenZArchiveEntry entry; + while ((entry = archive.getNextEntry()) != null) { + if (entry.hasStream()) { + entriesByName.put(entry.getName(), readFully(archive)); + } + } + archive.close(); + + final String[] variants = { + "BZip2-solid.7z", + "BZip2.7z", + "Copy-solid.7z", + "Copy.7z", + "Deflate-solid.7z", + "Deflate.7z", + "LZMA-solid.7z", + "LZMA.7z", + "LZMA2-solid.7z", + "LZMA2.7z", + // TODO: unsupported compression method. + // "PPMd-solid.7z", + // "PPMd.7z", + }; + + // TODO: use randomizedtesting for predictable, but different, randomness. + final Random rnd = new Random(0xdeadbeef); + for (final String fileName : variants) { + archive = new SevenZFile(getFile("COMPRESS-320/" + fileName)); + + while ((entry = archive.getNextEntry()) != null) { + // Sometimes skip reading entries. + if (rnd.nextBoolean()) { + continue; + } + + if (entry.hasStream()) { + assertTrue(entriesByName.containsKey(entry.getName())); + final byte [] content = readFully(archive); + assertTrue("Content mismatch on: " + fileName + "!" 
+ entry.getName(), + Arrays.equals(content, entriesByName.get(entry.getName()))); + } + } + + archive.close(); + } + } + + private byte[] readFully(final SevenZFile archive) throws IOException { + final byte [] buf = new byte [1024]; + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + for (int len = 0; (len = archive.read(buf)) > 0;) { + baos.write(buf, 0, len); + } + return baos.toByteArray(); + } + + @Test + public void testAllEmptyFilesArchive() throws Exception { + try (SevenZFile archive = new SevenZFile(getFile("7z-empty-mhc-off.7z"))) { + assertNotNull(archive.getNextEntry()); + } + } + + @Test + public void testHelloWorldHeaderCompressionOffCopy() throws Exception { + checkHelloWorld("7z-hello-mhc-off-copy.7z"); + } + + @Test + public void testHelloWorldHeaderCompressionOffLZMA2() throws Exception { + checkHelloWorld("7z-hello-mhc-off-lzma2.7z"); + } + + @Test + public void test7zUnarchive() throws Exception { + test7zUnarchive(getFile("bla.7z"), SevenZMethod.LZMA); + } + + @Test + public void test7zDeflateUnarchive() throws Exception { + test7zUnarchive(getFile("bla.deflate.7z"), SevenZMethod.DEFLATE); + } + + @Test + public void test7zDeflate64Unarchive() throws Exception { + test7zUnarchive(getFile("bla.deflate64.7z"), SevenZMethod.DEFLATE64); + } + + @Test + public void test7zDecryptUnarchive() throws Exception { + if (isStrongCryptoAvailable()) { + test7zUnarchive(getFile("bla.encrypted.7z"), SevenZMethod.LZMA, // stack LZMA + AES + "foo".getBytes("UTF-16LE")); + } + } + + @Test + public void test7zDecryptUnarchiveUsingCharArrayPassword() throws Exception { + if (isStrongCryptoAvailable()) { + test7zUnarchive(getFile("bla.encrypted.7z"), SevenZMethod.LZMA, // stack LZMA + AES + "foo".toCharArray()); + } + } + + private void test7zUnarchive(final File f, final SevenZMethod m) throws Exception { + test7zUnarchive(f, m, (char[]) null); + } + + @Test + public void testEncryptedArchiveRequiresPassword() throws Exception { + try { + new SevenZFile(getFile("bla.encrypted.7z")).close(); + fail("shouldn't decrypt without a password"); + } catch (final PasswordRequiredException ex) { + final String msg = ex.getMessage(); + assertTrue("Should start with whining about being unable to decrypt", + msg.startsWith("Cannot read encrypted content from ")); + assertTrue("Should finish the sentence properly", + msg.endsWith(" without a password.")); + assertTrue("Should contain archive's name", + msg.contains("bla.encrypted.7z")); + } + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-256" + */ + @Test + public void testCompressedHeaderWithNonDefaultDictionarySize() throws Exception { + try (SevenZFile sevenZFile = new SevenZFile(getFile("COMPRESS-256.7z"))) { + int count = 0; + while (sevenZFile.getNextEntry() != null) { + count++; + } + assertEquals(446, count); + } + } + + @Test + public void testSignatureCheck() { + assertTrue(SevenZFile.matches(SevenZFile.sevenZSignature, + SevenZFile.sevenZSignature.length)); + assertTrue(SevenZFile.matches(SevenZFile.sevenZSignature, + SevenZFile.sevenZSignature.length + 1)); + assertFalse(SevenZFile.matches(SevenZFile.sevenZSignature, + SevenZFile.sevenZSignature.length - 1)); + assertFalse(SevenZFile.matches(new byte[] { 1, 2, 3, 4, 5, 6 }, 6)); + assertTrue(SevenZFile.matches(new byte[] { '7', 'z', (byte) 0xBC, + (byte) 0xAF, 0x27, 0x1C}, 6)); + assertFalse(SevenZFile.matches(new byte[] { '7', 'z', (byte) 0xBC, + (byte) 0xAF, 0x27, 0x1D}, 6)); + } + + @Test + public void testReadingBackLZMA2DictSize() throws Exception { 
+ final File output = new File(dir, "lzma2-dictsize.7z"); + try (SevenZOutputFile outArchive = new SevenZOutputFile(output)) { + outArchive.setContentMethods(Arrays.asList(new SevenZMethodConfiguration(SevenZMethod.LZMA2, 1 << 20))); + final SevenZArchiveEntry entry = new SevenZArchiveEntry(); + entry.setName("foo.txt"); + outArchive.putArchiveEntry(entry); + outArchive.write(new byte[] { 'A' }); + outArchive.closeArchiveEntry(); + } + + try (SevenZFile archive = new SevenZFile(output)) { + final SevenZArchiveEntry entry = archive.getNextEntry(); + final SevenZMethodConfiguration m = entry.getContentMethods().iterator().next(); + assertEquals(SevenZMethod.LZMA2, m.getMethod()); + assertEquals(1 << 20, m.getOptions()); + } + } + + @Test + public void testReadingBackDeltaDistance() throws Exception { + final File output = new File(dir, "delta-distance.7z"); + try (SevenZOutputFile outArchive = new SevenZOutputFile(output)) { + outArchive.setContentMethods(Arrays.asList(new SevenZMethodConfiguration(SevenZMethod.DELTA_FILTER, 32), + new SevenZMethodConfiguration(SevenZMethod.LZMA2))); + final SevenZArchiveEntry entry = new SevenZArchiveEntry(); + entry.setName("foo.txt"); + outArchive.putArchiveEntry(entry); + outArchive.write(new byte[] { 'A' }); + outArchive.closeArchiveEntry(); + } + + try (SevenZFile archive = new SevenZFile(output)) { + final SevenZArchiveEntry entry = archive.getNextEntry(); + final SevenZMethodConfiguration m = entry.getContentMethods().iterator().next(); + assertEquals(SevenZMethod.DELTA_FILTER, m.getMethod()); + assertEquals(32, m.getOptions()); + } + } + + @Test + public void getEntriesOfUnarchiveTest() throws IOException { + try (SevenZFile sevenZFile = new SevenZFile(getFile("bla.7z"))) { + final Iterable<SevenZArchiveEntry> entries = sevenZFile.getEntries(); + final Iterator<SevenZArchiveEntry> iter = entries.iterator(); + SevenZArchiveEntry entry = iter.next(); + assertEquals("test1.xml", entry.getName()); + entry = iter.next(); + assertEquals("test2.xml", entry.getName()); + assertFalse(iter.hasNext()); + } + } + + @Test + public void getEntriesOfUnarchiveInMemoryTest() throws IOException { + byte[] data = null; + try (FileInputStream fis = new FileInputStream(getFile("bla.7z"))) { + data = IOUtils.toByteArray(fis); + } + try (SevenZFile sevenZFile = new SevenZFile(new SeekableInMemoryByteChannel(data))) { + final Iterable<SevenZArchiveEntry> entries = sevenZFile.getEntries(); + final Iterator<SevenZArchiveEntry> iter = entries.iterator(); + SevenZArchiveEntry entry = iter.next(); + assertEquals("test1.xml", entry.getName()); + entry = iter.next(); + assertEquals("test2.xml", entry.getName()); + assertFalse(iter.hasNext()); + } + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-348" + */ + @Test + public void readEntriesOfSize0() throws IOException { + try (SevenZFile sevenZFile = new SevenZFile(getFile("COMPRESS-348.7z"))) { + int entries = 0; + SevenZArchiveEntry entry = sevenZFile.getNextEntry(); + while (entry != null) { + entries++; + final int b = sevenZFile.read(); + if ("2.txt".equals(entry.getName()) || "5.txt".equals(entry.getName())) { + assertEquals(-1, b); + } else { + assertNotEquals(-1, b); + } + entry = sevenZFile.getNextEntry(); + } + assertEquals(5, entries); + } + } + + private void test7zUnarchive(final File f, final SevenZMethod m, final byte[] password) throws Exception { + try (SevenZFile sevenZFile = new SevenZFile(f, password)) { + test7zUnarchive(sevenZFile, m); + } + } + + private void test7zUnarchive(final File f, 
final SevenZMethod m, final char[] password) throws Exception { + try (SevenZFile sevenZFile = new SevenZFile(f, password)) { + test7zUnarchive(sevenZFile, m); + } + } + + private void test7zUnarchive(SevenZFile sevenZFile, final SevenZMethod m) throws Exception { + SevenZArchiveEntry entry = sevenZFile.getNextEntry(); + assertEquals("test1.xml", entry.getName()); + assertEquals(m, entry.getContentMethods().iterator().next().getMethod()); + entry = sevenZFile.getNextEntry(); + assertEquals("test2.xml", entry.getName()); + assertEquals(m, entry.getContentMethods().iterator().next().getMethod()); + final byte[] contents = new byte[(int) entry.getSize()]; + int off = 0; + while ((off < contents.length)) { + final int bytesRead = sevenZFile.read(contents, off, contents.length - off); + assert (bytesRead >= 0); + off += bytesRead; + } + assertEquals(TEST2_CONTENT, new String(contents, "UTF-8")); + assertNull(sevenZFile.getNextEntry()); + } + + private void checkHelloWorld(final String filename) throws Exception { + try (SevenZFile sevenZFile = new SevenZFile(getFile(filename))) { + final SevenZArchiveEntry entry = sevenZFile.getNextEntry(); + assertEquals("Hello world.txt", entry.getName()); + final byte[] contents = new byte[(int) entry.getSize()]; + int off = 0; + while ((off < contents.length)) { + final int bytesRead = sevenZFile.read(contents, off, contents.length - off); + assert (bytesRead >= 0); + off += bytesRead; + } + assertEquals("Hello, world!\n", new String(contents, "UTF-8")); + assertNull(sevenZFile.getNextEntry()); + } + } + + private static boolean isStrongCryptoAvailable() throws NoSuchAlgorithmException { + return Cipher.getMaxAllowedKeyLength("AES/ECB/PKCS5Padding") >= 256; + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZMethodConfigurationTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZMethodConfigurationTest.java new file mode 100644 index 000000000..20fccbd84 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZMethodConfigurationTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import org.junit.Assert; +import org.junit.Test; +import org.tukaani.xz.LZMA2Options; + +public class SevenZMethodConfigurationTest { + + @Test + public void shouldAllowNullOptions() { + Assert.assertNull(new SevenZMethodConfiguration(SevenZMethod.LZMA2, null) + .getOptions()); + } + + @Test + public void shouldAllowLZMA2OptionsForLZMA() { + Assert.assertNotNull(new SevenZMethodConfiguration(SevenZMethod.LZMA, + new LZMA2Options()) + .getOptions()); + } + + @Test + public void shouldAllowNumberForLZMA() { + Assert.assertNotNull(new SevenZMethodConfiguration(SevenZMethod.LZMA, 42) + .getOptions()); + } + + @Test + public void shouldAllowLZMA2OptionsForLZMA2() { + Assert.assertNotNull(new SevenZMethodConfiguration(SevenZMethod.LZMA2, + new LZMA2Options()) + .getOptions()); + } + + @Test + public void shouldAllowNumberForLZMA2() { + Assert.assertNotNull(new SevenZMethodConfiguration(SevenZMethod.LZMA2, 42) + .getOptions()); + } + + @Test + public void shouldAllowNumberForBzip2() { + Assert.assertNotNull(new SevenZMethodConfiguration(SevenZMethod.BZIP2, 42) + .getOptions()); + } + + @Test + public void shouldAllowNumberForDeflate() { + Assert.assertNotNull(new SevenZMethodConfiguration(SevenZMethod.DEFLATE, 42) + .getOptions()); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldNotAllowStringOptionsForLZMA() { + new SevenZMethodConfiguration(SevenZMethod.LZMA, ""); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldNotAllowStringOptionsForLZMA2() { + new SevenZMethodConfiguration(SevenZMethod.LZMA2, ""); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZNativeHeapTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZNativeHeapTest.java new file mode 100644 index 000000000..d5568194a --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZNativeHeapTest.java @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.zip.DataFormatException; +import java.util.zip.Deflater; +import java.util.zip.Inflater; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.sevenz.Coders.DeflateDecoder; +import org.apache.commons.compress.archivers.sevenz.Coders.DeflateDecoder.DeflateDecoderInputStream; +import org.apache.commons.compress.archivers.sevenz.Coders.DeflateDecoder.DeflateDecoderOutputStream; +import org.junit.Test; + +public class SevenZNativeHeapTest extends AbstractTestCase { + + + @Test + public void testEndDeflaterOnCloseStream() throws Exception { + Coders.DeflateDecoder deflateDecoder = new DeflateDecoder(); + + final DeflateDecoderOutputStream outputStream = + (DeflateDecoderOutputStream) deflateDecoder.encode(new ByteArrayOutputStream(), 9); + DelegatingDeflater delegatingDeflater = new DelegatingDeflater(outputStream.deflater); + outputStream.deflater = delegatingDeflater; + outputStream.close(); + assertTrue(delegatingDeflater.isEnded.get()); + + } + + @Test + public void testEndInflaterOnCloseStream() throws Exception { + Coders.DeflateDecoder deflateDecoder = new DeflateDecoder(); + final DeflateDecoderInputStream inputStream = + (DeflateDecoderInputStream) deflateDecoder.decode("dummy",new ByteArrayInputStream(new byte[0]),0,null,null); + DelegatingInflater delegatingInflater = new DelegatingInflater(inputStream.inflater); + inputStream.inflater = delegatingInflater; + inputStream.close(); + + assertTrue(delegatingInflater.isEnded.get()); + } + + private class DelegatingInflater extends Inflater { + + private final Inflater inflater; + + public DelegatingInflater(Inflater inflater) { + this.inflater = inflater; + } + AtomicBoolean isEnded = new AtomicBoolean(); + + @Override + public void end() { + isEnded.set(true); + inflater.end(); + } + + @Override + public void setInput(byte[] b, int off, int len) { + inflater.setInput(b, off, len); + } + + @Override + public void setInput(byte[] b) { + inflater.setInput(b); + } + + @Override + public void setDictionary(byte[] b, int off, int len) { + inflater.setDictionary(b, off, len); + } + + @Override + public void setDictionary(byte[] b) { + inflater.setDictionary(b); + } + + @Override + public int getRemaining() { + return inflater.getRemaining(); + } + + @Override + public boolean needsInput() { + return inflater.needsInput(); + } + + @Override + public boolean needsDictionary() { + return inflater.needsDictionary(); + } + + @Override + public boolean finished() { + return inflater.finished(); + } + + @Override + public int inflate(byte[] b, int off, int len) throws DataFormatException { + return inflater.inflate(b, off, len); + } + + @Override + public int inflate(byte[] b) throws DataFormatException { + return inflater.inflate(b); + } + + @Override + public int getAdler() { + return inflater.getAdler(); + } + + @Override + public int getTotalIn() { + return inflater.getTotalIn(); + } + + @Override + public long getBytesRead() { + return inflater.getBytesRead(); + } + + @Override + public int getTotalOut() { + return inflater.getTotalOut(); + } + + @Override + public long getBytesWritten() { + return inflater.getBytesWritten(); + } + + @Override + public void reset() { + inflater.reset(); + } + + } + + private class DelegatingDeflater extends 
Deflater { + + private final Deflater deflater; + + public DelegatingDeflater(Deflater deflater) { + this.deflater = deflater; + } + + AtomicBoolean isEnded = new AtomicBoolean(); + + @Override + public void end() { + isEnded.set(true); + deflater.end(); + } + + @Override + public void setInput(byte[] b, int off, int len) { + deflater.setInput(b, off, len); + } + + @Override + public void setInput(byte[] b) { + deflater.setInput(b); + } + + @Override + public void setDictionary(byte[] b, int off, int len) { + deflater.setDictionary(b, off, len); + } + + @Override + public void setDictionary(byte[] b) { + deflater.setDictionary(b); + } + + @Override + public void setStrategy(int strategy) { + deflater.setStrategy(strategy); + } + + @Override + public void setLevel(int level) { + deflater.setLevel(level); + } + + @Override + public boolean needsInput() { + return deflater.needsInput(); + } + + @Override + public void finish() { + deflater.finish(); + } + + @Override + public boolean finished() { + return deflater.finished(); + } + + @Override + public int deflate(byte[] b, int off, int len) { + return deflater.deflate(b, off, len); + } + + @Override + public int deflate(byte[] b) { + return deflater.deflate(b); + } + + @Override + public int deflate(byte[] b, int off, int len, int flush) { + return deflater.deflate(b, off, len, flush); + } + + @Override + public int getAdler() { + return deflater.getAdler(); + } + + @Override + public int getTotalIn() { + return deflater.getTotalIn(); + } + + @Override + public long getBytesRead() { + return deflater.getBytesRead(); + } + + @Override + public int getTotalOut() { + return deflater.getTotalOut(); + } + + @Override + public long getBytesWritten() { + return deflater.getBytesWritten(); + } + + @Override + public void reset() { + deflater.reset(); + } + + + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZOutputFileTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZOutputFileTest.java new file mode 100644 index 000000000..7139ec39a --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZOutputFileTest.java @@ -0,0 +1,522 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import static org.junit.Assert.*; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Collections; +import java.util.Date; +import java.util.Iterator; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; +import org.tukaani.xz.LZMA2Options; + +public class SevenZOutputFileTest extends AbstractTestCase { + + private static final boolean XZ_BCJ_IS_BUGGY; + + static { + final String version = org.tukaani.xz.XZ.class.getPackage().getImplementationVersion(); + + XZ_BCJ_IS_BUGGY= version != null && version.equals("1.4"); + if (XZ_BCJ_IS_BUGGY) { + System.out.println("XZ version is " + version + " - skipping BCJ tests"); + } + } + private File output; + + @Override + public void tearDown() throws Exception { + if (output != null && !output.delete()) { + output.deleteOnExit(); + } + super.tearDown(); + } + + @Test + public void testDirectoriesAndEmptyFiles() throws Exception { + output = new File(dir, "empties.7z"); + + final Date accessDate = new Date(); + final Calendar cal = Calendar.getInstance(); + cal.add(Calendar.HOUR, -1); + final Date creationDate = cal.getTime(); + + try (SevenZOutputFile outArchive = new SevenZOutputFile(output)) { + SevenZArchiveEntry entry = outArchive.createArchiveEntry(dir, "foo/"); + outArchive.putArchiveEntry(entry); + outArchive.closeArchiveEntry(); + + entry = new SevenZArchiveEntry(); + entry.setName("foo/bar"); + entry.setCreationDate(creationDate); + entry.setAccessDate(accessDate); + outArchive.putArchiveEntry(entry); + outArchive.write(new byte[0]); + outArchive.closeArchiveEntry(); + + entry = new SevenZArchiveEntry(); + entry.setName("xyzzy"); + outArchive.putArchiveEntry(entry); + outArchive.write(0); + outArchive.closeArchiveEntry(); + + entry = outArchive.createArchiveEntry(dir, "baz/"); + entry.setAntiItem(true); + outArchive.putArchiveEntry(entry); + outArchive.closeArchiveEntry(); + + entry = new SevenZArchiveEntry(); + entry.setName("dada"); + entry.setHasWindowsAttributes(true); + entry.setWindowsAttributes(17); + outArchive.putArchiveEntry(entry); + outArchive.write(5); + outArchive.write(42); + outArchive.closeArchiveEntry(); + + outArchive.finish(); + } + + try (SevenZFile archive = new SevenZFile(output)) { + SevenZArchiveEntry entry = archive.getNextEntry(); + assert (entry != null); + assertEquals("foo/", entry.getName()); + assertTrue(entry.isDirectory()); + assertFalse(entry.isAntiItem()); + + entry = archive.getNextEntry(); + assert (entry != null); + assertEquals("foo/bar", entry.getName()); + assertFalse(entry.isDirectory()); + assertFalse(entry.isAntiItem()); + assertEquals(0, entry.getSize()); + assertFalse(entry.getHasLastModifiedDate()); + assertEquals(accessDate, entry.getAccessDate()); + assertEquals(creationDate, entry.getCreationDate()); + + entry = archive.getNextEntry(); + assert (entry != null); + assertEquals("xyzzy", entry.getName()); + assertEquals(1, entry.getSize()); + assertFalse(entry.getHasAccessDate()); + assertFalse(entry.getHasCreationDate()); + assertEquals(0, archive.read()); + + entry = archive.getNextEntry(); + assert (entry != null); + assertEquals("baz/", entry.getName()); + assertTrue(entry.isDirectory()); + assertTrue(entry.isAntiItem()); + + entry = archive.getNextEntry(); + assert (entry != null); + assertEquals("dada", entry.getName()); + 
assertEquals(2, entry.getSize()); + final byte[] content = new byte[2]; + assertEquals(2, archive.read(content)); + assertEquals(5, content[0]); + assertEquals(42, content[1]); + assertEquals(17, entry.getWindowsAttributes()); + + assert (archive.getNextEntry() == null); + } + + } + + @Test + public void testDirectoriesOnly() throws Exception { + output = new File(dir, "dirs.7z"); + try (SevenZOutputFile outArchive = new SevenZOutputFile(output)) { + final SevenZArchiveEntry entry = new SevenZArchiveEntry(); + entry.setName("foo/"); + entry.setDirectory(true); + outArchive.putArchiveEntry(entry); + outArchive.closeArchiveEntry(); + } + + try (SevenZFile archive = new SevenZFile(output)) { + final SevenZArchiveEntry entry = archive.getNextEntry(); + assert (entry != null); + assertEquals("foo/", entry.getName()); + assertTrue(entry.isDirectory()); + assertFalse(entry.isAntiItem()); + + assert (archive.getNextEntry() == null); + } + + } + + @Test + public void testCantFinishTwice() throws Exception { + output = new File(dir, "finish.7z"); + try (SevenZOutputFile outArchive = new SevenZOutputFile(output)) { + outArchive.finish(); + outArchive.finish(); + fail("shouldn't be able to call finish twice"); + } catch (final IOException ex) { + assertEquals("This archive has already been finished", ex.getMessage()); + } + } + + @Test + public void testSixEmptyFiles() throws Exception { + testCompress252(6, 0); + } + + @Test + public void testSixFilesSomeNotEmpty() throws Exception { + testCompress252(6, 2); + } + + @Test + public void testSevenEmptyFiles() throws Exception { + testCompress252(7, 0); + } + + @Test + public void testSevenFilesSomeNotEmpty() throws Exception { + testCompress252(7, 2); + } + + @Test + public void testEightEmptyFiles() throws Exception { + testCompress252(8, 0); + } + + @Test + public void testEightFilesSomeNotEmpty() throws Exception { + testCompress252(8, 2); + } + + @Test + public void testNineEmptyFiles() throws Exception { + testCompress252(9, 0); + } + + @Test + public void testNineFilesSomeNotEmpty() throws Exception { + testCompress252(9, 2); + } + + @Test + public void testTwentyNineEmptyFiles() throws Exception { + testCompress252(29, 0); + } + + @Test + public void testTwentyNineFilesSomeNotEmpty() throws Exception { + testCompress252(29, 7); + } + + @Test + public void testCopyRoundtrip() throws Exception { + testRoundTrip(SevenZMethod.COPY); + } + + @Test + public void testBzip2Roundtrip() throws Exception { + testRoundTrip(SevenZMethod.BZIP2); + } + + @Test + public void testLzma2Roundtrip() throws Exception { + testRoundTrip(SevenZMethod.LZMA2); + } + + @Test + public void testDeflateRoundtrip() throws Exception { + testRoundTrip(SevenZMethod.DEFLATE); + } + + @Test + public void testBCJX86Roundtrip() throws Exception { + if (XZ_BCJ_IS_BUGGY) { return; } + testFilterRoundTrip(new SevenZMethodConfiguration(SevenZMethod.BCJ_X86_FILTER)); + } + + @Test + public void testBCJARMRoundtrip() throws Exception { + if (XZ_BCJ_IS_BUGGY) { return; } + testFilterRoundTrip(new SevenZMethodConfiguration(SevenZMethod.BCJ_ARM_FILTER)); + } + + @Test + public void testBCJARMThumbRoundtrip() throws Exception { + if (XZ_BCJ_IS_BUGGY) { return; } + testFilterRoundTrip(new SevenZMethodConfiguration(SevenZMethod.BCJ_ARM_THUMB_FILTER)); + } + + @Test + public void testBCJIA64Roundtrip() throws Exception { + if (XZ_BCJ_IS_BUGGY) { return; } + testFilterRoundTrip(new SevenZMethodConfiguration(SevenZMethod.BCJ_IA64_FILTER)); + } + + @Test + public void testBCJPPCRoundtrip() throws 
Exception { + if (XZ_BCJ_IS_BUGGY) { return; } + testFilterRoundTrip(new SevenZMethodConfiguration(SevenZMethod.BCJ_PPC_FILTER)); + } + + @Test + public void testBCJSparcRoundtrip() throws Exception { + if (XZ_BCJ_IS_BUGGY) { return; } + testFilterRoundTrip(new SevenZMethodConfiguration(SevenZMethod.BCJ_SPARC_FILTER)); + } + + @Test + public void testDeltaRoundtrip() throws Exception { + testFilterRoundTrip(new SevenZMethodConfiguration(SevenZMethod.DELTA_FILTER)); + } + + @Test + public void testStackOfContentCompressions() throws Exception { + output = new File(dir, "multiple-methods.7z"); + final ArrayList<SevenZMethodConfiguration> methods = new ArrayList<>(); + methods.add(new SevenZMethodConfiguration(SevenZMethod.LZMA2)); + methods.add(new SevenZMethodConfiguration(SevenZMethod.COPY)); + methods.add(new SevenZMethodConfiguration(SevenZMethod.DEFLATE)); + methods.add(new SevenZMethodConfiguration(SevenZMethod.BZIP2)); + createAndReadBack(output, methods); + } + + @Test + public void testStackOfContentCompressionsInMemory() throws Exception { + final ArrayList<SevenZMethodConfiguration> methods = new ArrayList<>(); + methods.add(new SevenZMethodConfiguration(SevenZMethod.LZMA2)); + methods.add(new SevenZMethodConfiguration(SevenZMethod.COPY)); + methods.add(new SevenZMethodConfiguration(SevenZMethod.DEFLATE)); + methods.add(new SevenZMethodConfiguration(SevenZMethod.BZIP2)); + createAndReadBack(new SeekableInMemoryByteChannel(), methods); + } + + @Test + public void testDeflateWithConfiguration() throws Exception { + output = new File(dir, "deflate-options.7z"); + // Deflater.BEST_SPEED + createAndReadBack(output, Collections + .singletonList(new SevenZMethodConfiguration(SevenZMethod.DEFLATE, 1))); + } + + @Test + public void testBzip2WithConfiguration() throws Exception { + output = new File(dir, "bzip2-options.7z"); + // 400k block size + createAndReadBack(output, Collections + .singletonList(new SevenZMethodConfiguration(SevenZMethod.BZIP2, 4))); + } + + @Test + public void testLzmaWithIntConfiguration() throws Exception { + output = new File(dir, "lzma-options.7z"); + // 1 MB dictionary + createAndReadBack(output, Collections + .singletonList(new SevenZMethodConfiguration(SevenZMethod.LZMA, 1 << 20))); + } + + @Test + public void testLzmaWithOptionsConfiguration() throws Exception { + output = new File(dir, "lzma-options2.7z"); + final LZMA2Options opts = new LZMA2Options(1); + createAndReadBack(output, Collections + .singletonList(new SevenZMethodConfiguration(SevenZMethod.LZMA, opts))); + } + + @Test + public void testLzma2WithIntConfiguration() throws Exception { + output = new File(dir, "lzma2-options.7z"); + // 1 MB dictionary + createAndReadBack(output, Collections + .singletonList(new SevenZMethodConfiguration(SevenZMethod.LZMA2, 1 << 20))); + } + + @Test + public void testLzma2WithOptionsConfiguration() throws Exception { + output = new File(dir, "lzma2-options2.7z"); + final LZMA2Options opts = new LZMA2Options(1); + createAndReadBack(output, Collections + .singletonList(new SevenZMethodConfiguration(SevenZMethod.LZMA2, opts))); + } + + @Test + public void testArchiveWithMixedMethods() throws Exception { + output = new File(dir, "mixed-methods.7z"); + try (SevenZOutputFile outArchive = new SevenZOutputFile(output)) { + addFile(outArchive, 0, true); + addFile(outArchive, 1, true, Arrays.asList(new SevenZMethodConfiguration(SevenZMethod.BZIP2))); + } + + try (SevenZFile archive = new SevenZFile(output)) { + assertEquals(Boolean.TRUE, + verifyFile(archive, 0, 
Arrays.asList(new SevenZMethodConfiguration(SevenZMethod.LZMA2)))); + assertEquals(Boolean.TRUE, + verifyFile(archive, 1, Arrays.asList(new SevenZMethodConfiguration(SevenZMethod.BZIP2)))); + } + } + + private void testCompress252(final int numberOfFiles, final int numberOfNonEmptyFiles) + throws Exception { + final int nonEmptyModulus = numberOfNonEmptyFiles != 0 + ? numberOfFiles / numberOfNonEmptyFiles + : numberOfFiles + 1; + int nonEmptyFilesAdded = 0; + output = new File(dir, "COMPRESS252-" + numberOfFiles + "-" + numberOfNonEmptyFiles + ".7z"); + try (SevenZOutputFile archive = new SevenZOutputFile(output)) { + addDir(archive); + for (int i = 0; i < numberOfFiles; i++) { + addFile(archive, i, + (i + 1) % nonEmptyModulus == 0 && nonEmptyFilesAdded++ < numberOfNonEmptyFiles); + } + } + verifyCompress252(output, numberOfFiles, numberOfNonEmptyFiles); + } + + private void verifyCompress252(final File output, final int numberOfFiles, final int numberOfNonEmptyFiles) + throws Exception { + int filesFound = 0; + int nonEmptyFilesFound = 0; + try (SevenZFile archive = new SevenZFile(output)) { + verifyDir(archive); + Boolean b = verifyFile(archive, filesFound++); + while (b != null) { + if (Boolean.TRUE.equals(b)) { + nonEmptyFilesFound++; + } + b = verifyFile(archive, filesFound++); + } + } + assertEquals(numberOfFiles + 1, filesFound); + assertEquals(numberOfNonEmptyFiles, nonEmptyFilesFound); + } + + private void addDir(final SevenZOutputFile archive) throws Exception { + final SevenZArchiveEntry entry = archive.createArchiveEntry(dir, "foo/"); + archive.putArchiveEntry(entry); + archive.closeArchiveEntry(); + } + + private void verifyDir(final SevenZFile archive) throws Exception { + final SevenZArchiveEntry entry = archive.getNextEntry(); + assertNotNull(entry); + assertEquals("foo/", entry.getName()); + assertTrue(entry.isDirectory()); + } + + private void addFile(final SevenZOutputFile archive, final int index, final boolean nonEmpty) + throws Exception { + addFile(archive, index, nonEmpty, null); + } + + private void addFile(final SevenZOutputFile archive, final int index, final boolean nonEmpty, final Iterable<SevenZMethodConfiguration> methods) + throws Exception { + final SevenZArchiveEntry entry = new SevenZArchiveEntry(); + entry.setName("foo/" + index + ".txt"); + entry.setContentMethods(methods); + archive.putArchiveEntry(entry); + archive.write(nonEmpty ? 
new byte[] { 'A' } : new byte[0]); + archive.closeArchiveEntry(); + } + + private Boolean verifyFile(final SevenZFile archive, final int index) throws Exception { + return verifyFile(archive, index, null); + } + + private Boolean verifyFile(final SevenZFile archive, final int index, + final Iterable<SevenZMethodConfiguration> methods) throws Exception { + final SevenZArchiveEntry entry = archive.getNextEntry(); + if (entry == null) { + return null; + } + assertEquals("foo/" + index + ".txt", entry.getName()); + assertEquals(false, entry.isDirectory()); + if (entry.getSize() == 0) { + return Boolean.FALSE; + } + assertEquals(1, entry.getSize()); + assertEquals('A', archive.read()); + assertEquals(-1, archive.read()); + if (methods != null) { + assertContentMethodsEquals(methods, entry.getContentMethods()); + } + return Boolean.TRUE; + } + + private void testRoundTrip(final SevenZMethod method) throws Exception { + output = new File(dir, method + "-roundtrip.7z"); + final ArrayList<SevenZMethodConfiguration> methods = new ArrayList<>(); + methods.add(new SevenZMethodConfiguration(method)); + createAndReadBack(output, methods); + } + + private void testFilterRoundTrip(final SevenZMethodConfiguration method) throws Exception { + output = new File(dir, method.getMethod() + "-roundtrip.7z"); + final ArrayList<SevenZMethodConfiguration> methods = new ArrayList<>(); + methods.add(method); + methods.add(new SevenZMethodConfiguration(SevenZMethod.LZMA2)); + createAndReadBack(output, methods); + } + + private void createAndReadBack(final File output, final Iterable<SevenZMethodConfiguration> methods) throws Exception { + final SevenZOutputFile outArchive = new SevenZOutputFile(output); + outArchive.setContentMethods(methods); + try { + addFile(outArchive, 0, true); + } finally { + outArchive.close(); + } + + try (SevenZFile archive = new SevenZFile(output)) { + assertEquals(Boolean.TRUE, verifyFile(archive, 0, methods)); + } + } + + private void createAndReadBack(final SeekableInMemoryByteChannel output, final Iterable<SevenZMethodConfiguration> methods) throws Exception { + final SevenZOutputFile outArchive = new SevenZOutputFile(output); + outArchive.setContentMethods(methods); + try { + addFile(outArchive, 0, true); + } finally { + outArchive.close(); + } + try (SevenZFile archive = + new SevenZFile(new SeekableInMemoryByteChannel(output.array()), "in memory")) { + assertEquals(Boolean.TRUE, verifyFile(archive, 0, methods)); + } + } + + private static void assertContentMethodsEquals(final Iterable<? extends SevenZMethodConfiguration> expected, + final Iterable<? extends SevenZMethodConfiguration> actual) { + assertNotNull(actual); + final Iterator<? extends SevenZMethodConfiguration> expectedIter = expected.iterator(); + final Iterator<? 
extends SevenZMethodConfiguration> actualIter = actual.iterator(); + while (expectedIter.hasNext()) { + assertTrue(actualIter.hasNext()); + final SevenZMethodConfiguration expConfig = expectedIter.next(); + final SevenZMethodConfiguration actConfig = actualIter.next(); + assertEquals(expConfig.getMethod(), actConfig.getMethod()); + } + assertFalse(actualIter.hasNext()); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/BigFilesIT.java b/src/test/java/org/apache/commons/compress/archivers/tar/BigFilesIT.java new file mode 100644 index 000000000..14cfe6458 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/tar/BigFilesIT.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.tar; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.io.BufferedInputStream; +import java.io.InputStream; +import java.util.Random; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.junit.Test; + +public class BigFilesIT { + + @Test + public void readFileBiggerThan8GByteStar() throws Exception { + readFileBiggerThan8GByte("/8.star.tar.gz"); + } + + @Test + public void readFileBiggerThan8GBytePosix() throws Exception { + readFileBiggerThan8GByte("/8.posix.tar.gz"); + } + + @Test + public void readFileHeadersOfArchiveBiggerThan8GByte() throws Exception { + InputStream in = null; + GzipCompressorInputStream gzin = null; + TarArchiveInputStream tin = null; + try { + in = new BufferedInputStream(BigFilesIT.class + .getResourceAsStream("/8.posix.tar.gz") + ); + gzin = new GzipCompressorInputStream(in); + tin = new TarArchiveInputStream(gzin); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertNotNull(e); + assertNull(tin.getNextTarEntry()); + } finally { + if (tin != null) { + tin.close(); + } + if (gzin != null) { + gzin.close(); + } + if (in != null) { + in.close(); + } + } + } + + private void readFileBiggerThan8GByte(final String name) throws Exception { + InputStream in = null; + GzipCompressorInputStream gzin = null; + TarArchiveInputStream tin = null; + try { + in = new BufferedInputStream(BigFilesIT.class + .getResourceAsStream(name)); + gzin = new GzipCompressorInputStream(in); + tin = new TarArchiveInputStream(gzin); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertNotNull(e); + assertEquals(8200l * 1024 * 1024, e.getSize()); + + long read = 0; + final Random r = new Random(System.currentTimeMillis()); + int readNow; + final byte[] buf = new byte[1024 * 1024]; + while ((readNow = tin.read(buf, 0, buf.length)) > 0) { + // testing all bytes for a value of 0 is going to take + // too long, 
just pick a few ones randomly + for (int i = 0; i < 100; i++) { + final int idx = r.nextInt(readNow); + assertEquals("testing byte " + (read + idx), 0, buf[idx]); + } + read += readNow; + } + assertEquals(8200l * 1024 * 1024, read); + assertNull(tin.getNextTarEntry()); + } finally { + if (tin != null) { + tin.close(); + } + if (gzin != null) { + gzin.close(); + } + if (in != null) { + in.close(); + } + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java new file mode 100644 index 000000000..56ac22731 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.tar; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.junit.Assert.*; +import org.junit.Test; + +import java.io.File; +import java.io.FileInputStream; + + +public class SparseFilesTest { + + @Test + public void testOldGNU() throws Throwable { + final File file = getFile("oldgnu_sparse.tar"); + TarArchiveInputStream tin = null; + try { + tin = new TarArchiveInputStream(new FileInputStream(file)); + final TarArchiveEntry ae = tin.getNextTarEntry(); + assertEquals("sparsefile", ae.getName()); + assertTrue(ae.isOldGNUSparse()); + assertTrue(ae.isGNUSparse()); + assertFalse(ae.isPaxGNUSparse()); + assertFalse(tin.canReadEntryData(ae)); + } finally { + if (tin != null) { + tin.close(); + } + } + } + + @Test + public void testPaxGNU() throws Throwable { + final File file = getFile("pax_gnu_sparse.tar"); + TarArchiveInputStream tin = null; + try { + tin = new TarArchiveInputStream(new FileInputStream(file)); + assertPaxGNUEntry(tin, "0.0"); + assertPaxGNUEntry(tin, "0.1"); + assertPaxGNUEntry(tin, "1.0"); + } finally { + if (tin != null) { + tin.close(); + } + } + } + + private void assertPaxGNUEntry(final TarArchiveInputStream tin, final String suffix) throws Throwable { + final TarArchiveEntry ae = tin.getNextTarEntry(); + assertEquals("sparsefile-" + suffix, ae.getName()); + assertTrue(ae.isGNUSparse()); + assertTrue(ae.isPaxGNUSparse()); + assertFalse(ae.isOldGNUSparse()); + assertFalse(tin.canReadEntryData(ae)); + } +} + diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java new file mode 100644 index 000000000..703d3a470 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.tar; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.junit.Assume.assumeTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.Locale; +import org.apache.commons.compress.AbstractTestCase; +import org.junit.Test; + +public class TarArchiveEntryTest implements TarConstants { + + private static final String OS = + System.getProperty("os.name").toLowerCase(Locale.ENGLISH); + private static final String ROOT = + OS.startsWith("windows") || OS.startsWith("netware") ? "C:\\" : "/"; + + /** + * JIRA issue SANDBOX-284 + * + * @see "https://issues.apache.org/jira/browse/SANDBOX-284" + */ + @Test + public void testFileSystemRoot() { + final TarArchiveEntry t = new TarArchiveEntry(new File(ROOT)); + assertEquals("/", t.getName()); + } + + @Test + public void testTarFileWithFSRoot() throws IOException { + final File f = File.createTempFile("taetest", ".tar"); + f.deleteOnExit(); + TarArchiveOutputStream tout = null; + TarArchiveInputStream tin = null; + try { + tout = new TarArchiveOutputStream(new FileOutputStream(f)); + TarArchiveEntry t = new TarArchiveEntry(new File(ROOT)); + tout.putArchiveEntry(t); + tout.closeArchiveEntry(); + t = new TarArchiveEntry(new File(new File(ROOT), "foo.txt")); + t.setSize(6); + tout.putArchiveEntry(t); + tout.write(new byte[] {'h', 'e', 'l', 'l', 'o', ' '}); + tout.closeArchiveEntry(); + t = new TarArchiveEntry(new File(new File(ROOT), "bar.txt") + .getAbsolutePath()); + t.setSize(5); + tout.putArchiveEntry(t); + tout.write(new byte[] {'w', 'o', 'r', 'l', 'd'}); + tout.closeArchiveEntry(); + t = new TarArchiveEntry("dummy"); + t.setName(new File(new File(ROOT), "baz.txt").getAbsolutePath()); + t.setSize(1); + tout.putArchiveEntry(t); + tout.write(new byte[] {'!'}); + tout.closeArchiveEntry(); + tout.close(); + tout = null; + + tin = new TarArchiveInputStream(new FileInputStream(f)); + //tin.setDebug(true); + t = tin.getNextTarEntry(); + assertNotNull(t); + assertEquals("/", t.getName()); + assertTrue(t.isCheckSumOK()); + t = tin.getNextTarEntry(); + assertNotNull(t); + assertEquals("foo.txt", t.getName()); + assertTrue(t.isCheckSumOK()); + t = tin.getNextTarEntry(); + assertNotNull(t); + assertEquals("bar.txt", t.getName()); + assertTrue(t.isCheckSumOK()); + t = tin.getNextTarEntry(); + assertNotNull(t); + assertEquals("baz.txt", t.getName()); + 
assertTrue(t.isCheckSumOK()); + } finally { + if (tin != null) { + tin.close(); + } + if (tout != null) { + tout.close(); + } + AbstractTestCase.tryHardToDelete(f); + } + } + + @Test + public void testMaxFileSize(){ + final TarArchiveEntry t = new TarArchiveEntry(""); + t.setSize(0); + t.setSize(1); + try { + t.setSize(-1); + fail("Should have generated IllegalArgumentException"); + } catch (final IllegalArgumentException expected) { + } + t.setSize(077777777777L); + t.setSize(0100000000000L); + } + + @Test public void testExtraPaxHeaders() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + + TarArchiveEntry entry = new TarArchiveEntry("./weasels"); + entry.addPaxHeader("APACHE.mustelida","true"); + entry.addPaxHeader("SCHILY.xattr.user.org.apache.weasels","maximum weasels"); + entry.addPaxHeader("size","1"); + assertEquals("extra header count",2,entry.getExtraPaxHeaders().size()); + assertEquals("APACHE.mustelida","true", + entry.getExtraPaxHeader("APACHE.mustelida")); + assertEquals("SCHILY.xattr.user.org.apache.weasels","maximum weasels", + entry.getExtraPaxHeader("SCHILY.xattr.user.org.apache.weasels")); + assertEquals("size",entry.getSize(),1); + + tos.putArchiveEntry(entry); + tos.write('W'); + tos.closeArchiveEntry(); + tos.close(); + assertNotEquals("should have extra headers before clear",0,entry.getExtraPaxHeaders().size()); + entry.clearExtraPaxHeaders(); + assertEquals("extra headers should be empty after clear",0,entry.getExtraPaxHeaders().size()); + TarArchiveInputStream tis = new TarArchiveInputStream(new ByteArrayInputStream(bos.toByteArray())); + entry = tis.getNextTarEntry(); + assertNotNull("couldn't get entry",entry); + + assertEquals("extra header count",2,entry.getExtraPaxHeaders().size()); + assertEquals("APACHE.mustelida","true", + entry.getExtraPaxHeader("APACHE.mustelida")); + assertEquals("user.org.apache.weasels","maximum weasels", + entry.getExtraPaxHeader("SCHILY.xattr.user.org.apache.weasels")); + + assertEquals('W',tis.read()); + assertTrue("should be at end of entry",tis.read() <0); + + assertNull("should be at end of file",tis.getNextTarEntry()); + tis.close(); + } + + @Test + public void testLinkFlagConstructor() { + final TarArchiveEntry t = new TarArchiveEntry("/foo", LF_GNUTYPE_LONGNAME); + assertGnuMagic(t); + assertEquals("foo", t.getName()); + } + + @Test + public void testLinkFlagConstructorWithFileFlag() { + final TarArchiveEntry t = new TarArchiveEntry("/foo", LF_NORMAL); + assertPosixMagic(t); + assertEquals("foo", t.getName()); + } + + @Test + public void testLinkFlagConstructorWithPreserve() { + final TarArchiveEntry t = new TarArchiveEntry("/foo", LF_GNUTYPE_LONGNAME, + true); + assertGnuMagic(t); + assertEquals("/foo", t.getName()); + } + + @Test + public void preservesDriveSpecOnWindowsAndNetwareIfAskedTo() { + assumeTrue("C:\\".equals(ROOT)); + TarArchiveEntry t = new TarArchiveEntry(ROOT + "foo.txt", true); + assertEquals("C:/foo.txt", t.getName()); + t = new TarArchiveEntry(ROOT + "foo.txt", LF_GNUTYPE_LONGNAME, true); + assertEquals("C:/foo.txt", t.getName()); + } + + private void assertGnuMagic(final TarArchiveEntry t) { + assertEquals(MAGIC_GNU + VERSION_GNU_SPACE, readMagic(t)); + } + + private void assertPosixMagic(final TarArchiveEntry t) { + assertEquals(MAGIC_POSIX + VERSION_POSIX, readMagic(t)); + } + + private String readMagic(final TarArchiveEntry t) { + final byte[] buf = new byte[512]; + t.writeEntryHeader(buf); + return new 
String(buf, MAGIC_OFFSET, MAGICLEN + VERSIONLEN); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java new file mode 100644 index 000000000..b7dcb06c4 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java @@ -0,0 +1,381 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.tar; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.apache.commons.compress.AbstractTestCase.mkdir; +import static org.apache.commons.compress.AbstractTestCase.rmdir; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Calendar; +import java.util.Date; +import java.util.Map; +import java.util.TimeZone; +import java.util.zip.GZIPInputStream; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.utils.CharsetNames; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public class TarArchiveInputStreamTest { + + @Test + public void readSimplePaxHeader() throws Exception { + final InputStream is = new ByteArrayInputStream(new byte[1]); + final TarArchiveInputStream tais = new TarArchiveInputStream(is); + final Map<String, String> headers = tais + .parsePaxHeaders(new ByteArrayInputStream("30 atime=1321711775.972059463\n" + .getBytes(CharsetNames.UTF_8))); + assertEquals(1, headers.size()); + assertEquals("1321711775.972059463", headers.get("atime")); + tais.close(); + } + + @Test + public void secondEntryWinsWhenPaxHeaderContainsDuplicateKey() throws Exception { + final InputStream is = new ByteArrayInputStream(new byte[1]); + final TarArchiveInputStream tais = new TarArchiveInputStream(is); + final Map<String, String> headers = tais + .parsePaxHeaders(new ByteArrayInputStream("11 foo=bar\n11 foo=baz\n" + .getBytes(CharsetNames.UTF_8))); + assertEquals(1, headers.size()); + assertEquals("baz", headers.get("foo")); + tais.close(); + } + + @Test + public void paxHeaderEntryWithEmptyValueRemovesKey() throws Exception { + final InputStream is = new ByteArrayInputStream(new byte[1]); + final TarArchiveInputStream tais = new TarArchiveInputStream(is); + final Map<String, String> 
headers = tais + .parsePaxHeaders(new ByteArrayInputStream("11 foo=bar\n7 foo=\n" + .getBytes(CharsetNames.UTF_8))); + assertEquals(0, headers.size()); + tais.close(); + } + + @Test + public void readPaxHeaderWithEmbeddedNewline() throws Exception { + final InputStream is = new ByteArrayInputStream(new byte[1]); + final TarArchiveInputStream tais = new TarArchiveInputStream(is); + final Map<String, String> headers = tais + .parsePaxHeaders(new ByteArrayInputStream("28 comment=line1\nline2\nand3\n" + .getBytes(CharsetNames.UTF_8))); + assertEquals(1, headers.size()); + assertEquals("line1\nline2\nand3", headers.get("comment")); + tais.close(); + } + + @Test + public void readNonAsciiPaxHeader() throws Exception { + final String ae = "\u00e4"; + final String line = "11 path="+ ae + "\n"; + assertEquals(11, line.getBytes(CharsetNames.UTF_8).length); + final InputStream is = new ByteArrayInputStream(new byte[1]); + final TarArchiveInputStream tais = new TarArchiveInputStream(is); + final Map<String, String> headers = tais + .parsePaxHeaders(new ByteArrayInputStream(line.getBytes(CharsetNames.UTF_8))); + assertEquals(1, headers.size()); + assertEquals(ae, headers.get("path")); + tais.close(); + } + + @Test + public void workaroundForBrokenTimeHeader() throws Exception { + TarArchiveInputStream in = null; + try { + in = new TarArchiveInputStream(new FileInputStream(getFile("simple-aix-native-tar.tar"))); + TarArchiveEntry tae = in.getNextTarEntry(); + tae = in.getNextTarEntry(); + assertEquals("sample/link-to-txt-file.lnk", tae.getName()); + assertEquals(new Date(0), tae.getLastModifiedDate()); + assertTrue(tae.isSymbolicLink()); + assertTrue(tae.isCheckSumOK()); + } finally { + if (in != null) { + in.close(); + } + } + } + + @Test + public void datePriorToEpochInGNUFormat() throws Exception { + datePriorToEpoch("preepoch-star.tar"); + } + + + @Test + public void datePriorToEpochInPAXFormat() throws Exception { + datePriorToEpoch("preepoch-posix.tar"); + } + + private void datePriorToEpoch(final String archive) throws Exception { + TarArchiveInputStream in = null; + try { + in = new TarArchiveInputStream(new FileInputStream(getFile(archive))); + final TarArchiveEntry tae = in.getNextTarEntry(); + assertEquals("foo", tae.getName()); + final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT")); + cal.set(1969, 11, 31, 23, 59, 59); + cal.set(Calendar.MILLISECOND, 0); + assertEquals(cal.getTime(), tae.getLastModifiedDate()); + assertTrue(tae.isCheckSumOK()); + } finally { + if (in != null) { + in.close(); + } + } + } + + @Test + public void testCompress197() throws Exception { + try (TarArchiveInputStream tar = getTestStream("/COMPRESS-197.tar")) { + TarArchiveEntry entry = tar.getNextTarEntry(); + while (entry != null) { + entry = tar.getNextTarEntry(); + } + } catch (final IOException e) { + fail("COMPRESS-197: " + e.getMessage()); + } + } + + @Test + public void shouldUseSpecifiedEncodingWhenReadingGNULongNames() + throws Exception { + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final String encoding = CharsetNames.UTF_16; + final String name = "1234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789" + + "01234567890\u00e4"; + final TarArchiveOutputStream tos = + new TarArchiveOutputStream(bos, encoding); + tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU); + TarArchiveEntry t = new TarArchiveEntry(name); + t.setSize(1); + tos.putArchiveEntry(t); + tos.write(30); + tos.closeArchiveEntry(); + tos.close(); + 
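// read the archive back with the same explicit encoding so the GNU long-name entry decodes to the original name +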
final byte[] data = bos.toByteArray(); + final ByteArrayInputStream bis = new ByteArrayInputStream(data); + final TarArchiveInputStream tis = + new TarArchiveInputStream(bis, encoding); + t = tis.getNextTarEntry(); + assertEquals(name, t.getName()); + tis.close(); + } + + @Test + public void shouldConsumeArchiveCompletely() throws Exception { + final InputStream is = TarArchiveInputStreamTest.class + .getResourceAsStream("/archive_with_trailer.tar"); + final TarArchiveInputStream tar = new TarArchiveInputStream(is); + while (tar.getNextTarEntry() != null) { + // just consume the archive + } + final byte[] expected = new byte[] { + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', '\n' + }; + final byte[] actual = new byte[expected.length]; + is.read(actual); + assertArrayEquals(expected, actual); + tar.close(); + } + + @Test + public void readsArchiveCompletely_COMPRESS245() throws Exception { + try (InputStream is = TarArchiveInputStreamTest.class + .getResourceAsStream("/COMPRESS-245.tar.gz")) { + final InputStream gin = new GZIPInputStream(is); + final TarArchiveInputStream tar = new TarArchiveInputStream(gin); + int count = 0; + TarArchiveEntry entry = tar.getNextTarEntry(); + while (entry != null) { + count++; + entry = tar.getNextTarEntry(); + } + assertEquals(31, count); + tar.close(); + } catch (final IOException e) { + fail("COMPRESS-245: " + e.getMessage()); + } + } + + @Test(expected = IOException.class) + public void shouldThrowAnExceptionOnTruncatedEntries() throws Exception { + final File dir = mkdir("COMPRESS-279"); + final TarArchiveInputStream is = getTestStream("/COMPRESS-279.tar"); + FileOutputStream out = null; + try { + TarArchiveEntry entry = is.getNextTarEntry(); + int count = 0; + while (entry != null) { + out = new FileOutputStream(new File(dir, String.valueOf(count))); + IOUtils.copy(is, out); + out.close(); + out = null; + count++; + entry = is.getNextTarEntry(); + } + } finally { + is.close(); + if (out != null) { + out.close(); + } + rmdir(dir); + } + } + + @Test + public void shouldReadBigGid() throws Exception { + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX); + TarArchiveEntry t = new TarArchiveEntry("name"); + t.setGroupId(4294967294l); + t.setSize(1); + tos.putArchiveEntry(t); + tos.write(30); + tos.closeArchiveEntry(); + tos.close(); + final byte[] data = bos.toByteArray(); + final ByteArrayInputStream bis = new ByteArrayInputStream(data); + final TarArchiveInputStream tis = + new TarArchiveInputStream(bis); + t = tis.getNextTarEntry(); + assertEquals(4294967294l, t.getLongGroupId()); + tis.close(); + } + + /** + * @link "https://issues.apache.org/jira/browse/COMPRESS-324" + */ + @Test + public void shouldReadGNULongNameEntryWithWrongName() throws Exception { + try (TarArchiveInputStream is = getTestStream("/COMPRESS-324.tar")) { + final TarArchiveEntry entry = is.getNextTarEntry(); + assertEquals("1234567890123456789012345678901234567890123456789012345678901234567890" + + "1234567890123456789012345678901234567890123456789012345678901234567890" + + "1234567890123456789012345678901234567890123456789012345678901234567890" + + "1234567890123456789012345678901234567890.txt", + entry.getName()); + } + } + + /** + * @link "https://issues.apache.org/jira/browse/COMPRESS-355" + */ + @Test + public void survivesBlankLinesInPaxHeader() throws Exception { + try (TarArchiveInputStream is = 
getTestStream("/COMPRESS-355.tar")) { + final TarArchiveEntry entry = is.getNextTarEntry(); + assertEquals("package/package.json", entry.getName()); + assertNull(is.getNextTarEntry()); + } + } + + /** + * @link "https://issues.apache.org/jira/browse/COMPRESS-356" + */ + @Test + public void survivesPaxHeaderWithNameEndingInSlash() throws Exception { + try (TarArchiveInputStream is = getTestStream("/COMPRESS-356.tar")) { + final TarArchiveEntry entry = is.getNextTarEntry(); + assertEquals("package/package.json", entry.getName()); + assertNull(is.getNextTarEntry()); + } + } + @Test + public void testGetAndSetOfPaxEntry() throws Exception { + try (TarArchiveInputStream is = getTestStream("/COMPRESS-356.tar")) { + final TarArchiveEntry entry = is.getNextTarEntry(); + assertEquals("package/package.json", entry.getName()); + assertEquals(is.getCurrentEntry(),entry); + TarArchiveEntry weaselEntry = new TarArchiveEntry(entry.getName()); + weaselEntry.setSize(entry.getSize()); + is.setCurrentEntry(weaselEntry); + assertEquals(entry,is.getCurrentEntry()); + assertFalse(entry == is.getCurrentEntry()); + assertTrue(weaselEntry == is.getCurrentEntry()); + try { + is.setCurrentEntry(null); + is.read(); + fail("should abort because current entry is nulled"); + } catch(IllegalStateException e) { + // expected + } + is.setCurrentEntry(entry); + is.read(); + } + } + + /** + * @link "https://issues.apache.org/jira/browse/COMPRESS-417" + */ + @Test + public void skipsDevNumbersWhenEntryIsNoDevice() throws Exception { + try (TarArchiveInputStream is = getTestStream("/COMPRESS-417.tar")) { + assertEquals("test1.xml", is.getNextTarEntry().getName()); + assertEquals("test2.xml", is.getNextTarEntry().getName()); + assertNull(is.getNextTarEntry()); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + try (FileInputStream in = new FileInputStream(getFile("bla.tar")); + TarArchiveInputStream archive = new TarArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read()); + assertEquals(-1, archive.read()); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + byte[] buf = new byte[2]; + try (FileInputStream in = new FileInputStream(getFile("bla.tar")); + TarArchiveInputStream archive = new TarArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read(buf)); + assertEquals(-1, archive.read(buf)); + } + } + + private TarArchiveInputStream getTestStream(final String name) { + return new TarArchiveInputStream( + TarArchiveInputStreamTest.class.getResourceAsStream(name)); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java new file mode 100644 index 000000000..3bdfe9d62 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java @@ -0,0 +1,797 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.tar; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.security.MessageDigest; +import java.util.Calendar; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.ArchiveStreamFactory; +import org.apache.commons.compress.utils.CharsetNames; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class TarArchiveOutputStreamTest extends AbstractTestCase { + + @Test + public void testCount() throws Exception { + final File f = File.createTempFile("commons-compress-tarcount", ".tar"); + f.deleteOnExit(); + final FileOutputStream fos = new FileOutputStream(f); + + final ArchiveOutputStream tarOut = new ArchiveStreamFactory() + .createArchiveOutputStream(ArchiveStreamFactory.TAR, fos); + + final File file1 = getFile("test1.xml"); + final TarArchiveEntry sEntry = new TarArchiveEntry(file1, file1.getName()); + tarOut.putArchiveEntry(sEntry); + + final FileInputStream in = new FileInputStream(file1); + final byte[] buf = new byte[8192]; + + int read = 0; + while ((read = in.read(buf)) > 0) { + tarOut.write(buf, 0, read); + } + + in.close(); + tarOut.closeArchiveEntry(); + tarOut.close(); + + assertEquals(f.length(), tarOut.getBytesWritten()); + } + + @Test + public void testMaxFileSizeError() throws Exception { + final TarArchiveEntry t = new TarArchiveEntry("foo"); + t.setSize(077777777777L); + TarArchiveOutputStream tos = + new TarArchiveOutputStream(new ByteArrayOutputStream()); + tos.putArchiveEntry(t); + t.setSize(0100000000000L); + tos = new TarArchiveOutputStream(new ByteArrayOutputStream()); + try { + tos.putArchiveEntry(t); + fail("Should have generated RuntimeException"); + } catch (final RuntimeException expected) { + } + } + + @Test + public void testBigNumberStarMode() throws Exception { + final TarArchiveEntry t = new TarArchiveEntry("foo"); + t.setSize(0100000000000L); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_STAR); + tos.putArchiveEntry(t); + // make sure header is written to byte array + tos.write(new byte[10 * 1024]); + final byte[] data = bos.toByteArray(); + assertEquals(0x80, + data[TarConstants.NAMELEN + + TarConstants.MODELEN + + 
TarConstants.UIDLEN + + TarConstants.GIDLEN] & 0x80); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals(0100000000000L, e.getSize()); + tin.close(); + // generates IOE because of unclosed entries. + // However we don't really want to create such large entries. + closeQuietly(tos); + } + + @Test + public void testBigNumberPosixMode() throws Exception { + final TarArchiveEntry t = new TarArchiveEntry("foo"); + t.setSize(0100000000000L); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX); + tos.putArchiveEntry(t); + // make sure header is written to byte array + tos.write(new byte[10 * 1024]); + final byte[] data = bos.toByteArray(); + assertEquals("00000000000 ", + new String(data, + 1024 + TarConstants.NAMELEN + + TarConstants.MODELEN + + TarConstants.UIDLEN + + TarConstants.GIDLEN, 12, + CharsetNames.UTF_8)); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals(0100000000000L, e.getSize()); + tin.close(); + // generates IOE because of unclosed entries. + // However we don't really want to create such large entries. + closeQuietly(tos); + } + + @Test + public void testWriteSimplePaxHeaders() throws Exception { + final Map<String, String> m = new HashMap<>(); + m.put("a", "b"); + final byte[] data = writePaxHeader(m); + assertEquals("00000000006 ", + new String(data, TarConstants.NAMELEN + + TarConstants.MODELEN + + TarConstants.UIDLEN + + TarConstants.GIDLEN, 12, + CharsetNames.UTF_8)); + assertEquals("6 a=b\n", new String(data, 512, 6, CharsetNames.UTF_8)); + } + + @Test + public void testPaxHeadersWithLength99() throws Exception { + final Map<String, String> m = new HashMap<>(); + m.put("a", + "0123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789" + + "012"); + final byte[] data = writePaxHeader(m); + assertEquals("00000000143 ", + new String(data, TarConstants.NAMELEN + + TarConstants.MODELEN + + TarConstants.UIDLEN + + TarConstants.GIDLEN, 12, + CharsetNames.UTF_8)); + assertEquals("99 a=0123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789" + + "012\n", new String(data, 512, 99, CharsetNames.UTF_8)); + } + + @Test + public void testPaxHeadersWithLength101() throws Exception { + final Map<String, String> m = new HashMap<>(); + m.put("a", + "0123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789" + + "0123"); + final byte[] data = writePaxHeader(m); + assertEquals("00000000145 ", + new String(data, TarConstants.NAMELEN + + TarConstants.MODELEN + + TarConstants.UIDLEN + + TarConstants.GIDLEN, 12, + CharsetNames.UTF_8)); + assertEquals("101 a=0123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789" + + "0123\n", new String(data, 512, 101, CharsetNames.UTF_8)); + } + + private byte[] writePaxHeader(final Map<String, String> m) throws Exception { + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos, "ASCII"); + tos.writePaxHeaders(new TarArchiveEntry("x"), "foo", m); + + // add a dummy entry so data gets written + final TarArchiveEntry t = new TarArchiveEntry("foo"); + 
t.setSize(10 * 1024); + tos.putArchiveEntry(t); + tos.write(new byte[10 * 1024]); + tos.closeArchiveEntry(); + tos.close(); + + return bos.toByteArray(); + } + + @Test + public void testWriteLongFileNamePosixMode() throws Exception { + final String n = "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789"; + final TarArchiveEntry t = + new TarArchiveEntry(n); + t.setSize(10 * 1024); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos, "ASCII"); + tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX); + tos.putArchiveEntry(t); + tos.write(new byte[10 * 1024]); + tos.closeArchiveEntry(); + final byte[] data = bos.toByteArray(); + assertEquals("160 path=" + n + "\n", + new String(data, 512, 160, CharsetNames.UTF_8)); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals(n, e.getName()); + tin.close(); + tos.close(); + } + + @Test + public void testOldEntryStarMode() throws Exception { + final TarArchiveEntry t = new TarArchiveEntry("foo"); + t.setSize(Integer.MAX_VALUE); + t.setModTime(-1000); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_STAR); + tos.putArchiveEntry(t); + // make sure header is written to byte array + tos.write(new byte[10 * 1024]); + final byte[] data = bos.toByteArray(); + assertEquals((byte) 0xff, + data[TarConstants.NAMELEN + + TarConstants.MODELEN + + TarConstants.UIDLEN + + TarConstants.GIDLEN + + TarConstants.SIZELEN]); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT")); + cal.set(1969, 11, 31, 23, 59, 59); + cal.set(Calendar.MILLISECOND, 0); + assertEquals(cal.getTime(), e.getLastModifiedDate()); + tin.close(); + // generates IOE because of unclosed entries. + // However we don't really want to create such large entries. + closeQuietly(tos); + } + + @Test + public void testOldEntryPosixMode() throws Exception { + final TarArchiveEntry t = new TarArchiveEntry("foo"); + t.setSize(Integer.MAX_VALUE); + t.setModTime(-1000); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX); + tos.putArchiveEntry(t); + // make sure header is written to byte array + tos.write(new byte[10 * 1024]); + final byte[] data = bos.toByteArray(); + assertEquals("00000000000 ", + new String(data, + 1024 + TarConstants.NAMELEN + + TarConstants.MODELEN + + TarConstants.UIDLEN + + TarConstants.GIDLEN + + TarConstants.SIZELEN, 12, + CharsetNames.UTF_8)); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT")); + cal.set(1969, 11, 31, 23, 59, 59); + cal.set(Calendar.MILLISECOND, 0); + assertEquals(cal.getTime(), e.getLastModifiedDate()); + tin.close(); + // generates IOE because of unclosed entries. + // However we don't really want to create such large entries. 
+ closeQuietly(tos); + } + + @Test + public void testOldEntryError() throws Exception { + final TarArchiveEntry t = new TarArchiveEntry("foo"); + t.setSize(Integer.MAX_VALUE); + t.setModTime(-1000); + final TarArchiveOutputStream tos = + new TarArchiveOutputStream(new ByteArrayOutputStream()); + try { + tos.putArchiveEntry(t); + fail("Should have generated RuntimeException"); + } catch (final RuntimeException expected) { + } + tos.close(); + } + + @Test + public void testWriteNonAsciiPathNamePaxHeader() throws Exception { + final String n = "\u00e4"; + final TarArchiveEntry t = new TarArchiveEntry(n); + t.setSize(10 * 1024); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + tos.setAddPaxHeadersForNonAsciiNames(true); + tos.putArchiveEntry(t); + tos.write(new byte[10 * 1024]); + tos.closeArchiveEntry(); + tos.close(); + final byte[] data = bos.toByteArray(); + assertEquals("11 path=" + n + "\n", + new String(data, 512, 11, CharsetNames.UTF_8)); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals(n, e.getName()); + tin.close(); + } + + @Test + public void testWriteNonAsciiLinkPathNamePaxHeader() throws Exception { + final String n = "\u00e4"; + final TarArchiveEntry t = new TarArchiveEntry("a", TarConstants.LF_LINK); + t.setSize(10 * 1024); + t.setLinkName(n); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + tos.setAddPaxHeadersForNonAsciiNames(true); + tos.putArchiveEntry(t); + tos.write(new byte[10 * 1024]); + tos.closeArchiveEntry(); + tos.close(); + final byte[] data = bos.toByteArray(); + assertEquals("15 linkpath=" + n + "\n", + new String(data, 512, 15, CharsetNames.UTF_8)); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals(n, e.getLinkName()); + tin.close(); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-200" + */ + @Test + public void testRoundtripWith67CharFileNameGnu() throws Exception { + testRoundtripWith67CharFileName(TarArchiveOutputStream.LONGFILE_GNU); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-200" + */ + @Test + public void testRoundtripWith67CharFileNamePosix() throws Exception { + testRoundtripWith67CharFileName(TarArchiveOutputStream.LONGFILE_POSIX); + } + + private void testRoundtripWith67CharFileName(final int mode) throws Exception { + final String n = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + + "AAAAAAA"; + assertEquals(67, n.length()); + final TarArchiveEntry t = new TarArchiveEntry(n); + t.setSize(10 * 1024); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos, "ASCII"); + tos.setLongFileMode(mode); + tos.putArchiveEntry(t); + tos.write(new byte[10 * 1024]); + tos.closeArchiveEntry(); + tos.close(); + final byte[] data = bos.toByteArray(); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals(n, e.getName()); + tin.close(); + } + + @Test + public void testWriteLongDirectoryNameErrorMode() throws Exception { + final String n = "01234567890123456789012345678901234567890123456789" + + 
"01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789/"; + + try { + final TarArchiveEntry t = new TarArchiveEntry(n); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos, "ASCII"); + tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_ERROR); + tos.putArchiveEntry(t); + tos.closeArchiveEntry(); + tos.close(); + + fail("Truncated name didn't throw an exception"); + } catch (final RuntimeException e) { + // expected + } + } + + @Test + public void testWriteLongDirectoryNameTruncateMode() throws Exception { + final String n = "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789/"; + final TarArchiveEntry t = new TarArchiveEntry(n); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos, "ASCII"); + tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_TRUNCATE); + tos.putArchiveEntry(t); + tos.closeArchiveEntry(); + tos.close(); + final byte[] data = bos.toByteArray(); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals("Entry name", n.substring(0, TarConstants.NAMELEN) + "/", e.getName()); + assertTrue("The entry is not a directory", e.isDirectory()); + tin.close(); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-203" + */ + @Test + public void testWriteLongDirectoryNameGnuMode() throws Exception { + testWriteLongDirectoryName(TarArchiveOutputStream.LONGFILE_GNU); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-203" + */ + @Test + public void testWriteLongDirectoryNamePosixMode() throws Exception { + testWriteLongDirectoryName(TarArchiveOutputStream.LONGFILE_POSIX); + } + + private void testWriteLongDirectoryName(final int mode) throws Exception { + final String n = "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789/"; + final TarArchiveEntry t = new TarArchiveEntry(n); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos, "ASCII"); + tos.setLongFileMode(mode); + tos.putArchiveEntry(t); + tos.closeArchiveEntry(); + tos.close(); + final byte[] data = bos.toByteArray(); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals(n, e.getName()); + assertTrue(e.isDirectory()); + tin.close(); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-203" + */ + @Test + public void testWriteNonAsciiDirectoryNamePosixMode() throws Exception { + final String n = "f\u00f6\u00f6/"; + final TarArchiveEntry t = new TarArchiveEntry(n); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + tos.setAddPaxHeadersForNonAsciiNames(true); + tos.putArchiveEntry(t); + tos.closeArchiveEntry(); + tos.close(); + final byte[] data = bos.toByteArray(); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals(n, e.getName()); + assertTrue(e.isDirectory()); + 
tin.close(); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-265" + */ + @Test + public void testWriteNonAsciiNameWithUnfortunateNamePosixMode() throws Exception { + final String n = "f\u00f6\u00f6\u00dc"; + final TarArchiveEntry t = new TarArchiveEntry(n); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + tos.setAddPaxHeadersForNonAsciiNames(true); + tos.putArchiveEntry(t); + tos.closeArchiveEntry(); + tos.close(); + final byte[] data = bos.toByteArray(); + final TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals(n, e.getName()); + assertFalse(e.isDirectory()); + tin.close(); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-237" + */ + @Test + public void testWriteLongLinkNameErrorMode() throws Exception { + final String linkname = "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789/test"; + final TarArchiveEntry entry = new TarArchiveEntry("test", TarConstants.LF_SYMLINK); + entry.setLinkName(linkname); + + try { + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos, "ASCII"); + tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_ERROR); + tos.putArchiveEntry(entry); + tos.closeArchiveEntry(); + tos.close(); + + fail("Truncated link name didn't throw an exception"); + } catch (final RuntimeException e) { + // expected + } + } + + @Test + public void testWriteLongLinkNameTruncateMode() throws Exception { + final String linkname = "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789/"; + final TarArchiveEntry entry = new TarArchiveEntry("test", TarConstants.LF_SYMLINK); + entry.setLinkName(linkname); + + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos, "ASCII"); + tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_TRUNCATE); + tos.putArchiveEntry(entry); + tos.closeArchiveEntry(); + tos.close(); + + final byte[] data = bos.toByteArray(); + final TarArchiveInputStream tin = new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals("Link name", linkname.substring(0, TarConstants.NAMELEN), e.getLinkName()); + tin.close(); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-237" + */ + @Test + public void testWriteLongLinkNameGnuMode() throws Exception { + testWriteLongLinkName(TarArchiveOutputStream.LONGFILE_GNU); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-237" + */ + @Test + public void testWriteLongLinkNamePosixMode() throws Exception { + testWriteLongLinkName(TarArchiveOutputStream.LONGFILE_POSIX); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-237" + */ + private void testWriteLongLinkName(final int mode) throws Exception { + final String linkname = "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789" + + "01234567890123456789012345678901234567890123456789/test"; + final TarArchiveEntry entry = new TarArchiveEntry("test", TarConstants.LF_SYMLINK); + entry.setLinkName(linkname); + + final 
ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tos = new TarArchiveOutputStream(bos, "ASCII"); + tos.setLongFileMode(mode); + tos.putArchiveEntry(entry); + tos.closeArchiveEntry(); + tos.close(); + + final byte[] data = bos.toByteArray(); + final TarArchiveInputStream tin = new TarArchiveInputStream(new ByteArrayInputStream(data)); + final TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals("Entry name", "test", e.getName()); + assertEquals("Link name", linkname, e.getLinkName()); + assertTrue("The entry is not a symbolic link", e.isSymbolicLink()); + tin.close(); + } + + @SuppressWarnings("deprecation") + @Test public void testRecordSize() throws IOException { + try { + TarArchiveOutputStream tos = + new TarArchiveOutputStream(new ByteArrayOutputStream(),512,511); + fail("should have rejected recordSize of 511"); + } catch(IllegalArgumentException e) { + // expected; + } + try { + TarArchiveOutputStream tos = + new TarArchiveOutputStream(new ByteArrayOutputStream(),512,511,null); + fail("should have rejected recordSize of 511"); + } catch(IllegalArgumentException e) { + // expected; + } + try (TarArchiveOutputStream tos = new TarArchiveOutputStream(new ByteArrayOutputStream(), + 512, 512)) { + assertEquals("recordSize",512,tos.getRecordSize()); + } + try (TarArchiveOutputStream tos = new TarArchiveOutputStream(new ByteArrayOutputStream(), + 512, 512, null)) { + assertEquals("recordSize",512,tos.getRecordSize()); + } + } + @Test + public void testBlockSizes() throws Exception { + String fileName = "/test1.xml"; + byte[] contents = getResourceContents(fileName); + testPadding(TarConstants.DEFAULT_BLKSIZE, fileName, contents); // USTAR / pre-pax + testPadding(5120, fileName, contents); // PAX default + testPadding(1<<15, fileName, contents); //PAX max + testPadding(-2, fileName, contents); // don't specify a block size -> use minimum length + try { + testPadding(511, fileName, contents); // don't specify a block size -> use minimum length + fail("should have thrown an illegal argument exception"); + } catch (IllegalArgumentException e) { + //expected + } + try { + testPadding(0, fileName, contents); // don't specify a block size -> use minimum length + fail("should have thrown an illegal argument exception"); + } catch (IllegalArgumentException e) { + //expected + } + // test with "content" that is an exact multiple of record length + contents = new byte[2048]; + java.util.Arrays.fill(contents, (byte) 42); + testPadding(TarConstants.DEFAULT_BLKSIZE, fileName, contents); + } + + private void testPadding(int blockSize, String fileName, byte[] contents) throws IOException { + final File f = File.createTempFile("commons-compress-padding", ".tar"); + f.deleteOnExit(); + final FileOutputStream fos = new FileOutputStream(f); + final TarArchiveOutputStream tos; + if (blockSize != -2) { + tos = new TarArchiveOutputStream(fos, blockSize); + } else { + blockSize = 512; + tos = new TarArchiveOutputStream(fos); + } + TarArchiveEntry sEntry; + sEntry = new TarArchiveEntry(fileName); + sEntry.setSize(contents.length); + tos.putArchiveEntry(sEntry); + tos.write(contents); + tos.closeArchiveEntry(); + tos.close(); + int fileRecordsSize = (int) Math.ceil((double) contents.length / 512) * 512; + final int headerSize = 512; + final int endOfArchiveSize = 1024; + int unpaddedSize = headerSize + fileRecordsSize + endOfArchiveSize; + int paddedSize = (int) Math.ceil((double)unpaddedSize/blockSize)*blockSize; + assertEquals(paddedSize, f.length()); + } + + private 
byte[] getResourceContents(String name) throws IOException { + ByteArrayOutputStream bos; + try (InputStream resourceAsStream = getClass().getResourceAsStream(name)) { + bos = new ByteArrayOutputStream(); + IOUtils.copy(resourceAsStream, bos); + } + return bos.toByteArray(); + } + @Test public void testPutGlobalPaxHeaderEntry() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + int pid = 73; + int globCount = 1; + byte lfPaxGlobalExtendedHeader = TarConstants.LF_PAX_GLOBAL_EXTENDED_HEADER; + TarArchiveEntry globalHeader = new TarArchiveEntry("/tmp/GlobalHead." + pid + "." + globCount, + lfPaxGlobalExtendedHeader); + globalHeader.addPaxHeader("SCHILLY.xattr.user.org.apache.weasels","global-weasels"); + tos.putArchiveEntry(globalHeader); + TarArchiveEntry entry = new TarArchiveEntry("message"); + String x = "If at first you don't succeed, give up"; + entry.setSize(x.length()); + tos.putArchiveEntry(entry); + tos.write(x.getBytes()); + tos.closeArchiveEntry(); + entry = new TarArchiveEntry("counter-message"); + String y = "Nothing succeeds like excess"; + entry.setSize(y.length()); + entry.addPaxHeader("SCHILLY.xattr.user.org.apache.weasels.species","unknown"); + tos.putArchiveEntry(entry); + tos.write(y.getBytes()); + tos.closeArchiveEntry(); + tos.close(); + TarArchiveInputStream in = new TarArchiveInputStream(new ByteArrayInputStream(bos.toByteArray())); + TarArchiveEntry entryIn = in.getNextTarEntry(); + assertNotNull(entryIn); + assertEquals("message",entryIn.getName()); + assertEquals("global-weasels",entryIn.getExtraPaxHeader("SCHILLY.xattr.user.org.apache.weasels")); + Reader reader = new InputStreamReader(in); + for(int i=0;i<x.length();i++) { + assertEquals(x.charAt(i),reader.read()); + } + assertEquals(-1,reader.read()); + entryIn = in.getNextTarEntry(); + assertEquals("counter-message",entryIn.getName()); + assertEquals("global-weasels",entryIn.getExtraPaxHeader("SCHILLY.xattr.user.org.apache.weasels")); + assertEquals("unknown",entryIn.getExtraPaxHeader("SCHILLY.xattr.user.org.apache.weasels.species")); + assertNull(in.getNextTarEntry()); + } + + /** + * When using long file names the longLinkEntry included the current timestamp as the Entry + * modification date. This was never exposed to the client but it caused identical archives to + * have different MD5 hashes. + */ + @Test + public void testLongNameMd5Hash() throws Exception { + final String longFileName = "a/considerably/longer/file/name/which/forces/use/of/the/long/link/header/which/appears/to/always/use/the/current/time/as/modification/date"; + final String fname = longFileName; + final Date modificationDate = new Date(); + + final byte[] archive1 = createTarArchiveContainingOneDirectory(fname, modificationDate); + final byte[] digest1 = MessageDigest.getInstance("MD5").digest(archive1); + + // let a second elapse otherwise the modification dates will be equal + Thread.sleep(1000L); + + // now recreate exactly the same tar file + final byte[] archive2 = createTarArchiveContainingOneDirectory(fname, modificationDate); + // and I would expect the MD5 hash to be the same, but for long names it isn't + final byte[] digest2 = MessageDigest.getInstance("MD5").digest(archive2); + + Assert.assertArrayEquals(digest1, digest2); + + // do I still have the correct modification date? 
+ // let a second elapse so we don't get the current time + Thread.sleep(1000); + final TarArchiveInputStream tarIn = new TarArchiveInputStream( + new ByteArrayInputStream(archive2)); + final ArchiveEntry nextEntry = tarIn.getNextEntry(); + assertEquals(longFileName, nextEntry.getName()); + // tar archive stores modification time to second granularity only (floored) + assertEquals(modificationDate.getTime() / 1000, + nextEntry.getLastModifiedDate().getTime() / 1000); + tarIn.close(); + } + + private static byte[] createTarArchiveContainingOneDirectory(final String fname, + final Date modificationDate) throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final TarArchiveOutputStream tarOut = new TarArchiveOutputStream(baos, 1024); + tarOut.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU); + final TarArchiveEntry tarEntry = new TarArchiveEntry("d"); + tarEntry.setModTime(modificationDate); + tarEntry.setMode(TarArchiveEntry.DEFAULT_DIR_MODE); + tarEntry.setModTime(modificationDate.getTime()); + tarEntry.setName(fname); + tarOut.putArchiveEntry(tarEntry); + tarOut.closeArchiveEntry(); + tarOut.close(); + + return baos.toByteArray(); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java new file mode 100644 index 000000000..6d2a32e58 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.tar; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; + +/** + * Simple command line application that lists the contents of a tar archive. 
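+ * <p>A typical invocation, assuming the test classes are on the classpath and using a purely illustrative archive name, would be <code>java org.apache.commons.compress.archivers.tar.TarLister my-archive.tar</code>, optionally followed by an encoding name.</p>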
+ * + * <p>The name of the archive must be given as a command line argument.</p> + * <p>The optional second argument specifies the encoding to assume for file names.</p> + * + * @since 1.11 + */ +public final class TarLister { + + public static void main(final String[] args) throws Exception { + if (args.length == 0) { + usage(); + return; + } + System.out.println("Analysing " + args[0]); + final File f = new File(args[0]); + if (!f.isFile()) { + System.err.println(f + " doesn't exist or is a directory"); + } + final InputStream fis = new BufferedInputStream(new FileInputStream(f)); + TarArchiveInputStream ais; + if (args.length > 1) { + ais = new TarArchiveInputStream(fis, args[1]); + } else { + ais = new TarArchiveInputStream(fis); + } + System.out.println("Created " + ais.toString()); + TarArchiveEntry ae; + while((ae=ais.getNextTarEntry()) != null){ + log(ae); + } + ais.close(); + fis.close(); + } + + private static void usage() { + System.out.println("Parameters: archive-name [encoding]"); + } + + private static void log(final TarArchiveEntry ae) { + final StringBuilder sb = new StringBuilder(Integer.toOctalString(ae.getMode())) + .append(" "); + String name = ae.getUserName(); + if (name != null && name.length() > 0) { + sb.append(name); + } else { + sb.append(ae.getLongUserId()); + } + sb.append("/"); + name = ae.getGroupName(); + if (name != null && name.length() > 0) { + sb.append(name); + } else { + sb.append(ae.getLongGroupId()); + } + sb.append(" "); + if (ae.isSparse()) { + sb.append(ae.getRealSize()); + } else if (ae.isCharacterDevice() || ae.isBlockDevice()) { + sb.append(ae.getDevMajor()).append(",").append(ae.getDevMinor()); + } else { + sb.append(ae.getSize()); + } + sb.append(" ").append(ae.getLastModifiedDate()).append(" "); + sb.append(ae.getName()); + if (ae.isSymbolicLink() || ae.isLink()) { + if (ae.isSymbolicLink()) { + sb.append(" -> "); + } else { + sb.append(" link to "); + } + sb.append(ae.getLinkName()); + } + if (ae.isSparse()) { + sb.append(" (sparse)"); + } + System.out.println(sb); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java new file mode 100644 index 000000000..393c0aa1c --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java @@ -0,0 +1,384 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress.archivers.tar; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; +import org.apache.commons.compress.utils.CharsetNames; +import org.junit.Test; + +public class TarUtilsTest { + + + @Test + public void testName(){ + byte [] buff = new byte[20]; + final String sb1 = "abcdefghijklmnopqrstuvwxyz"; + int off = TarUtils.formatNameBytes(sb1, buff, 1, buff.length-1); + assertEquals(off, 20); + String sb2 = TarUtils.parseName(buff, 1, 10); + assertEquals(sb2,sb1.substring(0,10)); + sb2 = TarUtils.parseName(buff, 1, 19); + assertEquals(sb2,sb1.substring(0,19)); + buff = new byte[30]; + off = TarUtils.formatNameBytes(sb1, buff, 1, buff.length-1); + assertEquals(off, 30); + sb2 = TarUtils.parseName(buff, 1, buff.length-1); + assertEquals(sb1, sb2); + buff = new byte[]{0, 1, 0}; + sb2 = TarUtils.parseName(buff, 0, 3); + assertEquals("", sb2); + } + + @Test + public void testParseOctal() throws Exception{ + long value; + byte [] buffer; + final long MAX_OCTAL = 077777777777L; // Allowed 11 digits + final long MAX_OCTAL_OVERFLOW = 0777777777777L; // in fact 12 for some implementations + final String maxOctal = "777777777777"; // Maximum valid octal + buffer = maxOctal.getBytes(CharsetNames.UTF_8); + value = TarUtils.parseOctal(buffer,0, buffer.length); + assertEquals(MAX_OCTAL_OVERFLOW, value); + buffer[buffer.length - 1] = ' '; + value = TarUtils.parseOctal(buffer,0, buffer.length); + assertEquals(MAX_OCTAL, value); + buffer[buffer.length-1]=0; + value = TarUtils.parseOctal(buffer,0, buffer.length); + assertEquals(MAX_OCTAL, value); + buffer=new byte[]{0,0}; + value = TarUtils.parseOctal(buffer,0, buffer.length); + assertEquals(0, value); + buffer=new byte[]{0,' '}; + value = TarUtils.parseOctal(buffer,0, buffer.length); + assertEquals(0, value); + buffer=new byte[]{' ',0}; + value = TarUtils.parseOctal(buffer,0, buffer.length); + assertEquals(0, value); + } + + @Test + public void testParseOctalInvalid() throws Exception{ + byte [] buffer; + buffer=new byte[0]; // empty byte array + try { + TarUtils.parseOctal(buffer,0, buffer.length); + fail("Expected IllegalArgumentException - should be at least 2 bytes long"); + } catch (final IllegalArgumentException expected) { + } + buffer=new byte[]{0}; // 1-byte array + try { + TarUtils.parseOctal(buffer,0, buffer.length); + fail("Expected IllegalArgumentException - should be at least 2 bytes long"); + } catch (final IllegalArgumentException expected) { + } + buffer = "abcdef ".getBytes(CharsetNames.UTF_8); // Invalid input + try { + TarUtils.parseOctal(buffer,0, buffer.length); + fail("Expected IllegalArgumentException"); + } catch (final IllegalArgumentException expected) { + } + buffer = " 0 07 ".getBytes(CharsetNames.UTF_8); // Invalid - embedded space + try { + TarUtils.parseOctal(buffer,0, buffer.length); + fail("Expected IllegalArgumentException - embedded space"); + } catch (final IllegalArgumentException expected) { + } + buffer = " 0\00007 ".getBytes(CharsetNames.UTF_8); // Invalid - embedded NUL + try { + TarUtils.parseOctal(buffer,0, buffer.length); + fail("Expected IllegalArgumentException - embedded NUL"); + } catch (final IllegalArgumentException expected) { + } + } + + private void checkRoundTripOctal(final long value, final int bufsize) { + 
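+ // format the value into an octal field of the given size, then parse it back and expect the original value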
final byte [] buffer = new byte[bufsize]; + long parseValue; + TarUtils.formatLongOctalBytes(value, buffer, 0, buffer.length); + parseValue = TarUtils.parseOctal(buffer,0, buffer.length); + assertEquals(value,parseValue); + } + + private void checkRoundTripOctal(final long value) { + checkRoundTripOctal(value, TarConstants.SIZELEN); + } + + @Test + public void testRoundTripOctal() { + checkRoundTripOctal(0); + checkRoundTripOctal(1); +// checkRoundTripOctal(-1); // TODO What should this do? + checkRoundTripOctal(TarConstants.MAXSIZE); +// checkRoundTripOctal(0100000000000L); // TODO What should this do? + + checkRoundTripOctal(0, TarConstants.UIDLEN); + checkRoundTripOctal(1, TarConstants.UIDLEN); + checkRoundTripOctal(TarConstants.MAXID, 8); + } + + private void checkRoundTripOctalOrBinary(final long value, final int bufsize) { + final byte [] buffer = new byte[bufsize]; + long parseValue; + TarUtils.formatLongOctalOrBinaryBytes(value, buffer, 0, buffer.length); + parseValue = TarUtils.parseOctalOrBinary(buffer,0, buffer.length); + assertEquals(value,parseValue); + } + + @Test + public void testRoundTripOctalOrBinary8() { + testRoundTripOctalOrBinary(8); + } + + @Test + public void testRoundTripOctalOrBinary12() { + testRoundTripOctalOrBinary(12); + checkRoundTripOctalOrBinary(Long.MAX_VALUE, 12); + checkRoundTripOctalOrBinary(Long.MIN_VALUE + 1, 12); + } + + private void testRoundTripOctalOrBinary(final int length) { + checkRoundTripOctalOrBinary(0, length); + checkRoundTripOctalOrBinary(1, length); + checkRoundTripOctalOrBinary(TarConstants.MAXSIZE, length); // will need binary format + checkRoundTripOctalOrBinary(-1, length); // will need binary format + checkRoundTripOctalOrBinary(0xffffffffffffffl, length); + checkRoundTripOctalOrBinary(-0xffffffffffffffl, length); + } + + // Check correct trailing bytes are generated + @Test + public void testTrailers() { + final byte [] buffer = new byte[12]; + TarUtils.formatLongOctalBytes(123, buffer, 0, buffer.length); + assertEquals(' ', buffer[buffer.length-1]); + assertEquals('3', buffer[buffer.length-2]); // end of number + TarUtils.formatOctalBytes(123, buffer, 0, buffer.length); + assertEquals(0 , buffer[buffer.length-1]); + assertEquals(' ', buffer[buffer.length-2]); + assertEquals('3', buffer[buffer.length-3]); // end of number + TarUtils.formatCheckSumOctalBytes(123, buffer, 0, buffer.length); + assertEquals(' ', buffer[buffer.length-1]); + assertEquals(0 , buffer[buffer.length-2]); + assertEquals('3', buffer[buffer.length-3]); // end of number + } + + @Test + public void testNegative() throws Exception { + final byte [] buffer = new byte[22]; + TarUtils.formatUnsignedOctalString(-1, buffer, 0, buffer.length); + assertEquals("1777777777777777777777", new String(buffer, CharsetNames.UTF_8)); + } + + @Test + public void testOverflow() throws Exception { + final byte [] buffer = new byte[8-1]; // a lot of the numbers have 8-byte buffers (nul term) + TarUtils.formatUnsignedOctalString(07777777L, buffer, 0, buffer.length); + assertEquals("7777777", new String(buffer, CharsetNames.UTF_8)); + try { + TarUtils.formatUnsignedOctalString(017777777L, buffer, 0, buffer.length); + fail("Should have cause IllegalArgumentException"); + } catch (final IllegalArgumentException expected) { + } + } + + @Test + public void testRoundTripNames(){ + checkName(""); + checkName("The quick brown fox\n"); + checkName("\177"); + // checkName("\0"); // does not work, because NUL is ignored + } + + @Test + public void testRoundEncoding() throws Exception { + // 
COMPRESS-114 + final ZipEncoding enc = ZipEncodingHelper.getZipEncoding(CharsetNames.ISO_8859_1); + final String s = "0302-0601-3\u00b1\u00b1\u00b1F06\u00b1W220\u00b1ZB\u00b1LALALA\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1CAN\u00b1\u00b1DC\u00b1\u00b1\u00b104\u00b1060302\u00b1MOE.model"; + final byte buff[] = new byte[100]; + final int len = TarUtils.formatNameBytes(s, buff, 0, buff.length, enc); + assertEquals(s, TarUtils.parseName(buff, 0, len, enc)); + } + + private void checkName(final String string) { + final byte buff[] = new byte[100]; + final int len = TarUtils.formatNameBytes(string, buff, 0, buff.length); + assertEquals(string, TarUtils.parseName(buff, 0, len)); + } + + @Test + public void testReadNegativeBinary8Byte() { + final byte[] b = new byte[] { + (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, + (byte) 0xff, (byte) 0xff, (byte) 0xf1, (byte) 0xef, + }; + assertEquals(-3601l, TarUtils.parseOctalOrBinary(b, 0, 8)); + } + + @Test + public void testReadNegativeBinary12Byte() { + final byte[] b = new byte[] { + (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, + (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, + (byte) 0xff, (byte) 0xff, (byte) 0xf1, (byte) 0xef, + }; + assertEquals(-3601l, TarUtils.parseOctalOrBinary(b, 0, 12)); + } + + + @Test + public void testWriteNegativeBinary8Byte() { + final byte[] b = new byte[] { + (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, + (byte) 0xff, (byte) 0xff, (byte) 0xf1, (byte) 0xef, + }; + assertEquals(-3601l, TarUtils.parseOctalOrBinary(b, 0, 8)); + } + + // https://issues.apache.org/jira/browse/COMPRESS-191 + @Test + public void testVerifyHeaderCheckSum() { + final byte[] valid = { // from bla.tar + 116, 101, 115, 116, 49, 46, 120, 109, 108, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 48, 48, 48, 48, 54, 52, 52, 0, 48, 48, 48, 48, 55, 54, 53, + 0, 48, 48, 48, 48, 55, 54, 53, 0, 48, 48, 48, 48, 48, 48, 48, + 49, 49, 52, 50, 0, 49, 48, 55, 49, 54, 53, 52, 53, 54, 50, 54, + 0, 48, 49, 50, 50, 54, 48, 0, 32, 48, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 117, 115, 116, 97, 114, 32, 32, 0, + 116, 99, 117, 114, 100, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 99, 117, + 114, 100, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 }; + assertTrue(TarUtils.verifyCheckSum(valid)); + + final byte[] compress117 = { // from COMPRESS-117 + (byte) 0x37, (byte) 0x7a, (byte) 0x43, (byte) 
0x2e, (byte) 0x74, (byte) 0x78, (byte) 0x74, (byte) 0x00, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, (byte) 0x31, (byte) 0x30, (byte) 0x30, (byte) 0x37, + (byte) 0x37, (byte) 0x37, (byte) 0x20, (byte) 0x00, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x30, (byte) 0x20, (byte) 0x00, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x30, (byte) 0x20, (byte) 0x00, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x31, (byte) 0x33, (byte) 0x30, (byte) 0x33, (byte) 0x33, (byte) 0x20, + (byte) 0x31, (byte) 0x31, (byte) 0x31, (byte) 0x31, (byte) 0x35, (byte) 0x31, (byte) 0x36, (byte) 0x36, + (byte) 0x30, (byte) 0x31, (byte) 0x36, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x35, (byte) 0x34, + (byte) 0x31, (byte) 0x37, (byte) 0x20, (byte) 0x00, (byte) 0x30, (byte) 0x00, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + assertTrue(TarUtils.verifyCheckSum(compress117)); + + final byte[] invalid = { // from the testAIFF.aif file in Tika + 70, 79, 82, 77, 0, 0, 15, 46, 65, 73, 70, 70, 67, 79, 77, 77, + 0, 0, 0, 18, 0, 2, 0, 0, 3, -64, 0, 16, 64, 14, -84, 68, 0, 0, + 0, 0, 0, 0, 83, 83, 78, 68, 0, 0, 15, 8, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, -1, -1, 0, 1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 1, -1, -1, + 0, 0, 0, 0, 0, 0, -1, -1, 0, 2, -1, -2, 0, 2, -1, -1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, -1, -1, 0, 0, -1, -1, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 1, -1, -1, 0, 1, -1, -2, 0, 1, -1, -1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, + 2, -1, -2, 0, 2, -1, -1, 0, 0, 0, 1, -1, -1, 0, 1, -1, -1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, -2, 0, 2, -1, -2, 0, 1, 0, 0, + 0, 1, -1, -1, 0, 0, 0, 1, -1, -1, 0, 0, 0, 1, -1, -2, 0, 2, + -1, -1, 0, 0, 0, 0, 0, 0, -1, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 1, -1, -1, 0, 2, -1, -2, + 0, 2, -1, -2, 0, 2, -1, -2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, -1, + -2, 0, 2, -1, -2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, 0, 1, 0, 0, -1, -1, 0, 2, -1, -2, 0, 2, -1, -1, 0, 0, + 0, 0, 0, 0, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 2, -1, -2, + 0, 1, 0, 0, -1, -1, 0, 2, -1, -2, 0, 2, -1, -2, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, -1, 0, 
0, 0, + 0, -1, -1, 0, 1, 0, 0, 0, 0, 0, 1, -1, -1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, -2, 0, 2, -1, -1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, -2, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, -1, -1, 0, 0, 0, 0, -1, -1, 0, 2, -1, -2, + 0, 2, -1, -2, 0, 2, -1, -1, 0, 0, 0, 0, -1, -1, 0, 1, -1, -1, + 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, 0, 0, 0, 0, + -1, -1, 0, 2, -1, -2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, -1, -1, 0, 0, 0, 0, -1, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1 }; + assertFalse(TarUtils.verifyCheckSum(invalid)); + } + + @Test + public void testParseOctalCompress330() throws Exception{ + final long expected = 0100000; + final byte [] buffer = new byte[] { + 32, 32, 32, 32, 32, 49, 48, 48, 48, 48, 48, 32 + }; + assertEquals(expected, TarUtils.parseOctalOrBinary(buffer, 0, buffer.length)); + } + + @Test + public void testRoundTripOctalOrBinary8_ValueTooBigForBinary() { + try { + checkRoundTripOctalOrBinary(Long.MAX_VALUE, 8); + fail("Should throw exception - value is too long to fit buffer of this len"); + } catch (IllegalArgumentException e) { + assertEquals("Value 9223372036854775807 is too large for 8 byte field.", e.getMessage()); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/AsiExtraFieldTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/AsiExtraFieldTest.java new file mode 100644 index 000000000..844623c32 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/AsiExtraFieldTest.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import org.junit.Test; + +/** + * JUnit testcases for org.apache.commons.compress.archivers.zip.AsiExtraField. + * + */ +public class AsiExtraFieldTest implements UnixStat { + + /** + * Test file mode magic. + */ + @Test + public void testModes() { + final AsiExtraField a = new AsiExtraField(); + a.setMode(0123); + assertEquals("plain file", 0100123, a.getMode()); + a.setDirectory(true); + assertEquals("directory", 040123, a.getMode()); + a.setLinkedFile("test"); + assertEquals("symbolic link", 0120123, a.getMode()); + } + + /** + * Test content. 
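+ * <p>Verifies the serialized local file data layout: a four byte CRC, a two byte mode, a four byte link length and two bytes each for uid and gid, optionally followed by the linked file name.</p>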
+ */ + @Test + public void testContent() { + final AsiExtraField a = new AsiExtraField(); + a.setMode(0123); + a.setUserId(5); + a.setGroupId(6); + byte[] b = a.getLocalFileDataData(); + + // CRC manually calculated, sorry + byte[] expect = {(byte)0xC6, 0x02, 0x78, (byte)0xB6, // CRC + 0123, (byte)0x80, // mode + 0, 0, 0, 0, // link length + 5, 0, 6, 0}; // uid, gid + assertEquals("no link", expect.length, b.length); + for (int i=0; i<expect.length; i++) { + assertEquals("no link, byte "+i, expect[i], b[i]); + } + + a.setLinkedFile("test"); + expect = new byte[] {0x75, (byte)0x8E, 0x41, (byte)0xFD, // CRC + 0123, (byte)0xA0, // mode + 4, 0, 0, 0, // link length + 5, 0, 6, 0, // uid, gid + (byte)'t', (byte)'e', (byte)'s', (byte)'t'}; + b = a.getLocalFileDataData(); + assertEquals("no link", expect.length, b.length); + for (int i=0; i<expect.length; i++) { + assertEquals("no link, byte "+i, expect[i], b[i]); + } + + } + + /** + * Test reparse + */ + @Test + public void testReparse() throws Exception { + // CRC manually calculated, sorry + byte[] data = {(byte)0xC6, 0x02, 0x78, (byte)0xB6, // CRC + 0123, (byte)0x80, // mode + 0, 0, 0, 0, // link length + 5, 0, 6, 0}; // uid, gid + AsiExtraField a = new AsiExtraField(); + a.parseFromLocalFileData(data, 0, data.length); + assertEquals("length plain file", data.length, + a.getLocalFileDataLength().getValue()); + assertTrue("plain file, no link", !a.isLink()); + assertTrue("plain file, no dir", !a.isDirectory()); + assertEquals("mode plain file", FILE_FLAG | 0123, a.getMode()); + assertEquals("uid plain file", 5, a.getUserId()); + assertEquals("gid plain file", 6, a.getGroupId()); + + data = new byte[] {0x75, (byte)0x8E, 0x41, (byte)0xFD, // CRC + 0123, (byte)0xA0, // mode + 4, 0, 0, 0, // link length + 5, 0, 6, 0, // uid, gid + (byte)'t', (byte)'e', (byte)'s', (byte)'t'}; + a = new AsiExtraField(); + a.parseFromLocalFileData(data, 0, data.length); + assertEquals("length link", data.length, + a.getLocalFileDataLength().getValue()); + assertTrue("link, is link", a.isLink()); + assertTrue("link, no dir", !a.isDirectory()); + assertEquals("mode link", LINK_FLAG | 0123, a.getMode()); + assertEquals("uid link", 5, a.getUserId()); + assertEquals("gid link", 6, a.getGroupId()); + assertEquals("test", a.getLinkedFile()); + + data = new byte[] {(byte)0x8E, 0x01, (byte)0xBF, (byte)0x0E, // CRC + 0123, (byte)0x40, // mode + 0, 0, 0, 0, // link + 5, 0, 6, 0}; // uid, gid + a = new AsiExtraField(); + a.parseFromLocalFileData(data, 0, data.length); + assertEquals("length dir", data.length, + a.getLocalFileDataLength().getValue()); + assertTrue("dir, no link", !a.isLink()); + assertTrue("dir, is dir", a.isDirectory()); + assertEquals("mode dir", DIR_FLAG | 0123, a.getMode()); + assertEquals("uid dir", 5, a.getUserId()); + assertEquals("gid dir", 6, a.getGroupId()); + + data = new byte[] {0, 0, 0, 0, // bad CRC + 0123, (byte)0x40, // mode + 0, 0, 0, 0, // link + 5, 0, 6, 0}; // uid, gid + a = new AsiExtraField(); + try { + a.parseFromLocalFileData(data, 0, data.length); + fail("should raise bad CRC exception"); + } catch (final Exception e) { + assertEquals("bad CRC checksum 0 instead of ebf018e", + e.getMessage()); + } + } + + @Test + public void testClone() { + final AsiExtraField s1 = new AsiExtraField(); + s1.setUserId(42); + s1.setGroupId(12); + s1.setLinkedFile("foo"); + s1.setMode(0644); + s1.setDirectory(true); + final AsiExtraField s2 = (AsiExtraField) s1.clone(); + assertNotSame(s1, s2); + assertEquals(s1.getUserId(), s2.getUserId()); + 
assertEquals(s1.getGroupId(), s2.getGroupId()); + assertEquals(s1.getLinkedFile(), s2.getLinkedFile()); + assertEquals(s1.getMode(), s2.getMode()); + assertEquals(s1.isDirectory(), s2.isDirectory()); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/BinaryTreeTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/BinaryTreeTest.java new file mode 100644 index 000000000..ae4ba9dac --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/BinaryTreeTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.junit.Test; + +public class BinaryTreeTest { + + @Test + public void testDecode() throws IOException { + final InputStream in = new ByteArrayInputStream(new byte[] { 0x02, 0x42, 0x01, 0x13 }); + + final BinaryTree tree = BinaryTree.decode(in, 8); + + assertNotNull(tree); + + final BitStream stream = new BitStream(new ByteArrayInputStream(new byte[] { (byte) 0x8D, (byte) 0xC5, (byte) 0x11, 0x00 })); + assertEquals(0, tree.read(stream)); + assertEquals(1, tree.read(stream)); + assertEquals(2, tree.read(stream)); + assertEquals(3, tree.read(stream)); + assertEquals(4, tree.read(stream)); + assertEquals(5, tree.read(stream)); + assertEquals(6, tree.read(stream)); + assertEquals(7, tree.read(stream)); + } + @Test + public void testExceptions() { + BinaryTree binaryFinary = new BinaryTree(4); + binaryFinary.addLeaf(0,0,0,1); + try { + binaryFinary.addLeaf(0,0,0,1); + fail("should have thrown illegalArgumentException"); + } catch (IllegalArgumentException e) { + } + + InputStream is = new ByteArrayInputStream(new byte[]{}); + try { + BinaryTree.decode(is,0); + fail("should have thrown IOException"); + } catch (IOException e) { + + } + binaryFinary = new BinaryTree(4); + try { + binaryFinary.read(new BitStream(new ByteArrayInputStream(new byte[] {0}))); + fail("expected read fail"); + } catch (IOException e) { + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/BitStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/BitStreamTest.java new file mode 100644 index 000000000..f330c29db --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/BitStreamTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; + +import org.junit.Test; + +public class BitStreamTest { + + @Test + public void testEmptyStream() throws Exception { + final BitStream stream = new BitStream(new ByteArrayInputStream(new byte[0])); + assertEquals("next bit", -1, stream.nextBit()); + assertEquals("next bit", -1, stream.nextBit()); + assertEquals("next bit", -1, stream.nextBit()); + stream.close(); + } + + @Test + public void testStream() throws Exception { + final BitStream stream = new BitStream(new ByteArrayInputStream(new byte[] { (byte) 0xEA, 0x03 })); + + assertEquals("bit 0", 0, stream.nextBit()); + assertEquals("bit 1", 1, stream.nextBit()); + assertEquals("bit 2", 0, stream.nextBit()); + assertEquals("bit 3", 1, stream.nextBit()); + assertEquals("bit 4", 0, stream.nextBit()); + assertEquals("bit 5", 1, stream.nextBit()); + assertEquals("bit 6", 1, stream.nextBit()); + assertEquals("bit 7", 1, stream.nextBit()); + + assertEquals("bit 8", 1, stream.nextBit()); + assertEquals("bit 9", 1, stream.nextBit()); + assertEquals("bit 10", 0, stream.nextBit()); + assertEquals("bit 11", 0, stream.nextBit()); + assertEquals("bit 12", 0, stream.nextBit()); + assertEquals("bit 13", 0, stream.nextBit()); + assertEquals("bit 14", 0, stream.nextBit()); + assertEquals("bit 15", 0, stream.nextBit()); + + assertEquals("next bit", -1, stream.nextBit()); + stream.close(); + } + + @Test + public void testNextByteFromEmptyStream() throws Exception { + final BitStream stream = new BitStream(new ByteArrayInputStream(new byte[0])); + assertEquals("next byte", -1, stream.nextByte()); + assertEquals("next byte", -1, stream.nextByte()); + stream.close(); + } + + @Test + public void testReadAlignedBytes() throws Exception { + final BitStream stream = new BitStream(new ByteArrayInputStream(new byte[] { (byte) 0xEA, 0x35 })); + assertEquals("next byte", 0xEA, stream.nextByte()); + assertEquals("next byte", 0x35, stream.nextByte()); + assertEquals("next byte", -1, stream.nextByte()); + stream.close(); + } + + @Test + public void testNextByte() throws Exception { + final BitStream stream = new BitStream(new ByteArrayInputStream(new byte[] { (byte) 0xEA, 0x35 })); + assertEquals("bit 0", 0, stream.nextBit()); + assertEquals("bit 1", 1, stream.nextBit()); + assertEquals("bit 2", 0, stream.nextBit()); + assertEquals("bit 3", 1, stream.nextBit()); + + assertEquals("next byte", 0x5E, stream.nextByte()); + assertEquals("next byte", -1, stream.nextByte()); // not enough bits left to read a byte + stream.close(); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/CircularBufferTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/CircularBufferTest.java new file mode 100644 index 000000000..57b6b0116 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/CircularBufferTest.java @@ -0,0 
+1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import org.junit.Test; + +public class CircularBufferTest { + + @Test + public void testPutAndGet() throws Exception { + final int size = 16; + final CircularBuffer buffer = new CircularBuffer(size); + for (int i = 0; i < size / 2; i++) { + buffer.put(i); + } + + assertTrue("available", buffer.available()); + + for (int i = 0; i < size / 2; i++) { + assertEquals("buffer[" + i + "]", i, buffer.get()); + } + + assertEquals(-1, buffer.get()); + assertFalse("available", buffer.available()); + } + + @Test + public void testCopy() throws Exception { + final CircularBuffer buffer = new CircularBuffer(16); + + buffer.put(1); + buffer.put(2); + buffer.get(); + buffer.get(); + + // copy uninitialized data + buffer.copy(6, 8); + + for (int i = 2; i < 6; i++) { + assertEquals("buffer[" + i + "]", 0, buffer.get()); + } + assertEquals("buffer[" + 6 + "]", 1, buffer.get()); + assertEquals("buffer[" + 7 + "]", 2, buffer.get()); + assertEquals("buffer[" + 8 + "]", 0, buffer.get()); + assertEquals("buffer[" + 9 + "]", 0, buffer.get()); + + for (int i = 10; i < 14; i++) { + buffer.put(i); + buffer.get(); + } + + assertFalse("available", buffer.available()); + + // copy data and wrap + buffer.copy(2, 8); + + for (int i = 14; i < 18; i++) { + assertEquals("buffer[" + i + "]", i % 2 == 0 ? 12 : 13, buffer.get()); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/DataDescriptorTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/DataDescriptorTest.java new file mode 100644 index 000000000..52078deb5 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/DataDescriptorTest.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.zip; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Arrays; + +import org.apache.commons.compress.utils.IOUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static org.apache.commons.compress.AbstractTestCase.mkdir; +import static org.apache.commons.compress.AbstractTestCase.rmdir; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +public class DataDescriptorTest { + + private File dir; + + @Before + public void setUp() throws Exception { + dir = mkdir("ddtest"); + } + + @After + public void tearDown() throws Exception { + rmdir(dir); + } + + @Test + public void writesDataDescriptorForDeflatedEntryOnUnseekableOutput() throws IOException { + ByteArrayOutputStream o = new ByteArrayOutputStream(); + try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(o)) { + zos.putArchiveEntry(new ZipArchiveEntry("test1.txt")); + zos.write("foo".getBytes("UTF-8")); + zos.closeArchiveEntry(); + } + byte[] data = o.toByteArray(); + + byte[] versionInLFH = Arrays.copyOfRange(data, 4, 6); + // 2.0 because of DD + assertArrayEquals(new byte[] { 20, 0 }, versionInLFH); + byte[] gpbInLFH = Arrays.copyOfRange(data, 6, 8); + // DD and EFS flags + assertArrayEquals(new byte[] { 8, 8 }, gpbInLFH); + byte[] crcAndSizedInLFH = Arrays.copyOfRange(data, 14, 26); + assertArrayEquals(new byte[12], crcAndSizedInLFH); + + int cdhStart = findCentralDirectory(data); + byte[] versionInCDH = Arrays.copyOfRange(data, cdhStart + 6, cdhStart + 8); + assertArrayEquals(new byte[] { 20, 0 }, versionInCDH); + byte[] gpbInCDH = Arrays.copyOfRange(data, cdhStart + 8, cdhStart + 10); + assertArrayEquals(new byte[] { 8, 8 }, gpbInCDH); + + int ddStart = cdhStart - 16; + assertEquals(ZipLong.DD_SIG, new ZipLong(data, ddStart)); + long crcFromDD = ZipLong.getValue(data, ddStart + 4); + long cSizeFromDD = ZipLong.getValue(data, ddStart + 8); + long sizeFromDD = ZipLong.getValue(data, ddStart + 12); + assertEquals(3, sizeFromDD); + + long crcFromCDH = ZipLong.getValue(data, cdhStart + 16); + assertEquals(crcFromDD, crcFromCDH); + long cSizeFromCDH = ZipLong.getValue(data, cdhStart + 20); + assertEquals(cSizeFromDD, cSizeFromCDH); + long sizeFromCDH = ZipLong.getValue(data, cdhStart + 24); + assertEquals(sizeFromDD, sizeFromCDH); + } + + @Test + public void doesntWriteDataDescriptorForDeflatedEntryOnSeekableOutput() throws IOException { + File f = new File(dir, "test.zip"); + try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(f)) { + zos.putArchiveEntry(new ZipArchiveEntry("test1.txt")); + zos.write("foo".getBytes("UTF-8")); + zos.closeArchiveEntry(); + } + + byte[] data; + try (FileInputStream fis = new FileInputStream(f)) { + data = IOUtils.toByteArray(fis); + } + + byte[] versionInLFH = Arrays.copyOfRange(data, 4, 6); + // still 2.0 because of Deflate + assertArrayEquals(new byte[] { 20, 0 }, versionInLFH); + byte[] gpbInLFH = Arrays.copyOfRange(data, 6, 8); + // no DD but EFS flag + assertArrayEquals(new byte[] { 0, 8 }, gpbInLFH); + + int cdhStart = findCentralDirectory(data); + byte[] versionInCDH = Arrays.copyOfRange(data, cdhStart + 6, cdhStart + 8); + assertArrayEquals(new byte[] { 20, 0 }, versionInCDH); + byte[] gpbInCDH = Arrays.copyOfRange(data, cdhStart + 8, cdhStart + 10); + 
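+ // the central directory copy of the general purpose bits must match the local header: no data descriptor bit, EFS/UTF-8 flag set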
assertArrayEquals(new byte[] { 0, 8 }, gpbInCDH); + + int ddStart = cdhStart - 16; + assertNotEquals(ZipLong.DD_SIG, new ZipLong(data, ddStart)); + long crcFromLFH = ZipLong.getValue(data, 14); + long cSizeFromLFH = ZipLong.getValue(data, 18); + long sizeFromLFH = ZipLong.getValue(data, 22); + assertEquals(3, sizeFromLFH); + + long crcFromCDH = ZipLong.getValue(data, cdhStart + 16); + assertEquals(crcFromLFH, crcFromCDH); + long cSizeFromCDH = ZipLong.getValue(data, cdhStart + 20); + assertEquals(cSizeFromLFH, cSizeFromCDH); + long sizeFromCDH = ZipLong.getValue(data, cdhStart + 24); + assertEquals(sizeFromLFH, sizeFromCDH); + } + + @Test + public void doesntWriteDataDescriptorWhenAddingRawEntries() throws IOException { + ByteArrayOutputStream init = new ByteArrayOutputStream(); + try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(init)) { + zos.putArchiveEntry(new ZipArchiveEntry("test1.txt")); + zos.write("foo".getBytes("UTF-8")); + zos.closeArchiveEntry(); + } + + File f = new File(dir, "test.zip"); + try (FileOutputStream fos = new FileOutputStream(f)) { + fos.write(init.toByteArray()); + } + + ByteArrayOutputStream o = new ByteArrayOutputStream(); + ZipArchiveEntry zae; + try (ZipFile zf = new ZipFile(f); + ZipArchiveOutputStream zos = new ZipArchiveOutputStream(o)) { + zae = zf.getEntry("test1.txt"); + zos.addRawArchiveEntry(zae, zf.getRawInputStream(zae)); + } + + byte[] data = o.toByteArray(); + byte[] versionInLFH = Arrays.copyOfRange(data, 4, 6); + // still 2.0 because of Deflate + assertArrayEquals(new byte[] { 20, 0 }, versionInLFH); + byte[] gpbInLFH = Arrays.copyOfRange(data, 6, 8); + // no DD but EFS flag + assertArrayEquals(new byte[] { 0, 8 }, gpbInLFH); + + int cdhStart = findCentralDirectory(data); + byte[] versionInCDH = Arrays.copyOfRange(data, cdhStart + 6, cdhStart + 8); + assertArrayEquals(new byte[] { 20, 0 }, versionInCDH); + byte[] gpbInCDH = Arrays.copyOfRange(data, cdhStart + 8, cdhStart + 10); + assertArrayEquals(new byte[] { 0, 8 }, gpbInCDH); + + int ddStart = cdhStart - 16; + assertNotEquals(ZipLong.DD_SIG, new ZipLong(data, ddStart)); + long crcFromLFH = ZipLong.getValue(data, 14); + long cSizeFromLFH = ZipLong.getValue(data, 18); + long sizeFromLFH = ZipLong.getValue(data, 22); + assertEquals(3, sizeFromLFH); + + long crcFromCDH = ZipLong.getValue(data, cdhStart + 16); + assertEquals(crcFromLFH, crcFromCDH); + long cSizeFromCDH = ZipLong.getValue(data, cdhStart + 20); + assertEquals(cSizeFromLFH, cSizeFromCDH); + long sizeFromCDH = ZipLong.getValue(data, cdhStart + 24); + assertEquals(sizeFromLFH, sizeFromCDH); + } + + private int findCentralDirectory(byte[] data) { + // not a ZIP64 archive, no comment, "End of central directory record" at the end + return (int) ZipLong.getValue(data, data.length - 22 + 16); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/EncryptedArchiveTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/EncryptedArchiveTest.java new file mode 100644 index 000000000..8c04c52e2 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/EncryptedArchiveTest.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import org.junit.Test; + +public class EncryptedArchiveTest { + + @Test + public void testReadPasswordEncryptedEntryViaZipFile() + throws IOException { + final File file = getFile("password-encrypted.zip"); + ZipFile zf = null; + try { + zf = new ZipFile(file); + final ZipArchiveEntry zae = zf.getEntry("LICENSE.txt"); + assertTrue(zae.getGeneralPurposeBit().usesEncryption()); + assertFalse(zae.getGeneralPurposeBit().usesStrongEncryption()); + assertFalse(zf.canReadEntryData(zae)); + try { + zf.getInputStream(zae); + fail("expected an exception"); + } catch (final UnsupportedZipFeatureException ex) { + assertSame(UnsupportedZipFeatureException.Feature.ENCRYPTION, + ex.getFeature()); + } + } finally { + ZipFile.closeQuietly(zf); + } + } + + @Test + public void testReadPasswordEncryptedEntryViaStream() + throws IOException { + final File file = getFile("password-encrypted.zip"); + ZipArchiveInputStream zin = null; + try { + zin = new ZipArchiveInputStream(new FileInputStream(file)); + final ZipArchiveEntry zae = zin.getNextZipEntry(); + assertEquals("LICENSE.txt", zae.getName()); + assertTrue(zae.getGeneralPurposeBit().usesEncryption()); + assertFalse(zae.getGeneralPurposeBit().usesStrongEncryption()); + assertFalse(zin.canReadEntryData(zae)); + try { + final byte[] buf = new byte[1024]; + zin.read(buf, 0, buf.length); + fail("expected an exception"); + } catch (final UnsupportedZipFeatureException ex) { + assertSame(UnsupportedZipFeatureException.Feature.ENCRYPTION, + ex.getFeature()); + } + } finally { + if (zin != null) { + zin.close(); + } + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ExplodeSupportTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ExplodeSupportTest.java new file mode 100644 index 000000000..67fbbb145 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ExplodeSupportTest.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.CRC32; +import java.util.zip.CheckedOutputStream; + +import org.apache.commons.compress.utils.BoundedInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public class ExplodeSupportTest { + + private void testArchiveWithImplodeCompression(final String filename, final String entryName) throws IOException { + final ZipFile zip = new ZipFile(new File(filename)); + final ZipArchiveEntry entry = zip.getEntries().nextElement(); + assertEquals("entry name", entryName, entry.getName()); + assertTrue("entry can't be read", zip.canReadEntryData(entry)); + assertEquals("method", ZipMethod.IMPLODING.getCode(), entry.getMethod()); + + final ByteArrayOutputStream bout = new ByteArrayOutputStream(); + final CheckedOutputStream out = new CheckedOutputStream(bout, new CRC32()); + IOUtils.copy(zip.getInputStream(entry), out); + + out.flush(); + + assertEquals("CRC32", entry.getCrc(), out.getChecksum().getValue()); + zip.close(); + } + + @Test + public void testArchiveWithImplodeCompression4K2Trees() throws IOException { + testArchiveWithImplodeCompression("target/test-classes/imploding-4Kdict-2trees.zip", "HEADER.TXT"); + } + + @Test + public void testArchiveWithImplodeCompression8K3Trees() throws IOException { + testArchiveWithImplodeCompression("target/test-classes/imploding-8Kdict-3trees.zip", "LICENSE.TXT"); + } + + @Test + public void testTikaTestArchive() throws IOException { + testArchiveWithImplodeCompression("target/test-classes/moby-imploded.zip", "README"); + } + + private void testZipStreamWithImplodeCompression(final String filename, final String entryName) throws IOException { + final ZipArchiveInputStream zin = new ZipArchiveInputStream(new FileInputStream(new File(filename))); + final ZipArchiveEntry entry = zin.getNextZipEntry(); + assertEquals("entry name", entryName, entry.getName()); + assertTrue("entry can't be read", zin.canReadEntryData(entry)); + assertEquals("method", ZipMethod.IMPLODING.getCode(), entry.getMethod()); + + final InputStream bio = new BoundedInputStream(zin, entry.getSize()); + + final ByteArrayOutputStream bout = new ByteArrayOutputStream(); + final CheckedOutputStream out = new CheckedOutputStream(bout, new CRC32()); + IOUtils.copy(bio, out); + + out.flush(); + + assertEquals("CRC32", entry.getCrc(), out.getChecksum().getValue()); + } + + @Test + public void testZipStreamWithImplodeCompression4K2Trees() throws IOException { + testZipStreamWithImplodeCompression("target/test-classes/imploding-4Kdict-2trees.zip", "HEADER.TXT"); + } + + @Test + public void testZipStreamWithImplodeCompression8K3Trees() throws IOException { + testZipStreamWithImplodeCompression("target/test-classes/imploding-8Kdict-3trees.zip", "LICENSE.TXT"); + } + + @Test + public void testTikaTestStream() throws IOException { + testZipStreamWithImplodeCompression("target/test-classes/moby-imploded.zip", "README"); + } + + @Test + public void testConstructorThrowsExceptions() { + try { + ExplodingInputStream eis = new ExplodingInputStream(4095,2,new ByteArrayInputStream(new byte[] {})); + fail("should have failed with illegal argument exception"); + } catch (IllegalArgumentException e) { + } + + try { + ExplodingInputStream eis = new 
ExplodingInputStream(4096,4,new ByteArrayInputStream(new byte[] {})); + fail("should have failed with illegal argument exception"); + } catch (IllegalArgumentException e) { + } + + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ExtraFieldUtilsTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ExtraFieldUtilsTest.java new file mode 100644 index 000000000..56b7d767a --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ExtraFieldUtilsTest.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import org.junit.Before; +import org.junit.Test; + +import java.util.zip.ZipException; + +/** + * JUnit testcases for org.apache.commons.compress.archivers.zip.ExtraFieldUtils. + * + */ +public class ExtraFieldUtilsTest implements UnixStat { + + /** + * Header-ID of a ZipExtraField not supported by Commons Compress. + * + * <p>Used to be ZipShort(1) but this is the ID of the Zip64 extra + * field.</p> + */ + static final ZipShort UNRECOGNIZED_HEADER = new ZipShort(0x5555); + + private AsiExtraField a; + private UnrecognizedExtraField dummy; + private byte[] data; + private byte[] aLocal; + + @Before + public void setUp() { + a = new AsiExtraField(); + a.setMode(0755); + a.setDirectory(true); + dummy = new UnrecognizedExtraField(); + dummy.setHeaderId(UNRECOGNIZED_HEADER); + dummy.setLocalFileDataData(new byte[] {0}); + dummy.setCentralDirectoryData(new byte[] {0}); + + aLocal = a.getLocalFileDataData(); + final byte[] dummyLocal = dummy.getLocalFileDataData(); + data = new byte[4 + aLocal.length + 4 + dummyLocal.length]; + System.arraycopy(a.getHeaderId().getBytes(), 0, data, 0, 2); + System.arraycopy(a.getLocalFileDataLength().getBytes(), 0, data, 2, 2); + System.arraycopy(aLocal, 0, data, 4, aLocal.length); + System.arraycopy(dummy.getHeaderId().getBytes(), 0, data, + 4+aLocal.length, 2); + System.arraycopy(dummy.getLocalFileDataLength().getBytes(), 0, data, + 4+aLocal.length+2, 2); + System.arraycopy(dummyLocal, 0, data, + 4+aLocal.length+4, dummyLocal.length); + + } + + /** + * test parser. 
+ */ + @Test + public void testParse() throws Exception { + final ZipExtraField[] ze = ExtraFieldUtils.parse(data); + assertEquals("number of fields", 2, ze.length); + assertTrue("type field 1", ze[0] instanceof AsiExtraField); + assertEquals("mode field 1", 040755, + ((AsiExtraField) ze[0]).getMode()); + assertTrue("type field 2", ze[1] instanceof UnrecognizedExtraField); + assertEquals("data length field 2", 1, + ze[1].getLocalFileDataLength().getValue()); + + final byte[] data2 = new byte[data.length-1]; + System.arraycopy(data, 0, data2, 0, data2.length); + try { + ExtraFieldUtils.parse(data2); + fail("data should be invalid"); + } catch (final Exception e) { + assertEquals("message", + "bad extra field starting at "+(4 + aLocal.length) + + ". Block length of 1 bytes exceeds remaining data of 0 bytes.", + e.getMessage()); + } + } + + @Test + public void parseTurnsArrayIndexOutOfBoundsIntoZipException() throws Exception { + AsiExtraField f = new AsiExtraField(); + f.setLinkedFile("foo"); + byte[] l = f.getLocalFileDataData(); + // manipulate size of path name to read 4 rather than 3 + l[9] = 4; + // and fake CRC so we actually reach the AIOBE + l[0] = (byte) 0x52; + l[1] = (byte) 0x26; + l[2] = (byte) 0x18; + l[3] = (byte) 0x19; + byte[] d = new byte[4 + l.length]; + System.arraycopy(f.getHeaderId().getBytes(), 0, d, 0, 2); + System.arraycopy(f.getLocalFileDataLength().getBytes(), 0, d, 2, 2); + System.arraycopy(l, 0, d, 4, l.length); + try { + ExtraFieldUtils.parse(d); + fail("data should be invalid"); + } catch (final ZipException e) { + assertEquals("message", + "Failed to parse corrupt ZIP extra field of type 756e", + e.getMessage()); + } + } + + @Test + public void testParseCentral() throws Exception { + final ZipExtraField[] ze = ExtraFieldUtils.parse(data,false); + assertEquals("number of fields", 2, ze.length); + assertTrue("type field 1", ze[0] instanceof AsiExtraField); + assertEquals("mode field 1", 040755, + ((AsiExtraField) ze[0]).getMode()); + assertTrue("type field 2", ze[1] instanceof UnrecognizedExtraField); + assertEquals("data length field 2", 1, + ze[1].getCentralDirectoryLength().getValue()); + + } + + @Test + public void testParseWithRead() throws Exception { + ZipExtraField[] ze = + ExtraFieldUtils.parse(data, true, + ExtraFieldUtils.UnparseableExtraField.READ); + assertEquals("number of fields", 2, ze.length); + assertTrue("type field 1", ze[0] instanceof AsiExtraField); + assertEquals("mode field 1", 040755, + ((AsiExtraField) ze[0]).getMode()); + assertTrue("type field 2", ze[1] instanceof UnrecognizedExtraField); + assertEquals("data length field 2", 1, + ze[1].getLocalFileDataLength().getValue()); + + final byte[] data2 = new byte[data.length-1]; + System.arraycopy(data, 0, data2, 0, data2.length); + ze = ExtraFieldUtils.parse(data2, true, + ExtraFieldUtils.UnparseableExtraField.READ); + assertEquals("number of fields", 2, ze.length); + assertTrue("type field 1", ze[0] instanceof AsiExtraField); + assertEquals("mode field 1", 040755, + ((AsiExtraField) ze[0]).getMode()); + assertTrue("type field 2", ze[1] instanceof UnparseableExtraFieldData); + assertEquals("data length field 2", 4, + ze[1].getLocalFileDataLength().getValue()); + for (int i = 0; i < 4; i++) { + assertEquals("byte number " + i, + data2[data.length - 5 + i], + ze[1].getLocalFileDataData()[i]); + } + } + + @Test + public void testParseWithSkip() throws Exception { + ZipExtraField[] ze = + ExtraFieldUtils.parse(data, true, + ExtraFieldUtils.UnparseableExtraField.SKIP); + assertEquals("number of 
fields", 2, ze.length); + assertTrue("type field 1", ze[0] instanceof AsiExtraField); + assertEquals("mode field 1", 040755, + ((AsiExtraField) ze[0]).getMode()); + assertTrue("type field 2", ze[1] instanceof UnrecognizedExtraField); + assertEquals("data length field 2", 1, + ze[1].getLocalFileDataLength().getValue()); + + final byte[] data2 = new byte[data.length-1]; + System.arraycopy(data, 0, data2, 0, data2.length); + ze = ExtraFieldUtils.parse(data2, true, + ExtraFieldUtils.UnparseableExtraField.SKIP); + assertEquals("number of fields", 1, ze.length); + assertTrue("type field 1", ze[0] instanceof AsiExtraField); + assertEquals("mode field 1", 040755, + ((AsiExtraField) ze[0]).getMode()); + } + + /** + * Test merge methods + */ + @Test + public void testMerge() { + final byte[] local = + ExtraFieldUtils.mergeLocalFileDataData(new ZipExtraField[] {a, dummy}); + assertEquals("local length", data.length, local.length); + for (int i=0; i<local.length; i++) { + assertEquals("local byte "+i, data[i], local[i]); + } + + final byte[] dummyCentral = dummy.getCentralDirectoryData(); + final byte[] data2 = new byte[4 + aLocal.length + 4 + dummyCentral.length]; + System.arraycopy(data, 0, data2, 0, 4 + aLocal.length + 2); + System.arraycopy(dummy.getCentralDirectoryLength().getBytes(), 0, + data2, 4+aLocal.length+2, 2); + System.arraycopy(dummyCentral, 0, data2, + 4+aLocal.length+4, dummyCentral.length); + + + final byte[] central = + ExtraFieldUtils.mergeCentralDirectoryData(new ZipExtraField[] {a, dummy}); + assertEquals("central length", data2.length, central.length); + for (int i=0; i<central.length; i++) { + assertEquals("central byte "+i, data2[i], central[i]); + } + + } + + @Test + public void testMergeWithUnparseableData() throws Exception { + final ZipExtraField d = new UnparseableExtraFieldData(); + final byte[] b = UNRECOGNIZED_HEADER.getBytes(); + d.parseFromLocalFileData(new byte[] {b[0], b[1], 1, 0}, 0, 4); + final byte[] local = + ExtraFieldUtils.mergeLocalFileDataData(new ZipExtraField[] {a, d}); + assertEquals("local length", data.length - 1, local.length); + for (int i = 0; i < local.length; i++) { + assertEquals("local byte " + i, data[i], local[i]); + } + + final byte[] dCentral = d.getCentralDirectoryData(); + final byte[] data2 = new byte[4 + aLocal.length + dCentral.length]; + System.arraycopy(data, 0, data2, 0, 4 + aLocal.length + 2); + System.arraycopy(dCentral, 0, data2, + 4 + aLocal.length, dCentral.length); + + + final byte[] central = + ExtraFieldUtils.mergeCentralDirectoryData(new ZipExtraField[] {a, d}); + assertEquals("central length", data2.length, central.length); + for (int i = 0; i < central.length; i++) { + assertEquals("central byte " + i, data2[i], central[i]); + } + + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/GeneralPurposeBitTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/GeneralPurposeBitTest.java new file mode 100644 index 000000000..4fc119ac2 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/GeneralPurposeBitTest.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import java.util.Arrays; + +import org.junit.Test; + +public class GeneralPurposeBitTest { + + @Test + public void testDefaults() { + assertFalse(new GeneralPurposeBit().usesDataDescriptor()); + assertFalse(new GeneralPurposeBit().usesUTF8ForNames()); + assertFalse(new GeneralPurposeBit().usesEncryption()); + assertFalse(new GeneralPurposeBit().usesStrongEncryption()); + final byte[] b = new byte[2]; + assertTrue(Arrays.equals(b, new GeneralPurposeBit().encode())); + } + + @Test + public void testParseEdgeCases() { + assertFalse(GeneralPurposeBit.parse(new byte[2], 0) + .usesDataDescriptor()); + assertFalse(GeneralPurposeBit.parse(new byte[2], 0) + .usesUTF8ForNames()); + assertFalse(GeneralPurposeBit.parse(new byte[2], 0) + .usesEncryption()); + assertFalse(GeneralPurposeBit.parse(new byte[2], 0) + .usesStrongEncryption()); + assertTrue(GeneralPurposeBit.parse(new byte[] {(byte) 255, (byte) 255}, + 0) + .usesDataDescriptor()); + assertTrue(GeneralPurposeBit.parse(new byte[] {(byte) 255, (byte) 255}, + 0) + .usesUTF8ForNames()); + assertTrue(GeneralPurposeBit.parse(new byte[] {(byte) 255, (byte) 255}, + 0) + .usesEncryption()); + assertTrue(GeneralPurposeBit.parse(new byte[] {(byte) 255, (byte) 255}, + 0) + .usesStrongEncryption()); + } + + @Test + public void testDataDescriptor() { + final byte[] flags = new byte[] {(byte) 8, (byte) 0}; + assertTrue(GeneralPurposeBit.parse(flags, 0).usesDataDescriptor()); + final GeneralPurposeBit b = new GeneralPurposeBit(); + b.useDataDescriptor(true); + assertTrue(Arrays.equals(flags, b.encode())); + } + + @Test + public void testLanguageEncodingFlag() { + final byte[] flags = new byte[] {(byte) 0, (byte) 8}; + assertTrue(GeneralPurposeBit.parse(flags, 0).usesUTF8ForNames()); + final GeneralPurposeBit b = new GeneralPurposeBit(); + b.useUTF8ForNames(true); + assertTrue(Arrays.equals(flags, b.encode())); + } + + @Test + public void testEncryption() { + final byte[] flags = new byte[] {(byte) 1, (byte) 0}; + assertTrue(GeneralPurposeBit.parse(flags, 0).usesEncryption()); + final GeneralPurposeBit b = new GeneralPurposeBit(); + b.useEncryption(true); + assertTrue(Arrays.equals(flags, b.encode())); + } + + @Test + public void testStrongEncryption() { + byte[] flags = new byte[] {(byte) 65, (byte) 0}; + assertTrue(GeneralPurposeBit.parse(flags, 0).usesStrongEncryption()); + final GeneralPurposeBit b = new GeneralPurposeBit(); + b.useStrongEncryption(true); + assertTrue(b.usesEncryption()); + assertTrue(Arrays.equals(flags, b.encode())); + + flags = new byte[] {(byte) 64, (byte) 0}; + assertFalse(GeneralPurposeBit.parse(flags, 0).usesStrongEncryption()); + } + + @Test + public void testClone() { + final GeneralPurposeBit b = new GeneralPurposeBit(); + b.useStrongEncryption(true); + b.useUTF8ForNames(true); + assertEquals(b, b.clone()); + assertNotSame(b, b.clone()); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/Lister.java b/src/test/java/org/apache/commons/compress/archivers/zip/Lister.java new file mode 100644 
index 000000000..a633715d3 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/Lister.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Enumeration; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.utils.IOUtils; + +/** + * Simple command line application that lists the contents of a ZIP archive. + * + * <p>The name of the archive must be given as a command line argument.</p> + * + * <p>Optional command line arguments specify the encoding to assume + * and whether to use ZipFile or ZipArchiveInputStream.</p> + */ +public final class Lister { + private static class CommandLine { + String archive; + boolean useStream = false; + String encoding; + boolean allowStoredEntriesWithDataDescriptor = false; + String dir; + } + + public static void main(final String[] args) throws IOException { + final CommandLine cl = parse(args); + final File f = new File(cl.archive); + if (!f.isFile()) { + System.err.println(f + " doesn't exists or is a directory"); + usage(); + } + if (cl.useStream) { + try (BufferedInputStream fs = new BufferedInputStream(new FileInputStream(f))) { + final ZipArchiveInputStream zs = + new ZipArchiveInputStream(fs, cl.encoding, true, + cl.allowStoredEntriesWithDataDescriptor); + for (ArchiveEntry entry = zs.getNextEntry(); + entry != null; + entry = zs.getNextEntry()) { + final ZipArchiveEntry ze = (ZipArchiveEntry) entry; + list(ze); + if (cl.dir != null) { + extract(cl.dir, ze, zs); + } + } + } + } else { + try (ZipFile zf = new ZipFile(f, cl.encoding)) { + for (final Enumeration<ZipArchiveEntry> entries = zf.getEntries(); + entries.hasMoreElements(); ) { + final ZipArchiveEntry ze = entries.nextElement(); + list(ze); + if (cl.dir != null) { + try (InputStream is = zf.getInputStream(ze)) { + extract(cl.dir, ze, is); + } + } + } + } + } + } + + private static void list(final ZipArchiveEntry entry) { + System.out.println(entry.getName()); + } + + private static void extract(final String dir, final ZipArchiveEntry entry, + final InputStream is) throws IOException { + final File f = new File(dir, entry.getName()); + if (!f.getParentFile().exists()) { + f.getParentFile().mkdirs(); + } + FileOutputStream fos = null; + try { + fos = new FileOutputStream(f); + IOUtils.copy(is, fos); + } finally { + if (fos != null) { + fos.close(); + } + } + } + + private static CommandLine parse(final String[] args) { + final CommandLine cl = new CommandLine(); + boolean error = false; + for (int i = 0; 
i < args.length; i++) { + if (args[i].equals("-enc")) { + if (args.length > i + 1) { + cl.encoding = args[++i]; + } else { + System.err.println("missing argument to -enc"); + error = true; + } + } else if (args[i].equals("-extract")) { + if (args.length > i + 1) { + cl.dir = args[++i]; + } else { + System.err.println("missing argument to -extract"); + error = true; + } + } else if (args[i].equals("-stream")) { + cl.useStream = true; + } else if (args[i].equals("+storeddd")) { + cl.allowStoredEntriesWithDataDescriptor = true; + } else if (args[i].equals("-file")) { + cl.useStream = false; + } else if (cl.archive != null) { + System.err.println("Only one archive"); + error = true; + } else { + cl.archive = args[i]; + } + } + if (error || cl.archive == null) { + usage(); + } + return cl; + } + + private static void usage() { + System.err.println("lister [-enc encoding] [-stream] [-file]" + + " [+storeddd] [-extract dir] archive"); + System.exit(1); + } +}
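A minimal usage sketch for the Lister tool above (hypothetical paths, not part of the patch; the class is public, so it can be driven from any code with commons-compress on the classpath):

    // Hypothetical invocation, mirroring the usage string
    // "lister [-enc encoding] [-stream] [-file] [+storeddd] [-extract dir] archive".
    import org.apache.commons.compress.archivers.zip.Lister;

    public class ListerDemo {
        public static void main(final String[] args) throws Exception {
            Lister.main(new String[] {"-enc", "cp437", "-stream", "-extract", "/tmp/extracted", "archive.zip"});
        }
    }

If the archive path does not point at an existing file, Lister prints the usage string and exits, as shown in its main() and usage() methods above.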
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/Maven221MultiVolumeTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/Maven221MultiVolumeTest.java new file mode 100644 index 000000000..0a905e3ab --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/Maven221MultiVolumeTest.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.junit.Test; + +/** + * JUnit testcase for a multi-volume zip file. + * + * Some tools (like 7-zip) allow users to split a large archive into 'volumes' + * with a given size to fit them onto multiple CDs, USB drives, or emails with + * an attachment size limit. It's basically the same file split into chunks of + * exactly 65536 bytes. Concatenating the volumes yields exactly the original + * file. There is no mechanism in the ZIP algorithm to accommodate this. + * Commons Compress used to enter an infinite loop on the last entry for + * such a file. This test is intended to prove that this error doesn't occur + * anymore. All entries but the last one are returned correctly; the last entry + * yields an exception. 
+ * + */ +public class Maven221MultiVolumeTest { + + private static final String [] ENTRIES = new String [] { + "apache-maven-2.2.1/", + "apache-maven-2.2.1/LICENSE.txt", + "apache-maven-2.2.1/NOTICE.txt", + "apache-maven-2.2.1/README.txt", + "apache-maven-2.2.1/bin/", + "apache-maven-2.2.1/bin/m2.conf", + "apache-maven-2.2.1/bin/mvn", + "apache-maven-2.2.1/bin/mvn.bat", + "apache-maven-2.2.1/bin/mvnDebug", + "apache-maven-2.2.1/bin/mvnDebug.bat", + "apache-maven-2.2.1/boot/", + "apache-maven-2.2.1/boot/classworlds-1.1.jar", + "apache-maven-2.2.1/conf/", + "apache-maven-2.2.1/conf/settings.xml", + "apache-maven-2.2.1/lib/" + }; + + private static final String LAST_ENTRY_NAME = + "apache-maven-2.2.1/lib/maven-2.2.1-uber.jar"; + + @Test + public void testRead7ZipMultiVolumeArchiveForStream() throws IOException { + + final FileInputStream archive = + new FileInputStream(getFile("apache-maven-2.2.1.zip.001")); + ZipArchiveInputStream zi = null; + try { + zi = new ZipArchiveInputStream(archive,null,false); + + // these are the entries that are supposed to be processed + // correctly without any problems + for (final String element : ENTRIES) { + assertEquals(element, zi.getNextEntry().getName()); + } + + // this is the last entry that is truncated + final ArchiveEntry lastEntry = zi.getNextEntry(); + assertEquals(LAST_ENTRY_NAME, lastEntry.getName()); + final byte [] buffer = new byte [4096]; + + // before the fix, we'd get 0 bytes on this read and all + // subsequent reads thus a client application might enter + // an infinite loop after the fix, we should get an + // exception + try { + while (zi.read(buffer) > 0) { } + fail("shouldn't be able to read from truncated entry"); + } catch (final IOException e) { + assertEquals("Truncated ZIP file", e.getMessage()); + } + + try { + zi.read(buffer); + fail("shouldn't be able to read from truncated entry after exception"); + } catch (final IOException e) { + assertEquals("Truncated ZIP file", e.getMessage()); + } + + // and now we get another entry, which should also yield + // an exception + try { + zi.getNextEntry(); + fail("shouldn't be able to read another entry from truncated" + + " file"); + } catch (final IOException e) { + // this is to be expected + } + } finally { + if (zi != null) { + zi.close(); + } + } + } + + @Test(expected=IOException.class) + public void testRead7ZipMultiVolumeArchiveForFile() throws IOException { + final File file = getFile("apache-maven-2.2.1.zip.001"); + ZipFile zf = new ZipFile(file); + zf.close(); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreatorTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreatorTest.java new file mode 100644 index 000000000..19a8fe9fb --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreatorTest.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore; +import org.apache.commons.compress.parallel.InputStreamSupplier; +import org.apache.commons.compress.parallel.ScatterGatherBackingStore; +import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.After; +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.zip.ZipEntry; + +import static org.apache.commons.compress.AbstractTestCase.tryHardToDelete; +import static org.junit.Assert.*; + +public class ParallelScatterZipCreatorTest { + + private final int NUMITEMS = 5000; + + private File result; + private File tmp; + + @After + public void cleanup() { + tryHardToDelete(result); + tryHardToDelete(tmp); + } + + @Test + public void concurrent() + throws Exception { + result = File.createTempFile("parallelScatterGather1", ""); + final ZipArchiveOutputStream zos = new ZipArchiveOutputStream(result); + zos.setEncoding("UTF-8"); + final ParallelScatterZipCreator zipCreator = new ParallelScatterZipCreator(); + + final Map<String, byte[]> entries = writeEntries(zipCreator); + zipCreator.writeTo(zos); + zos.close(); + removeEntriesFoundInZipFile(result, entries); + assertTrue(entries.size() == 0); + assertNotNull( zipCreator.getStatisticsMessage()); + } + + @Test + public void callableApi() + throws Exception { + result = File.createTempFile("parallelScatterGather2", ""); + final ZipArchiveOutputStream zos = new ZipArchiveOutputStream(result); + zos.setEncoding("UTF-8"); + final ExecutorService es = Executors.newFixedThreadPool(1); + + final ScatterGatherBackingStoreSupplier supp = new ScatterGatherBackingStoreSupplier() { + @Override + public ScatterGatherBackingStore get() throws IOException { + return new FileBasedScatterGatherBackingStore(tmp = File.createTempFile("parallelscatter", "n1")); + } + }; + + final ParallelScatterZipCreator zipCreator = new ParallelScatterZipCreator(es, supp); + final Map<String, byte[]> entries = writeEntriesAsCallable(zipCreator); + zipCreator.writeTo(zos); + zos.close(); + + + removeEntriesFoundInZipFile(result, entries); + assertTrue(entries.size() == 0); + assertNotNull(zipCreator.getStatisticsMessage()); + } + + private void removeEntriesFoundInZipFile(final File result, final Map<String, byte[]> entries) throws IOException { + final ZipFile zf = new ZipFile(result); + final Enumeration<ZipArchiveEntry> entriesInPhysicalOrder = zf.getEntriesInPhysicalOrder(); + while (entriesInPhysicalOrder.hasMoreElements()){ + final ZipArchiveEntry zipArchiveEntry = entriesInPhysicalOrder.nextElement(); + final InputStream inputStream = zf.getInputStream(zipArchiveEntry); + final byte[] actual = IOUtils.toByteArray(inputStream); 
+ final byte[] expected = entries.remove(zipArchiveEntry.getName()); + assertArrayEquals( "For " + zipArchiveEntry.getName(), expected, actual); + } + zf.close(); + } + + private Map<String, byte[]> writeEntries(final ParallelScatterZipCreator zipCreator) { + final Map<String, byte[]> entries = new HashMap<>(); + for (int i = 0; i < NUMITEMS; i++){ + final byte[] payloadBytes = ("content" + i).getBytes(); + final ZipArchiveEntry za = createZipArchiveEntry(entries, i, payloadBytes); + final InputStreamSupplier iss = new InputStreamSupplier() { + @Override + public InputStream get() { + return new ByteArrayInputStream(payloadBytes); + } + }; + if (i % 2 == 0) { + zipCreator.addArchiveEntry(za, iss); + } else { + final ZipArchiveEntryRequestSupplier zaSupplier = new ZipArchiveEntryRequestSupplier() { + @Override + public ZipArchiveEntryRequest get() { + return ZipArchiveEntryRequest.createZipArchiveEntryRequest(za, iss); + } + }; + zipCreator.addArchiveEntry(zaSupplier); + } + } + return entries; + } + + private Map<String, byte[]> writeEntriesAsCallable(final ParallelScatterZipCreator zipCreator) { + final Map<String, byte[]> entries = new HashMap<>(); + for (int i = 0; i < NUMITEMS; i++){ + final byte[] payloadBytes = ("content" + i).getBytes(); + final ZipArchiveEntry za = createZipArchiveEntry(entries, i, payloadBytes); + final InputStreamSupplier iss = new InputStreamSupplier() { + @Override + public InputStream get() { + return new ByteArrayInputStream(payloadBytes); + } + }; + final Callable<Object> callable; + if (i % 2 == 0) { + callable = zipCreator.createCallable(za, iss); + } else { + final ZipArchiveEntryRequestSupplier zaSupplier = new ZipArchiveEntryRequestSupplier() { + @Override + public ZipArchiveEntryRequest get() { + return ZipArchiveEntryRequest.createZipArchiveEntryRequest(za, iss); + } + }; + callable = zipCreator.createCallable(zaSupplier); + } + + zipCreator.submit(callable); + } + return entries; + } + + private ZipArchiveEntry createZipArchiveEntry(final Map<String, byte[]> entries, final int i, final byte[] payloadBytes) { + final ZipArchiveEntry za = new ZipArchiveEntry( "file" + i); + entries.put( za.getName(), payloadBytes); + za.setMethod(ZipEntry.DEFLATED); + za.setSize(payloadBytes.length); + za.setUnixMode(UnixStat.FILE_FLAG | 0664); + return za; + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/PkWareExtraHeaderTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/PkWareExtraHeaderTest.java new file mode 100644 index 000000000..3fe1730f0 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/PkWareExtraHeaderTest.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import org.apache.commons.compress.archivers.zip.PKWareExtraHeader.EncryptionAlgorithm; +import org.apache.commons.compress.archivers.zip.PKWareExtraHeader.HashAlgorithm; +import org.junit.Test; + +public class PkWareExtraHeaderTest { + + @Test public void testEncryptionAlgorithm() { + String name = "AES256"; + int code = EncryptionAlgorithm.AES256.getCode(); + EncryptionAlgorithm e = EncryptionAlgorithm.valueOf(name); + assertEquals(code,e.getCode()); + assertNotNull(e); + } + + @Test public void testHashAlgorithm() { + String name = "SHA256"; + int code = HashAlgorithm.SHA256.getCode(); + HashAlgorithm e = HashAlgorithm.valueOf(name); + assertEquals(code,e.getCode()); + assertNotNull(e); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ScatterSample.java b/src/test/java/org/apache/commons/compress/archivers/zip/ScatterSample.java new file mode 100644 index 000000000..deb7a7e5d --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ScatterSample.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.zip; + +import org.apache.commons.compress.parallel.InputStreamSupplier; + +import java.io.File; +import java.io.IOException; +import java.util.concurrent.ExecutionException; + +public class ScatterSample { + + ParallelScatterZipCreator scatterZipCreator = new ParallelScatterZipCreator(); + ScatterZipOutputStream dirs = ScatterZipOutputStream.fileBased(File.createTempFile("scatter-dirs", "tmp")); + + public ScatterSample() throws IOException { + } + + public void addEntry(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier streamSupplier) throws IOException { + if (zipArchiveEntry.isDirectory() && !zipArchiveEntry.isUnixSymlink()) { + dirs.addArchiveEntry(ZipArchiveEntryRequest.createZipArchiveEntryRequest(zipArchiveEntry, streamSupplier)); + } else { + scatterZipCreator.addArchiveEntry( zipArchiveEntry, streamSupplier); + } + } + + public void writeTo(final ZipArchiveOutputStream zipArchiveOutputStream) + throws IOException, ExecutionException, InterruptedException { + dirs.writeTo(zipArchiveOutputStream); + dirs.close(); + scatterZipCreator.writeTo(zipArchiveOutputStream); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ScatterSampleTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ScatterSampleTest.java new file mode 100644 index 000000000..d94f294be --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ScatterSampleTest.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.zip; + +import org.apache.commons.compress.parallel.InputStreamSupplier; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.ExecutionException; +import java.util.zip.ZipEntry; + +import static org.junit.Assert.*; + +public class ScatterSampleTest { + + @Test + public void testSample() throws Exception { + final File result = File.createTempFile("testSample", "fe"); + + createFile(result); + checkFile(result); + } + + private void createFile(final File result) throws IOException, ExecutionException, InterruptedException { + final ScatterSample scatterSample = new ScatterSample(); + final ZipArchiveEntry archiveEntry = new ZipArchiveEntry("test1.xml"); + archiveEntry.setMethod(ZipEntry.DEFLATED); + final InputStreamSupplier supp = new InputStreamSupplier() { + @Override + public InputStream get() { + return new ByteArrayInputStream("Hello".getBytes()); + } + }; + + scatterSample.addEntry(archiveEntry, supp); + final ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(result); + scatterSample.writeTo(zipArchiveOutputStream); + zipArchiveOutputStream.close(); + } + + private void checkFile(final File result) throws IOException { + final ZipFile zf = new ZipFile(result); + final ZipArchiveEntry archiveEntry1 = zf.getEntries().nextElement(); + assertEquals( "test1.xml", archiveEntry1.getName()); + final InputStream inputStream = zf.getInputStream(archiveEntry1); + final byte[] b = new byte[6]; + final int i = IOUtils.readFully(inputStream, b); + assertEquals(5, i); + assertEquals('H', b[0]); + assertEquals('o', b[4]); + zf.close(); + result.delete(); + } +}
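ScatterSample above routes directory entries to the file-based ScatterZipOutputStream while regular files go through the ParallelScatterZipCreator, but the test only ever adds a single file entry. A rough, untested sketch (hypothetical entry names, same imports as ScatterSampleTest, not part of the patch) of how the directory branch could be exercised as well:

    // Hypothetical fragment: one directory entry plus one file entry through ScatterSample.
    final ScatterSample sample = new ScatterSample();

    final ZipArchiveEntry dir = new ZipArchiveEntry("docs/");    // trailing '/' marks a directory
    dir.setMethod(ZipEntry.STORED);
    sample.addEntry(dir, new InputStreamSupplier() {
        @Override
        public InputStream get() {
            return new ByteArrayInputStream(new byte[0]);        // directories carry no payload
        }
    });

    final ZipArchiveEntry file = new ZipArchiveEntry("docs/hello.txt");
    file.setMethod(ZipEntry.DEFLATED);
    sample.addEntry(file, new InputStreamSupplier() {
        @Override
        public InputStream get() {
            return new ByteArrayInputStream("Hello".getBytes());
        }
    });

    final ZipArchiveOutputStream zos = new ZipArchiveOutputStream(new File("scatter-demo.zip"));
    sample.writeTo(zos);    // writes the collected directory entries first, then the deflated files
    zos.close();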
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStreamTest.java new file mode 100644 index 000000000..124cf9ace --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStreamTest.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import org.apache.commons.compress.parallel.InputStreamSupplier; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.After; +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.InputStream; +import java.util.zip.ZipEntry; + +import static org.apache.commons.compress.AbstractTestCase.tryHardToDelete; +import static org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest.createZipArchiveEntryRequest; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class ScatterZipOutputStreamTest { + + private File scatterFile = null; + private File target = null; + + @After + public void cleanup() { + tryHardToDelete(scatterFile); + tryHardToDelete(target); + } + + @Test + public void putArchiveEntry() throws Exception { + scatterFile = File.createTempFile("scattertest", ".notzip"); + final ScatterZipOutputStream scatterZipOutputStream = ScatterZipOutputStream.fileBased(scatterFile); + final byte[] B_PAYLOAD = "RBBBBBBS".getBytes(); + final byte[] A_PAYLOAD = "XAAY".getBytes(); + + final ZipArchiveEntry zab = new ZipArchiveEntry("b.txt"); + zab.setMethod(ZipEntry.DEFLATED); + final ByteArrayInputStream payload = new ByteArrayInputStream(B_PAYLOAD); + scatterZipOutputStream.addArchiveEntry(createZipArchiveEntryRequest(zab, createPayloadSupplier(payload))); + + final ZipArchiveEntry zae = new ZipArchiveEntry("a.txt"); + zae.setMethod(ZipEntry.DEFLATED); + final ByteArrayInputStream payload1 = new ByteArrayInputStream(A_PAYLOAD); + scatterZipOutputStream.addArchiveEntry(createZipArchiveEntryRequest(zae, createPayloadSupplier(payload1))); + + target = File.createTempFile("scattertest", ".zip"); + final ZipArchiveOutputStream outputStream = new ZipArchiveOutputStream(target); + scatterZipOutputStream.writeTo( outputStream); + outputStream.close(); + scatterZipOutputStream.close(); + + final ZipFile zf = new ZipFile(target); + final ZipArchiveEntry b_entry = zf.getEntries("b.txt").iterator().next(); + assertEquals(8, b_entry.getSize()); + assertArrayEquals(B_PAYLOAD, IOUtils.toByteArray(zf.getInputStream(b_entry))); + + final ZipArchiveEntry a_entry = zf.getEntries("a.txt").iterator().next(); + assertEquals(4, a_entry.getSize()); + 
assertArrayEquals(A_PAYLOAD, IOUtils.toByteArray(zf.getInputStream(a_entry))); + zf.close(); + } + + private InputStreamSupplier createPayloadSupplier(final ByteArrayInputStream payload) { + return new InputStreamSupplier() { + @Override + public InputStream get() { + return payload; + } + }; + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/StreamCompressorTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/StreamCompressorTest.java new file mode 100644 index 000000000..38ed531c7 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/StreamCompressorTest.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.zip.Deflater; +import java.util.zip.ZipEntry; +import org.junit.Test; + +public class StreamCompressorTest { + + @Test + public void storedEntries() throws Exception { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final StreamCompressor sc = StreamCompressor.create( baos); + sc.deflate(new ByteArrayInputStream("A".getBytes()), ZipEntry.STORED); + sc.deflate(new ByteArrayInputStream("BAD".getBytes()), ZipEntry.STORED); + assertEquals(3, sc.getBytesRead()); + assertEquals(3, sc.getBytesWrittenForLastEntry()); + assertEquals(344750961, sc.getCrc32()); + sc.deflate(new ByteArrayInputStream("CAFE".getBytes()), ZipEntry.STORED); + assertEquals("ABADCAFE", baos.toString()); + } + + @Test + public void deflatedEntries() throws Exception { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final StreamCompressor sc = StreamCompressor.create( baos); + sc.deflate(new ByteArrayInputStream("AAAAAABBBBBB".getBytes()), ZipEntry.DEFLATED); + assertEquals(12, sc.getBytesRead()); + assertEquals(8, sc.getBytesWrittenForLastEntry()); + assertEquals(3299542, sc.getCrc32()); + + final byte[] actuals = baos.toByteArray(); + final byte[] expected = new byte[]{115,116,4,1,39,48,0,0}; + // Note that this test really asserts stuff about the java Deflater, which might be a little bit brittle + assertArrayEquals(expected, actuals); + } + + @Test + public void testCreateDataOutputCompressor() throws IOException { + DataOutput dataOutputStream = new DataOutputStream(new ByteArrayOutputStream()); + try (StreamCompressor streamCompressor = StreamCompressor + .create(dataOutputStream, new Deflater(9))) { + assertNotNull(streamCompressor); + } + } +} diff --git 
a/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java new file mode 100644 index 000000000..da9bb24d7 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java @@ -0,0 +1,408 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.util.Enumeration; +import java.util.zip.CRC32; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.utils.CharsetNames; +import org.junit.Test; + +public class UTF8ZipFilesTest extends AbstractTestCase { + + private static final String CP437 = "cp437"; + private static final String ASCII_TXT = "ascii.txt"; + private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt"; + private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt"; + + @Test + public void testUtf8FileRoundtripExplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.UTF_8, true, true); + } + + @Test + public void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.UTF_8, false, true); + } + + @Test + public void testCP437FileRoundtripExplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CP437, false, true); + } + + @Test + public void testASCIIFileRoundtripExplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.US_ASCII, false, true); + } + + @Test + public void testUtf8FileRoundtripImplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.UTF_8, true, false); + } + + @Test + public void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.UTF_8, false, false); + } + + @Test + public void testCP437FileRoundtripImplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CP437, false, false); + } + + @Test + public void testASCIIFileRoundtripImplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.US_ASCII, false, false); + } + + /* + * 7-ZIP created archive, uses EFS to signal UTF-8 filenames. + * + * 7-ZIP doesn't use EFS for strings that can be encoded in CP437 + * - which is true for OIL_BARREL_TXT. 
+ */ + @Test + public void testRead7ZipArchive() throws IOException { + final File archive = getFile("utf8-7zip-test.zip"); + ZipFile zf = null; + try { + zf = new ZipFile(archive, CP437, false); + assertNotNull(zf.getEntry(ASCII_TXT)); + assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT)); + assertNotNull(zf.getEntry(OIL_BARREL_TXT)); + } finally { + ZipFile.closeQuietly(zf); + } + } + + @Test + public void testRead7ZipArchiveForStream() throws IOException { + final FileInputStream archive = + new FileInputStream(getFile("utf8-7zip-test.zip")); + ZipArchiveInputStream zi = null; + try { + zi = new ZipArchiveInputStream(archive, CP437, false); + assertEquals(ASCII_TXT, zi.getNextEntry().getName()); + assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName()); + assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName()); + } finally { + if (zi != null) { + zi.close(); + } + } + } + + /* + * WinZIP created archive, uses Unicode Extra Fields but only in + * the central directory. + */ + @Test + public void testReadWinZipArchive() throws IOException { + final File archive = getFile("utf8-winzip-test.zip"); + ZipFile zf = null; + try { + zf = new ZipFile(archive, null, true); + assertCanRead(zf, ASCII_TXT); + assertCanRead(zf, EURO_FOR_DOLLAR_TXT); + assertCanRead(zf, OIL_BARREL_TXT); + } finally { + ZipFile.closeQuietly(zf); + } + } + + private void assertCanRead(final ZipFile zf, final String fileName) throws IOException { + final ZipArchiveEntry entry = zf.getEntry(fileName); + assertNotNull("Entry doesn't exist", entry); + final InputStream is = zf.getInputStream(entry); + assertNotNull("InputStream is null", is); + try { + is.read(); + } finally { + is.close(); + } + } + + @Test + public void testReadWinZipArchiveForStream() throws IOException { + final FileInputStream archive = + new FileInputStream(getFile("utf8-winzip-test.zip")); + ZipArchiveInputStream zi = null; + try { + zi = new ZipArchiveInputStream(archive, null, true); + assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName()); + assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName()); + assertEquals(ASCII_TXT, zi.getNextEntry().getName()); + } finally { + if (zi != null) { + zi.close(); + } + } + } + + @Test + public void testZipFileReadsUnicodeFields() throws IOException { + final File file = File.createTempFile("unicode-test", ".zip"); + file.deleteOnExit(); + ZipArchiveInputStream zi = null; + try { + createTestFile(file, CharsetNames.US_ASCII, false, true); + final FileInputStream archive = new FileInputStream(file); + zi = new ZipArchiveInputStream(archive, CharsetNames.US_ASCII, true); + assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName()); + assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName()); + assertEquals(ASCII_TXT, zi.getNextEntry().getName()); + } finally { + if (zi != null) { + zi.close(); + } + tryHardToDelete(file); + } + } + + @Test + public void testZipArchiveInputStreamReadsUnicodeFields() + throws IOException { + final File file = File.createTempFile("unicode-test", ".zip"); + file.deleteOnExit(); + ZipFile zf = null; + try { + createTestFile(file, CharsetNames.US_ASCII, false, true); + zf = new ZipFile(file, CharsetNames.US_ASCII, true); + assertNotNull(zf.getEntry(ASCII_TXT)); + assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT)); + assertNotNull(zf.getEntry(OIL_BARREL_TXT)); + } finally { + ZipFile.closeQuietly(zf); + tryHardToDelete(file); + } + } + + @Test + public void testRawNameReadFromZipFile() + throws IOException { + final File archive = getFile("utf8-7zip-test.zip"); + ZipFile 
zf = null; + try { + zf = new ZipFile(archive, CP437, false); + assertRawNameOfAcsiiTxt(zf.getEntry(ASCII_TXT)); + } finally { + ZipFile.closeQuietly(zf); + } + } + + @Test + public void testRawNameReadFromStream() + throws IOException { + final FileInputStream archive = + new FileInputStream(getFile("utf8-7zip-test.zip")); + ZipArchiveInputStream zi = null; + try { + zi = new ZipArchiveInputStream(archive, CP437, false); + assertRawNameOfAcsiiTxt((ZipArchiveEntry) zi.getNextEntry()); + } finally { + if (zi != null) { + zi.close(); + } + } + } + + private static void testFileRoundtrip(final String encoding, final boolean withEFS, + final boolean withExplicitUnicodeExtra) + throws IOException { + + final File file = File.createTempFile(encoding + "-test", ".zip"); + file.deleteOnExit(); + try { + createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra); + testFile(file, encoding); + } finally { + tryHardToDelete(file); + } + } + + private static void createTestFile(final File file, final String encoding, + final boolean withEFS, + final boolean withExplicitUnicodeExtra) + throws UnsupportedEncodingException, IOException { + + final ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); + + ZipArchiveOutputStream zos = null; + try { + zos = new ZipArchiveOutputStream(file); + zos.setEncoding(encoding); + zos.setUseLanguageEncodingFlag(withEFS); + zos.setCreateUnicodeExtraFields(withExplicitUnicodeExtra ? + ZipArchiveOutputStream + .UnicodeExtraFieldPolicy.NEVER + : ZipArchiveOutputStream + .UnicodeExtraFieldPolicy.ALWAYS); + + ZipArchiveEntry ze = new ZipArchiveEntry(OIL_BARREL_TXT); + if (withExplicitUnicodeExtra + && !zipEncoding.canEncode(ze.getName())) { + + final ByteBuffer en = zipEncoding.encode(ze.getName()); + + ze.addExtraField(new UnicodePathExtraField(ze.getName(), + en.array(), + en.arrayOffset(), + en.limit() + - en.position())); + } + + zos.putArchiveEntry(ze); + zos.write("Hello, world!".getBytes(CharsetNames.US_ASCII)); + zos.closeArchiveEntry(); + + ze = new ZipArchiveEntry(EURO_FOR_DOLLAR_TXT); + if (withExplicitUnicodeExtra + && !zipEncoding.canEncode(ze.getName())) { + + final ByteBuffer en = zipEncoding.encode(ze.getName()); + + ze.addExtraField(new UnicodePathExtraField(ze.getName(), + en.array(), + en.arrayOffset(), + en.limit() + - en.position())); + } + + zos.putArchiveEntry(ze); + zos.write("Give me your money!".getBytes(CharsetNames.US_ASCII)); + zos.closeArchiveEntry(); + + ze = new ZipArchiveEntry(ASCII_TXT); + + if (withExplicitUnicodeExtra + && !zipEncoding.canEncode(ze.getName())) { + + final ByteBuffer en = zipEncoding.encode(ze.getName()); + + ze.addExtraField(new UnicodePathExtraField(ze.getName(), + en.array(), + en.arrayOffset(), + en.limit() + - en.position())); + } + + zos.putArchiveEntry(ze); + zos.write("ascii".getBytes(CharsetNames.US_ASCII)); + zos.closeArchiveEntry(); + + zos.finish(); + } finally { + if (zos != null) { + try { + zos.close(); + } catch (final IOException e) { /* swallow */ } + } + } + } + + private static void testFile(final File file, final String encoding) + throws IOException { + ZipFile zf = null; + try { + zf = new ZipFile(file, encoding, false); + + final Enumeration<ZipArchiveEntry> e = zf.getEntries(); + while (e.hasMoreElements()) { + final ZipArchiveEntry ze = e.nextElement(); + + if (ze.getName().endsWith("sser.txt")) { + assertUnicodeName(ze, OIL_BARREL_TXT, encoding); + + } else if (ze.getName().endsWith("_for_Dollar.txt")) { + assertUnicodeName(ze, EURO_FOR_DOLLAR_TXT, encoding); + } else if 
(!ze.getName().equals(ASCII_TXT)) { + throw new AssertionError("Unrecognized ZIP entry with name [" + + ze.getName() + "] found."); + } + } + } finally { + ZipFile.closeQuietly(zf); + } + } + + private static UnicodePathExtraField findUniCodePath(final ZipArchiveEntry ze) { + return (UnicodePathExtraField) + ze.getExtraField(UnicodePathExtraField.UPATH_ID); + } + + private static void assertUnicodeName(final ZipArchiveEntry ze, + final String expectedName, + final String encoding) + throws IOException { + if (!expectedName.equals(ze.getName())) { + final UnicodePathExtraField ucpf = findUniCodePath(ze); + assertNotNull(ucpf); + + final ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding); + final ByteBuffer ne = enc.encode(ze.getName()); + + final CRC32 crc = new CRC32(); + crc.update(ne.array(), ne.arrayOffset(), + ne.limit() - ne.position()); + + assertEquals(crc.getValue(), ucpf.getNameCRC32()); + assertEquals(expectedName, new String(ucpf.getUnicodeName(), + CharsetNames.UTF_8)); + } + } + + @Test + public void testUtf8Interoperability() throws IOException { + final File file1 = getFile("utf8-7zip-test.zip"); + final File file2 = getFile("utf8-winzip-test.zip"); + + testFile(file1,CP437); + testFile(file2,CP437); + + } + + private static void assertRawNameOfAcsiiTxt(final ZipArchiveEntry ze) { + final byte[] b = ze.getRawName(); + assertNotNull(b); + final int len = ASCII_TXT.length(); + assertEquals(len, b.length); + for (int i = 0; i < len; i++) { + assertEquals("Byte " + i, (byte) ASCII_TXT.charAt(i), b[i]); + } + assertNotSame(b, ze.getRawName()); + } +} + diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/X000A_NTFSTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/X000A_NTFSTest.java new file mode 100644 index 000000000..277d360af --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/X000A_NTFSTest.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.zip; + +import org.junit.Test; + +import java.util.Date; + +import static org.junit.Assert.assertEquals; + +public class X000A_NTFSTest { + + @Test + public void simpleRoundtrip() throws Exception { + final X000A_NTFS xf = new X000A_NTFS(); + xf.setModifyJavaTime(new Date(0)); + // one second past midnight + xf.setAccessJavaTime(new Date(-11644473601000L)); + xf.setCreateJavaTime(null); + final byte[] b = xf.getLocalFileDataData(); + + final X000A_NTFS xf2 = new X000A_NTFS(); + xf2.parseFromLocalFileData(b, 0, b.length); + assertEquals(new Date(0), xf2.getModifyJavaTime()); + assertEquals(new Date(-11644473601000L), xf2.getAccessJavaTime()); + assertEquals(null, xf2.getCreateJavaTime()); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/X5455_ExtendedTimestampTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/X5455_ExtendedTimestampTest.java new file mode 100644 index 000000000..e3bd60eab --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/X5455_ExtendedTimestampTest.java @@ -0,0 +1,558 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.zip; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Date; +import java.util.Enumeration; +import java.util.TimeZone; +import java.util.zip.ZipException; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.apache.commons.compress.AbstractTestCase.mkdir; +import static org.apache.commons.compress.AbstractTestCase.rmdir; +import static org.apache.commons.compress.archivers.zip.X5455_ExtendedTimestamp.ACCESS_TIME_BIT; +import static org.apache.commons.compress.archivers.zip.X5455_ExtendedTimestamp.CREATE_TIME_BIT; +import static org.apache.commons.compress.archivers.zip.X5455_ExtendedTimestamp.MODIFY_TIME_BIT; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class X5455_ExtendedTimestampTest { + private final static ZipShort X5455 = new ZipShort(0x5455); + + private final static ZipLong ZERO_TIME = new ZipLong(0); + private final static ZipLong MAX_TIME_SECONDS = new ZipLong(Integer.MAX_VALUE); + private final static SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd/HH:mm:ss Z"); + + static { + DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + + /** + * The extended field (xf) we are testing. + */ + private X5455_ExtendedTimestamp xf; + + private File tmpDir; + + @Before + public void before() { + xf = new X5455_ExtendedTimestamp(); + } + + @After + public void removeTempFiles() { + if (tmpDir != null) { + rmdir(tmpDir); + } + } + + @Test + public void testSampleFile() throws Exception { + + /* + Contains entries with zipTime, accessTime, and modifyTime. + The file name tells you the year we tried to set the time to + (Jan 1st, Midnight, UTC). + + For example: + + COMPRESS-210_unix_time_zip_test/1999 + COMPRESS-210_unix_time_zip_test/2000 + COMPRESS-210_unix_time_zip_test/2108 + + File's last-modified is 1st second after midnight. + Zip-time's 2-second granularity rounds that up to 2nd second. + File's last-access is 3rd second after midnight. + + So, from example above: + + 1999's zip time: Jan 1st, 1999-01-01/00:00:02 + 1999's mod time: Jan 1st, 1999-01-01/00:00:01 + 1999's acc time: Jan 1st, 1999-01-01/00:00:03 + + Starting with a patch release of Java8, "zip time" actually + uses the extended time stamp field itself and should be the + same as "mod time". + http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/rev/90df6756406f + + Starting with Java9 the parser for extended time stamps has + been fixed to use signed integers which was detected during + the triage of COMPRESS-416. Signed integers is the correct + format and Compress 1.15 has started to use signed integers as + well. + */ + + final File archive = getFile("COMPRESS-210_unix_time_zip_test.zip"); + ZipFile zf = null; + + try { + zf = new ZipFile(archive); + final Enumeration<ZipArchiveEntry> en = zf.getEntries(); + + // We expect EVERY entry of this zip file + // to contain extra field 0x5455. 
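+ // (Layout reminder, per the spec excerpt quoted in testParseReparse below:
+ // the 0x5455 data is one flags byte followed by up to three little-endian
+ // 32-bit Unix times. The loop below reads the modify and access times back
+ // and checks them against the year encoded in each entry's name.)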
+ while (en.hasMoreElements()) { + + final ZipArchiveEntry zae = en.nextElement(); + if (zae.isDirectory()) { + continue; + } + final String name = zae.getName(); + final int x = name.lastIndexOf('/'); + final String yearString = name.substring(x + 1); + int year; + try { + year = Integer.parseInt(yearString); + } catch (final NumberFormatException nfe) { + // setTime.sh, skip + continue; + } + + final X5455_ExtendedTimestamp xf = (X5455_ExtendedTimestamp) zae.getExtraField(X5455); + final Date rawZ = zae.getLastModifiedDate(); + final Date m = xf.getModifyJavaTime(); + + /* + We must distinguish three cases: + - Java has read the extended time field itself and agrees with us (Java9 or Java8 and years prior to + 2038) + - Java has read the extended time field but found a year >= 2038 (Java8) + - Java hasn't read the extended time field at all (Java7- or early Java8) + */ + + final boolean zipTimeUsesExtendedTimestampCorrectly = rawZ.equals(m); + final boolean zipTimeUsesExtendedTimestampButUnsigned = year > 2037 && rawZ.getSeconds() == 1; + final boolean zipTimeUsesExtendedTimestamp = zipTimeUsesExtendedTimestampCorrectly + || zipTimeUsesExtendedTimestampButUnsigned; + + final Date z = zipTimeUsesExtendedTimestamp ? rawZ : adjustFromGMTToExpectedOffset(rawZ); + final Date a = xf.getAccessJavaTime(); + + final String zipTime = DATE_FORMAT.format(z); + final String modTime = DATE_FORMAT.format(m); + final String accTime = DATE_FORMAT.format(a); + + switch (year) { + case 2109: + // All three timestamps have overflowed by 2109. + if (!zipTimeUsesExtendedTimestamp) { + assertEquals("1981-01-01/00:00:02 +0000", zipTime); + } + break; + default: + if (!zipTimeUsesExtendedTimestamp) { + // X5455 time is good from epoch (1970) to 2037. + // Zip time is good from 1980 to 2107. + if (year < 1980) { + assertEquals("1980-01-01/08:00:00 +0000", zipTime); + } else { + assertEquals(year + "-01-01/00:00:02 +0000", zipTime); + } + } + + if (year < 2038) { + assertEquals(year + "-01-01/00:00:01 +0000", modTime); + assertEquals(year + "-01-01/00:00:03 +0000", accTime); + } + break; + } + } + } finally { + if (zf != null) { + zf.close(); + } + } + } + + + @Test + public void testMisc() throws Exception { + assertFalse(xf.equals(new Object())); + assertTrue(xf.toString().startsWith("0x5455 Zip Extra Field")); + assertTrue(!xf.toString().contains(" Modify:")); + assertTrue(!xf.toString().contains(" Access:")); + assertTrue(!xf.toString().contains(" Create:")); + Object o = xf.clone(); + assertEquals(o.hashCode(), xf.hashCode()); + assertTrue(xf.equals(o)); + + xf.setModifyJavaTime(new Date(1111)); + xf.setAccessJavaTime(new Date(2222)); + xf.setCreateJavaTime(new Date(3333)); + xf.setFlags((byte) 7); + assertFalse(xf.equals(o)); + assertTrue(xf.toString().startsWith("0x5455 Zip Extra Field")); + assertTrue(xf.toString().contains(" Modify:")); + assertTrue(xf.toString().contains(" Access:")); + assertTrue(xf.toString().contains(" Create:")); + o = xf.clone(); + assertEquals(o.hashCode(), xf.hashCode()); + assertTrue(xf.equals(o)); + } + + @Test + public void testGettersSetters() { + // X5455 is concerned with time, so let's + // get a timestamp to play with (Jan 1st, 2000). 
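+ // (java.util.Date carries milliseconds while the extra field stores 32-bit
+ // Unix seconds, hence the division by 1000 below and the later checks that
+ // milliseconds get zeroed out.)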
+ final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + cal.set(Calendar.YEAR, 2000); + cal.set(Calendar.MONTH, Calendar.JANUARY); + cal.set(Calendar.DATE, 1); + cal.set(Calendar.HOUR_OF_DAY, 0); + cal.set(Calendar.MINUTE, 0); + cal.set(Calendar.SECOND, 0); + cal.set(Calendar.MILLISECOND, 0); + final long timeMillis = cal.getTimeInMillis(); + final ZipLong time = new ZipLong(timeMillis / 1000); + + // set too big + try { + // Java time is 1000 x larger (milliseconds). + xf.setModifyJavaTime(new Date(1000L * (MAX_TIME_SECONDS.getValue() + 1L))); + fail("Time too big for 32 bits!"); + } catch (final IllegalArgumentException iae) { + // All is good. + } + + // get/set modify time + xf.setModifyTime(time); + assertEquals(time, xf.getModifyTime()); + Date xfModifyJavaTime = xf.getModifyJavaTime(); + assertEquals(timeMillis, xfModifyJavaTime.getTime()); + xf.setModifyJavaTime(new Date(timeMillis)); + assertEquals(time, xf.getModifyTime()); + assertEquals(timeMillis, xf.getModifyJavaTime().getTime()); + // Make sure milliseconds get zeroed out: + xf.setModifyJavaTime(new Date(timeMillis + 123)); + assertEquals(time, xf.getModifyTime()); + assertEquals(timeMillis, xf.getModifyJavaTime().getTime()); + // Null + xf.setModifyTime(null); + assertNull(xf.getModifyJavaTime()); + xf.setModifyJavaTime(null); + assertNull(xf.getModifyTime()); + + // get/set access time + xf.setAccessTime(time); + assertEquals(time, xf.getAccessTime()); + assertEquals(timeMillis, xf.getAccessJavaTime().getTime()); + xf.setAccessJavaTime(new Date(timeMillis)); + assertEquals(time, xf.getAccessTime()); + assertEquals(timeMillis, xf.getAccessJavaTime().getTime()); + // Make sure milliseconds get zeroed out: + xf.setAccessJavaTime(new Date(timeMillis + 123)); + assertEquals(time, xf.getAccessTime()); + assertEquals(timeMillis, xf.getAccessJavaTime().getTime()); + // Null + xf.setAccessTime(null); + assertNull(xf.getAccessJavaTime()); + xf.setAccessJavaTime(null); + assertNull(xf.getAccessTime()); + + // get/set create time + xf.setCreateTime(time); + assertEquals(time, xf.getCreateTime()); + assertEquals(timeMillis, xf.getCreateJavaTime().getTime()); + xf.setCreateJavaTime(new Date(timeMillis)); + assertEquals(time, xf.getCreateTime()); + assertEquals(timeMillis, xf.getCreateJavaTime().getTime()); + // Make sure milliseconds get zeroed out: + xf.setCreateJavaTime(new Date(timeMillis + 123)); + assertEquals(time, xf.getCreateTime()); + assertEquals(timeMillis, xf.getCreateJavaTime().getTime()); + // Null + xf.setCreateTime(null); + assertNull(xf.getCreateJavaTime()); + xf.setCreateJavaTime(null); + assertNull(xf.getCreateTime()); + + + // initialize for flags + xf.setModifyTime(time); + xf.setAccessTime(time); + xf.setCreateTime(time); + + // get/set flags: 000 + xf.setFlags((byte) 0); + assertEquals(0, xf.getFlags()); + assertFalse(xf.isBit0_modifyTimePresent()); + assertFalse(xf.isBit1_accessTimePresent()); + assertFalse(xf.isBit2_createTimePresent()); + // Local length=1, Central length=1 (flags only!) 
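+ // (Pattern for the length checks that follow: one flag byte plus four bytes
+ // per timestamp actually written; the central directory copy only ever
+ // carries the modify time, so its length stays at 1 or 5.)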
+ assertEquals(1, xf.getLocalFileDataLength().getValue()); + assertEquals(1, xf.getCentralDirectoryLength().getValue()); + + // get/set flags: 001 + xf.setFlags((byte) 1); + assertEquals(1, xf.getFlags()); + assertTrue(xf.isBit0_modifyTimePresent()); + assertFalse(xf.isBit1_accessTimePresent()); + assertFalse(xf.isBit2_createTimePresent()); + // Local length=5, Central length=5 (flags + mod) + assertEquals(5, xf.getLocalFileDataLength().getValue()); + assertEquals(5, xf.getCentralDirectoryLength().getValue()); + + // get/set flags: 010 + xf.setFlags((byte) 2); + assertEquals(2, xf.getFlags()); + assertFalse(xf.isBit0_modifyTimePresent()); + assertTrue(xf.isBit1_accessTimePresent()); + assertFalse(xf.isBit2_createTimePresent()); + // Local length=5, Central length=1 + assertEquals(5, xf.getLocalFileDataLength().getValue()); + assertEquals(1, xf.getCentralDirectoryLength().getValue()); + + // get/set flags: 100 + xf.setFlags((byte) 4); + assertEquals(4, xf.getFlags()); + assertFalse(xf.isBit0_modifyTimePresent()); + assertFalse(xf.isBit1_accessTimePresent()); + assertTrue(xf.isBit2_createTimePresent()); + // Local length=5, Central length=1 + assertEquals(5, xf.getLocalFileDataLength().getValue()); + assertEquals(1, xf.getCentralDirectoryLength().getValue()); + + // get/set flags: 111 + xf.setFlags((byte) 7); + assertEquals(7, xf.getFlags()); + assertTrue(xf.isBit0_modifyTimePresent()); + assertTrue(xf.isBit1_accessTimePresent()); + assertTrue(xf.isBit2_createTimePresent()); + // Local length=13, Central length=5 + assertEquals(13, xf.getLocalFileDataLength().getValue()); + assertEquals(5, xf.getCentralDirectoryLength().getValue()); + + // get/set flags: 11111111 + xf.setFlags((byte) -1); + assertEquals(-1, xf.getFlags()); + assertTrue(xf.isBit0_modifyTimePresent()); + assertTrue(xf.isBit1_accessTimePresent()); + assertTrue(xf.isBit2_createTimePresent()); + // Local length=13, Central length=5 + assertEquals(13, xf.getLocalFileDataLength().getValue()); + assertEquals(5, xf.getCentralDirectoryLength().getValue()); + } + + @Test + public void testGetHeaderId() { + assertEquals(X5455, xf.getHeaderId()); + } + + @Test + public void testParseReparse() throws ZipException { + /* + * Recall the spec: + * + * 0x5455 Short tag for this extra block type ("UT") + * TSize Short total data size for this block + * Flags Byte info bits + * (ModTime) Long time of last modification (UTC/GMT) + * (AcTime) Long time of last access (UTC/GMT) + * (CrTime) Long time of original creation (UTC/GMT) + */ + final byte[] NULL_FLAGS = {0}; + final byte[] AC_CENTRAL = {2}; // central data only contains the AC flag and no actual data + final byte[] CR_CENTRAL = {4}; // central data only contains the CR flag and no actual data + + final byte[] MOD_ZERO = {1, 0, 0, 0, 0}; + final byte[] MOD_MAX = {1, -1, -1, -1, 0x7f}; + final byte[] AC_ZERO = {2, 0, 0, 0, 0}; + final byte[] AC_MAX = {2, -1, -1, -1, 0x7f}; + final byte[] CR_ZERO = {4, 0, 0, 0, 0}; + final byte[] CR_MAX = {4, -1, -1, -1, 0x7f}; + final byte[] MOD_AC_ZERO = {3, 0, 0, 0, 0, 0, 0, 0, 0}; + final byte[] MOD_AC_MAX = {3, -1, -1, -1, 0x7f, -1, -1, -1, 0x7f}; + final byte[] MOD_AC_CR_ZERO = {7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + final byte[] MOD_AC_CR_MAX = {7, -1, -1, -1, 0x7f, -1, -1, -1, 0x7f, -1, -1, -1, 0x7f}; + + parseReparse(null, NULL_FLAGS, NULL_FLAGS); + parseReparse(ZERO_TIME, MOD_ZERO, MOD_ZERO); + parseReparse(MAX_TIME_SECONDS, MOD_MAX, MOD_MAX); + parseReparse(ZERO_TIME, AC_ZERO, AC_CENTRAL); + parseReparse(MAX_TIME_SECONDS, AC_MAX, AC_CENTRAL); 
+ parseReparse(ZERO_TIME, CR_ZERO, CR_CENTRAL); + parseReparse(MAX_TIME_SECONDS, CR_MAX, CR_CENTRAL); + parseReparse(ZERO_TIME, MOD_AC_ZERO, MOD_ZERO); + parseReparse(MAX_TIME_SECONDS, MOD_AC_MAX, MOD_MAX); + parseReparse(ZERO_TIME, MOD_AC_CR_ZERO, MOD_ZERO); + parseReparse(MAX_TIME_SECONDS, MOD_AC_CR_MAX, MOD_MAX); + + // As far as the spec is concerned (December 2012) all of these flags + // are spurious versions of 7 (a.k.a. binary 00000111). + parseReparse((byte) 15, MAX_TIME_SECONDS, (byte) 7, MOD_AC_CR_MAX, MOD_MAX); + parseReparse((byte) 31, MAX_TIME_SECONDS, (byte) 7, MOD_AC_CR_MAX, MOD_MAX); + parseReparse((byte) 63, MAX_TIME_SECONDS, (byte) 7, MOD_AC_CR_MAX, MOD_MAX); + parseReparse((byte) 71, MAX_TIME_SECONDS, (byte) 7, MOD_AC_CR_MAX, MOD_MAX); + parseReparse((byte) 127, MAX_TIME_SECONDS, (byte) 7, MOD_AC_CR_MAX, MOD_MAX); + parseReparse((byte) -1, MAX_TIME_SECONDS, (byte) 7, MOD_AC_CR_MAX, MOD_MAX); + } + + @Test + public void testWriteReadRoundtrip() throws IOException { + tmpDir = mkdir("X5455"); + final File output = new File(tmpDir, "write_rewrite.zip"); + final OutputStream out = new FileOutputStream(output); + final Date d = new Date(97, 8, 24, 15, 10, 2); + ZipArchiveOutputStream os = null; + try { + os = new ZipArchiveOutputStream(out); + final ZipArchiveEntry ze = new ZipArchiveEntry("foo"); + xf.setModifyJavaTime(d); + xf.setFlags((byte) 1); + ze.addExtraField(xf); + os.putArchiveEntry(ze); + os.closeArchiveEntry(); + } finally { + if (os != null) { + os.close(); + } + } + out.close(); + + final ZipFile zf = new ZipFile(output); + final ZipArchiveEntry ze = zf.getEntry("foo"); + final X5455_ExtendedTimestamp ext = + (X5455_ExtendedTimestamp) ze.getExtraField(X5455); + assertNotNull(ext); + assertTrue(ext.isBit0_modifyTimePresent()); + assertEquals(d, ext.getModifyJavaTime()); + zf.close(); + } + + @Test + public void testBitsAreSetWithTime() { + xf.setModifyJavaTime(new Date(1111)); + assertTrue(xf.isBit0_modifyTimePresent()); + assertEquals(1, xf.getFlags()); + xf.setAccessJavaTime(new Date(2222)); + assertTrue(xf.isBit1_accessTimePresent()); + assertEquals(3, xf.getFlags()); + xf.setCreateJavaTime(new Date(3333)); + assertTrue(xf.isBit2_createTimePresent()); + assertEquals(7, xf.getFlags()); + xf.setModifyJavaTime(null); + assertFalse(xf.isBit0_modifyTimePresent()); + assertEquals(6, xf.getFlags()); + xf.setAccessJavaTime(null); + assertFalse(xf.isBit1_accessTimePresent()); + assertEquals(4, xf.getFlags()); + xf.setCreateJavaTime(null); + assertFalse(xf.isBit2_createTimePresent()); + assertEquals(0, xf.getFlags()); + } + + private void parseReparse( + final ZipLong time, + final byte[] expectedLocal, + final byte[] almostExpectedCentral + ) throws ZipException { + parseReparse(expectedLocal[0], time, expectedLocal[0], expectedLocal, almostExpectedCentral); + } + + private void parseReparse( + final byte providedFlags, + final ZipLong time, + final byte expectedFlags, + final byte[] expectedLocal, + final byte[] almostExpectedCentral + ) throws ZipException { + + // We're responsible for expectedCentral's flags. Too annoying to set in caller. 
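+ // (almostExpectedCentral already holds the right payload; only its leading
+ // flag byte may need to be overwritten with the expected flags, which is
+ // done below before comparing.)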
+ final byte[] expectedCentral = new byte[almostExpectedCentral.length]; + System.arraycopy(almostExpectedCentral, 0, expectedCentral, 0, almostExpectedCentral.length); + expectedCentral[0] = expectedFlags; + + xf.setModifyTime(time); + xf.setAccessTime(time); + xf.setCreateTime(time); + xf.setFlags(providedFlags); + byte[] result = xf.getLocalFileDataData(); + assertTrue(Arrays.equals(expectedLocal, result)); + + // And now we re-parse: + xf.parseFromLocalFileData(result, 0, result.length); + assertEquals(expectedFlags, xf.getFlags()); + if (isFlagSet(expectedFlags, MODIFY_TIME_BIT)) { + assertTrue(xf.isBit0_modifyTimePresent()); + assertEquals(time, xf.getModifyTime()); + } + if (isFlagSet(expectedFlags, ACCESS_TIME_BIT)) { + assertTrue(xf.isBit1_accessTimePresent()); + assertEquals(time, xf.getAccessTime()); + } + if (isFlagSet(expectedFlags, CREATE_TIME_BIT)) { + assertTrue(xf.isBit2_createTimePresent()); + assertEquals(time, xf.getCreateTime()); + } + + // Do the same as above, but with Central Directory data: + xf.setModifyTime(time); + xf.setAccessTime(time); + xf.setCreateTime(time); + xf.setFlags(providedFlags); + result = xf.getCentralDirectoryData(); + assertTrue(Arrays.equals(expectedCentral, result)); + + // And now we re-parse: + xf.parseFromCentralDirectoryData(result, 0, result.length); + assertEquals(expectedFlags, xf.getFlags()); + // Central Directory never contains ACCESS or CREATE, but + // may contain MODIFY. + if (isFlagSet(expectedFlags, MODIFY_TIME_BIT)) { + assertTrue(xf.isBit0_modifyTimePresent()); + assertEquals(time, xf.getModifyTime()); + } + } + + private static boolean isFlagSet(final byte data, final byte flag) { return (data & flag) == flag; } + + /** + * InfoZIP seems to adjust the time stored inside the LFH and CD + * to GMT when writing ZIPs while java.util.zip.ZipEntry thinks it + * was in local time. + * + * The archive read in {@link #testSampleFile} has been created + * with GMT-8 so we need to adjust for the difference. + */ + private static Date adjustFromGMTToExpectedOffset(final Date from) { + final Calendar cal = Calendar.getInstance(); + cal.setTime(from); + cal.add(Calendar.MILLISECOND, cal.get(Calendar.ZONE_OFFSET)); + if (cal.getTimeZone().inDaylightTime(from)) { + cal.add(Calendar.MILLISECOND, cal.get(Calendar.DST_OFFSET)); + } + cal.add(Calendar.HOUR, 8); + return cal.getTime(); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/X7875_NewUnixTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/X7875_NewUnixTest.java new file mode 100644 index 000000000..b1488091d --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/X7875_NewUnixTest.java @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers.zip; + +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.Enumeration; +import java.util.zip.ZipException; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class X7875_NewUnixTest { + + private final static ZipShort X7875 = new ZipShort(0x7875); + + private X7875_NewUnix xf; + + @Before + public void before() { + xf = new X7875_NewUnix(); + } + + + @Test + public void testSampleFile() throws Exception { + final File archive = getFile("COMPRESS-211_uid_gid_zip_test.zip"); + ZipFile zf = null; + + try { + zf = new ZipFile(archive); + final Enumeration<ZipArchiveEntry> en = zf.getEntries(); + + // We expect EVERY entry of this zip file (dir & file) to + // contain extra field 0x7875. + while (en.hasMoreElements()) { + + final ZipArchiveEntry zae = en.nextElement(); + final String name = zae.getName(); + final X7875_NewUnix xf = (X7875_NewUnix) zae.getExtraField(X7875); + + // The directory entry in the test zip file is uid/gid 1000. + long expected = 1000; + if (name.contains("uid555_gid555")) { + expected = 555; + } else if (name.contains("uid5555_gid5555")) { + expected = 5555; + } else if (name.contains("uid55555_gid55555")) { + expected = 55555; + } else if (name.contains("uid555555_gid555555")) { + expected = 555555; + } else if (name.contains("min_unix")) { + expected = 0; + } else if (name.contains("max_unix")) { + // 2^32-2 was the biggest UID/GID I could create on my linux! + // (December 2012, linux kernel 3.4) + expected = 0x100000000L - 2; + } + assertEquals(expected, xf.getUID()); + assertEquals(expected, xf.getGID()); + } + } finally { + if (zf != null) { + zf.close(); + } + } + } + + @Test + public void testGetHeaderId() { + assertEquals(X7875, xf.getHeaderId()); + } + + @Test + public void testMisc() throws Exception { + assertFalse(xf.equals(new Object())); + assertTrue(xf.toString().startsWith("0x7875 Zip Extra Field")); + final Object o = xf.clone(); + assertEquals(o.hashCode(), xf.hashCode()); + assertTrue(xf.equals(o)); + xf.setUID(12345); + assertFalse(xf.equals(o)); + } + + @Test + public void testTrimLeadingZeroesForceMinLength4() { + final byte[] NULL = null; + final byte[] EMPTY = new byte[0]; + final byte[] ONE_ZERO = {0}; + final byte[] TWO_ZEROES = {0, 0}; + final byte[] FOUR_ZEROES = {0, 0, 0, 0}; + final byte[] SEQUENCE = {1, 2, 3}; + final byte[] SEQUENCE_LEADING_ZERO = {0, 1, 2, 3}; + final byte[] SEQUENCE_LEADING_ZEROES = {0, 0, 0, 0, 0, 0, 0, 1, 2, 3}; + final byte[] TRAILING_ZERO = {1, 2, 3, 0}; + final byte[] PADDING_ZERO = {0, 1, 2, 3, 0}; + final byte[] SEQUENCE6 = {1, 2, 3, 4, 5, 6}; + final byte[] SEQUENCE6_LEADING_ZERO = {0, 1, 2, 3, 4, 5, 6}; + + assertTrue(NULL == trimTest(NULL)); + assertTrue(Arrays.equals(ONE_ZERO, trimTest(EMPTY))); + assertTrue(Arrays.equals(ONE_ZERO, trimTest(ONE_ZERO))); + assertTrue(Arrays.equals(ONE_ZERO, trimTest(TWO_ZEROES))); + assertTrue(Arrays.equals(ONE_ZERO, trimTest(FOUR_ZEROES))); + assertTrue(Arrays.equals(SEQUENCE, trimTest(SEQUENCE))); + assertTrue(Arrays.equals(SEQUENCE, trimTest(SEQUENCE_LEADING_ZERO))); + assertTrue(Arrays.equals(SEQUENCE, trimTest(SEQUENCE_LEADING_ZEROES))); + assertTrue(Arrays.equals(TRAILING_ZERO, trimTest(TRAILING_ZERO))); + 
assertTrue(Arrays.equals(TRAILING_ZERO, trimTest(PADDING_ZERO))); + assertTrue(Arrays.equals(SEQUENCE6, trimTest(SEQUENCE6))); + assertTrue(Arrays.equals(SEQUENCE6, trimTest(SEQUENCE6_LEADING_ZERO))); + } + + private static byte[] trimTest(final byte[] b) { return X7875_NewUnix.trimLeadingZeroesForceMinLength(b); } + + @Test + public void testParseReparse() throws ZipException { + + // Version=1, Len=0, Len=0. + final byte[] ZERO_LEN = {1, 0, 0}; + + // Version=1, Len=1, zero, Len=1, zero. + final byte[] ZERO_UID_GID = {1, 1, 0, 1, 0}; + + // Version=1, Len=1, one, Len=1, one + final byte[] ONE_UID_GID = {1, 1, 1, 1, 1}; + + // Version=1, Len=2, one thousand, Len=2, one thousand + final byte[] ONE_THOUSAND_UID_GID = {1, 2, -24, 3, 2, -24, 3}; + + // (2^32 - 2). I guess they avoid (2^32 - 1) since it's identical to -1 in + // two's complement, and -1 often has a special meaning. + final byte[] UNIX_MAX_UID_GID = {1, 4, -2, -1, -1, -1, 4, -2, -1, -1, -1}; + + // Version=1, Len=5, 2^32, Len=5, 2^32 + 1 + // Esoteric test: can we handle 40 bit numbers? + final byte[] LENGTH_5 = {1, 5, 0, 0, 0, 0, 1, 5, 1, 0, 0, 0, 1}; + + // Version=1, Len=8, 2^63 - 2, Len=8, 2^63 - 1 + // Esoteric test: can we handle 64 bit numbers? + final byte[] LENGTH_8 = {1, 8, -2, -1, -1, -1, -1, -1, -1, 127, 8, -1, -1, -1, -1, -1, -1, -1, 127}; + + final long TWO_TO_32 = 0x100000000L; + final long MAX = TWO_TO_32 - 2; + + parseReparse(0, 0, ZERO_LEN, 0, 0); + parseReparse(0, 0, ZERO_UID_GID, 0, 0); + parseReparse(1, 1, ONE_UID_GID, 1, 1); + parseReparse(1000, 1000, ONE_THOUSAND_UID_GID, 1000, 1000); + parseReparse(MAX, MAX, UNIX_MAX_UID_GID, MAX, MAX); + parseReparse(-2, -2, UNIX_MAX_UID_GID, MAX, MAX); + parseReparse(TWO_TO_32, TWO_TO_32 + 1, LENGTH_5, TWO_TO_32, TWO_TO_32 + 1); + parseReparse(Long.MAX_VALUE - 1, Long.MAX_VALUE, LENGTH_8, Long.MAX_VALUE - 1, Long.MAX_VALUE); + + // We never emit this, but we should be able to parse it: + final byte[] SPURIOUS_ZEROES_1 = {1, 4, -1, 0, 0, 0, 4, -128, 0, 0, 0}; + final byte[] EXPECTED_1 = {1, 1, -1, 1, -128}; + xf.parseFromLocalFileData(SPURIOUS_ZEROES_1, 0, SPURIOUS_ZEROES_1.length); + + assertEquals(255, xf.getUID()); + assertEquals(128, xf.getGID()); + assertTrue(Arrays.equals(EXPECTED_1, xf.getLocalFileDataData())); + + final byte[] SPURIOUS_ZEROES_2 = {1, 4, -1, -1, 0, 0, 4, 1, 2, 0, 0}; + final byte[] EXPECTED_2 = {1, 2, -1, -1, 2, 1, 2}; + xf.parseFromLocalFileData(SPURIOUS_ZEROES_2, 0, SPURIOUS_ZEROES_2.length); + + assertEquals(65535, xf.getUID()); + assertEquals(513, xf.getGID()); + assertTrue(Arrays.equals(EXPECTED_2, xf.getLocalFileDataData())); + } + + + private void parseReparse( + final long uid, + final long gid, + final byte[] expected, + final long expectedUID, + final long expectedGID + ) throws ZipException { + + // Initial local parse (init with garbage to avoid defaults causing test to pass). + xf.setUID(54321); + xf.setGID(12345); + xf.parseFromLocalFileData(expected, 0, expected.length); + assertEquals(expectedUID, xf.getUID()); + assertEquals(expectedGID, xf.getGID()); + + xf.setUID(uid); + xf.setGID(gid); + if (expected.length < 5) { + // We never emit zero-length entries. + assertEquals(5, xf.getLocalFileDataLength().getValue()); + } else { + assertEquals(expected.length, xf.getLocalFileDataLength().getValue()); + } + byte[] result = xf.getLocalFileDataData(); + if (expected.length < 5) { + // We never emit zero-length entries. 
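+ // (The shortest form written is {1, 1, 0, 1, 0}: version 1, a one-byte uid
+ // of 0 and a one-byte gid of 0, matching ZERO_UID_GID above.)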
+ assertTrue(Arrays.equals(new byte[]{1,1,0,1,0}, result)); + } else { + assertTrue(Arrays.equals(expected, result)); + } + + + + // And now we re-parse: + xf.parseFromLocalFileData(result, 0, result.length); + + // Did uid/gid change from re-parse? They shouldn't! + assertEquals(expectedUID, xf.getUID()); + assertEquals(expectedGID, xf.getGID()); + + assertEquals(0, xf.getCentralDirectoryLength().getValue()); + result = xf.getCentralDirectoryData(); + assertArrayEquals(new byte[0], result); + + // And now we re-parse: + xf.parseFromCentralDirectoryData(result, 0, result.length); + + // Did uid/gid change from 2nd re-parse? They shouldn't! + assertEquals(expectedUID, xf.getUID()); + assertEquals(expectedGID, xf.getGID()); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/Zip64ExtendedInformationExtraFieldTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/Zip64ExtendedInformationExtraFieldTest.java new file mode 100644 index 000000000..e366ffafb --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/Zip64ExtendedInformationExtraFieldTest.java @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import java.math.BigInteger; +import java.util.zip.ZipException; + +import org.junit.Test; + +public class Zip64ExtendedInformationExtraFieldTest { + + private static final ZipEightByteInteger SIZE = + new ZipEightByteInteger(0x12345678); + private static final ZipEightByteInteger CSIZE = + new ZipEightByteInteger(0x9ABCDEF); + private static final ZipEightByteInteger OFF = + new ZipEightByteInteger(BigInteger.valueOf(0xABCDEF091234567l) + .shiftLeft(4) + .setBit(3)); + private static final ZipLong DISK = new ZipLong(0x12); + + @Test + public void testWriteCDOnlySizes() { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(SIZE, CSIZE); + assertEquals(new ZipShort(16), f.getCentralDirectoryLength()); + final byte[] b = f.getCentralDirectoryData(); + assertEquals(16, b.length); + checkSizes(b); + } + + @Test + public void testWriteCDSizeAndOffset() { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(SIZE, CSIZE, OFF, null); + assertEquals(new ZipShort(24), f.getCentralDirectoryLength()); + final byte[] b = f.getCentralDirectoryData(); + assertEquals(24, b.length); + checkSizes(b); + checkOffset(b, 16); + } + + @Test + public void testWriteCDSizeOffsetAndDisk() { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(SIZE, CSIZE, OFF, DISK); + assertEquals(new ZipShort(28), f.getCentralDirectoryLength()); + final byte[] b = f.getCentralDirectoryData(); + assertEquals(28, b.length); + checkSizes(b); + checkOffset(b, 16); + checkDisk(b, 24); + } + + @Test + public void testWriteCDSizeAndDisk() { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(SIZE, CSIZE, null, DISK); + assertEquals(new ZipShort(20), f.getCentralDirectoryLength()); + final byte[] b = f.getCentralDirectoryData(); + assertEquals(20, b.length); + checkSizes(b); + checkDisk(b, 16); + } + + @Test + public void testReadLFHSizesOnly() throws ZipException { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(); + final byte[] b = new byte[16]; + System.arraycopy(SIZE.getBytes(), 0, b, 0, 8); + System.arraycopy(CSIZE.getBytes(), 0, b, 8, 8); + f.parseFromLocalFileData(b, 0, b.length); + assertEquals(SIZE, f.getSize()); + assertEquals(CSIZE, f.getCompressedSize()); + assertNull(f.getRelativeHeaderOffset()); + assertNull(f.getDiskStartNumber()); + } + + @Test + public void testReadLFHSizesAndOffset() throws ZipException { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(); + final byte[] b = new byte[24]; + System.arraycopy(SIZE.getBytes(), 0, b, 0, 8); + System.arraycopy(CSIZE.getBytes(), 0, b, 8, 8); + System.arraycopy(OFF.getBytes(), 0, b, 16, 8); + f.parseFromLocalFileData(b, 0, b.length); + assertEquals(SIZE, f.getSize()); + assertEquals(CSIZE, f.getCompressedSize()); + assertEquals(OFF, f.getRelativeHeaderOffset()); + assertNull(f.getDiskStartNumber()); + } + + @Test + public void testReadLFHSizesOffsetAndDisk() throws ZipException { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(); + final byte[] b = new byte[28]; + System.arraycopy(SIZE.getBytes(), 0, b, 0, 8); + System.arraycopy(CSIZE.getBytes(), 0, b, 8, 8); + System.arraycopy(OFF.getBytes(), 0, b, 16, 8); + System.arraycopy(DISK.getBytes(), 0, b, 24, 4); + f.parseFromLocalFileData(b, 0, b.length); + assertEquals(SIZE, 
f.getSize()); + assertEquals(CSIZE, f.getCompressedSize()); + assertEquals(OFF, f.getRelativeHeaderOffset()); + assertEquals(DISK, f.getDiskStartNumber()); + } + + @Test + public void testReadLFHSizesAndDisk() throws ZipException { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(); + final byte[] b = new byte[20]; + System.arraycopy(SIZE.getBytes(), 0, b, 0, 8); + System.arraycopy(CSIZE.getBytes(), 0, b, 8, 8); + System.arraycopy(DISK.getBytes(), 0, b, 16, 4); + f.parseFromLocalFileData(b, 0, b.length); + assertEquals(SIZE, f.getSize()); + assertEquals(CSIZE, f.getCompressedSize()); + assertNull(f.getRelativeHeaderOffset()); + assertEquals(DISK, f.getDiskStartNumber()); + } + + @Test + public void testReadCDSizesOffsetAndDisk() throws ZipException { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(); + final byte[] b = new byte[28]; + System.arraycopy(SIZE.getBytes(), 0, b, 0, 8); + System.arraycopy(CSIZE.getBytes(), 0, b, 8, 8); + System.arraycopy(OFF.getBytes(), 0, b, 16, 8); + System.arraycopy(DISK.getBytes(), 0, b, 24, 4); + f.parseFromCentralDirectoryData(b, 0, b.length); + assertEquals(SIZE, f.getSize()); + assertEquals(CSIZE, f.getCompressedSize()); + assertEquals(OFF, f.getRelativeHeaderOffset()); + assertEquals(DISK, f.getDiskStartNumber()); + } + + @Test + public void testReadCDSizesAndOffset() throws ZipException { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(); + final byte[] b = new byte[24]; + System.arraycopy(SIZE.getBytes(), 0, b, 0, 8); + System.arraycopy(CSIZE.getBytes(), 0, b, 8, 8); + System.arraycopy(OFF.getBytes(), 0, b, 16, 8); + f.parseFromCentralDirectoryData(b, 0, b.length); + assertEquals(SIZE, f.getSize()); + assertEquals(CSIZE, f.getCompressedSize()); + assertEquals(OFF, f.getRelativeHeaderOffset()); + assertNull(f.getDiskStartNumber()); + } + + @Test + public void testReadCDSomethingAndDisk() throws ZipException { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(); + final byte[] b = new byte[12]; + System.arraycopy(SIZE.getBytes(), 0, b, 0, 8); + System.arraycopy(DISK.getBytes(), 0, b, 8, 4); + f.parseFromCentralDirectoryData(b, 0, b.length); + assertNull(f.getSize()); + assertNull(f.getCompressedSize()); + assertNull(f.getRelativeHeaderOffset()); + assertEquals(DISK, f.getDiskStartNumber()); + } + + @Test + public void testReparseCDSingleEightByteData() throws ZipException { + final Zip64ExtendedInformationExtraField f = + new Zip64ExtendedInformationExtraField(); + final byte[] b = new byte[8]; + System.arraycopy(SIZE.getBytes(), 0, b, 0, 8); + f.parseFromCentralDirectoryData(b, 0, b.length); + f.reparseCentralDirectoryData(true, false, false, false); + assertEquals(SIZE, f.getSize()); + assertNull(f.getCompressedSize()); + assertNull(f.getRelativeHeaderOffset()); + assertNull(f.getDiskStartNumber()); + f.setSize(null); + f.reparseCentralDirectoryData(false, true, false, false); + assertNull(f.getSize()); + assertEquals(SIZE, f.getCompressedSize()); + assertNull(f.getRelativeHeaderOffset()); + assertNull(f.getDiskStartNumber()); + f.setCompressedSize(null); + f.reparseCentralDirectoryData(false, false, true, false); + assertNull(f.getSize()); + assertNull(f.getCompressedSize()); + assertEquals(SIZE, f.getRelativeHeaderOffset()); + assertNull(f.getDiskStartNumber()); + } + + private static void checkSizes(final byte[] b) { + assertEquals(0x78, b[0]); + assertEquals(0x56, b[1]); + assertEquals(0x34, 
b[2]); + assertEquals(0x12, b[3]); + assertEquals(0x00, b[4]); + assertEquals(0x00, b[5]); + assertEquals(0x00, b[6]); + assertEquals(0x00, b[7]); + assertEquals((byte) 0xEF, b[8]); + assertEquals((byte) 0xCD, b[9]); + assertEquals((byte) 0xAB, b[10]); + assertEquals(0x09, b[11]); + assertEquals(0x00, b[12]); + assertEquals(0x00, b[13]); + assertEquals(0x00, b[14]); + assertEquals(0x00, b[15]); + } + + private static void checkOffset(final byte[] b, final int off) { + assertEquals(0x78, b[0 + off]); + assertEquals(0x56, b[1 + off]); + assertEquals(0x34, b[2 + off]); + assertEquals(0x12, b[3 + off]); + assertEquals((byte) 0x09, b[4 + off]); + assertEquals((byte) 0xEF, b[5 + off]); + assertEquals((byte) 0xCD, b[6 + off]); + assertEquals((byte) 0xAB, b[7 + off]); + } + + private static void checkDisk(final byte[] b, final int off) { + assertEquals(0x12, b[0 + off]); + assertEquals(0x00, b[1 + off]); + assertEquals(0x00, b[2 + off]); + assertEquals(0x00, b[3 + off]); + } +}
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/Zip64SupportIT.java b/src/test/java/org/apache/commons/compress/archivers/zip/Zip64SupportIT.java new file mode 100644 index 000000000..ba30c3f30 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/Zip64SupportIT.java @@ -0,0 +1,2662 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.junit.Assume.assumeTrue; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; +import java.util.Enumeration; +import java.util.Random; +import java.util.zip.ZipEntry; + +import org.apache.commons.compress.AbstractTestCase; +import org.junit.Test; + +public class Zip64SupportIT { + + private static final long FIVE_BILLION = 5000000000l; + private static final int ONE_MILLION = 1000000; + private static final int ONE_HUNDRED_THOUSAND = 100000; + + @Test public void read5GBOfZerosUsingInputStream() throws Throwable { + read5GBOfZerosImpl(get5GBZerosFile(), "5GB_of_Zeros"); + } + + @Test public void read5GBOfZerosGeneratedBy7ZIPUsingInputStream() + throws Throwable { + read5GBOfZerosImpl(get5GBZerosFileGeneratedBy7ZIP(), "5GB_of_Zeros"); + } + + @Test public void read5GBOfZerosGeneratedByJava7JarUsingInputStream() + throws Throwable { + read5GBOfZerosImpl(get5GBZerosFileGeneratedByJava7Jar(), "5GB_of_Zeros"); + } + + @Test public void read5GBOfZerosGeneratedByWinZIPUsingInputStream() + throws Throwable { + read5GBOfZerosImpl(get5GBZerosFileGeneratedByWinZIP(), "5GB_of_Zeros"); + } + + @Test public void read5GBOfZerosGeneratedByPKZipUsingInputStream() + throws Throwable { + read5GBOfZerosImpl(get5GBZerosFileGeneratedByPKZip(), + "zip6/5GB_of_Zeros"); + } + + @Test public void read100KFilesUsingInputStream() throws Throwable { + read100KFilesImpl(get100KFileFile()); + } + + @Test public void read100KFilesGeneratedBy7ZIPUsingInputStream() + throws Throwable { + read100KFilesImpl(get100KFileFileGeneratedBy7ZIP()); + } + + @Test public void read100KFilesGeneratedByWinCFUsingInputStream() + throws Throwable { + read100KFilesImpl(get100KFileFileGeneratedByWinCF()); + } + + @Test public void 
read100KFilesGeneratedByJava7JarUsingInputStream() + throws Throwable { + read100KFilesImpl(get100KFileFileGeneratedByJava7Jar()); + } + + @Test public void read100KFilesGeneratedByWinZIPUsingInputStream() + throws Throwable { + read100KFilesImpl(get100KFileFileGeneratedByWinZIP()); + } + + @Test public void read100KFilesGeneratedByPKZipUsingInputStream() + throws Throwable { + read100KFilesImpl(get100KFileFileGeneratedByPKZip()); + } + + @Test public void read5GBOfZerosUsingZipFile() throws Throwable { + read5GBOfZerosUsingZipFileImpl(get5GBZerosFile(), "5GB_of_Zeros"); + } + + @Test public void read5GBOfZerosGeneratedBy7ZIPUsingZipFile() + throws Throwable { + read5GBOfZerosUsingZipFileImpl(get5GBZerosFileGeneratedBy7ZIP(), + "5GB_of_Zeros"); + } + + @Test public void read5GBOfZerosGeneratedByJava7JarUsingZipFile() + throws Throwable { + read5GBOfZerosUsingZipFileImpl(get5GBZerosFileGeneratedByJava7Jar(), + "5GB_of_Zeros"); + } + + @Test public void read5GBOfZerosGeneratedByWinZIPUsingZipFile() + throws Throwable { + read5GBOfZerosUsingZipFileImpl(get5GBZerosFileGeneratedByWinZIP(), + "5GB_of_Zeros"); + } + + @Test public void read5GBOfZerosGeneratedByPKZipUsingZipFile() + throws Throwable { + read5GBOfZerosUsingZipFileImpl(get5GBZerosFileGeneratedByPKZip(), + "zip6/5GB_of_Zeros"); + } + + @Test public void writeAndRead5GBOfZerosUsingZipFile() throws Throwable { + File f = null; + try { + f = write5GBZerosFile("writeAndRead5GBOfZerosUsingZipFile"); + read5GBOfZerosUsingZipFileImpl(f, "5GB_of_Zeros"); + } finally { + if (f != null) { + AbstractTestCase.tryHardToDelete(f); + } + } + } + + private static File write5GBZerosFile(final String testName) throws Throwable { + final File f = getTempFile(testName); + final ZipArchiveOutputStream zos = new ZipArchiveOutputStream(f); + try { + zos.setUseZip64(Zip64Mode.Always); + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("5GB_of_Zeros"); + zae.setSize(FIVE_BILLION); + zae.setMethod(ZipEntry.DEFLATED); + zae.setCrc(0x8a408f16L); + zos.putArchiveEntry(zae); + for (int j = 0; j < FIVE_BILLION / 1000 / 1000; j++) { + zos.write(buf); + } + zos.closeArchiveEntry(); + zos.close(); + } catch (final IOException ex) { + System.err.println("Failed to write archive because of: " + + ex.getMessage() + + " - likely not enough disk space."); + assumeTrue(false); + } finally { + zos.destroy(); + } + return f; + } + + @Test public void read100KFilesUsingZipFile() throws Throwable { + read100KFilesUsingZipFileImpl(get100KFileFile()); + } + + @Test public void read100KFilesGeneratedBy7ZIPUsingZipFile() + throws Throwable { + read100KFilesUsingZipFileImpl(get100KFileFileGeneratedBy7ZIP()); + } + + @Test public void read100KFilesGeneratedByWinCFUsingZipFile() + throws Throwable { + read100KFilesUsingZipFileImpl(get100KFileFileGeneratedByWinCF()); + } + + @Test public void read100KFilesGeneratedByJava7JarUsingZipFile() + throws Throwable { + read100KFilesUsingZipFileImpl(get100KFileFileGeneratedByJava7Jar()); + } + + @Test public void read100KFilesGeneratedByWinZIPUsingZipFile() + throws Throwable { + read100KFilesUsingZipFileImpl(get100KFileFileGeneratedByWinZIP()); + } + + @Test public void read100KFilesGeneratedByPKZipUsingZipFile() + throws Throwable { + read100KFilesUsingZipFileImpl(get100KFileFileGeneratedByPKZip()); + } + + private static ZipOutputTest write100KFiles() { + return write100KFiles(Zip64Mode.AsNeeded); + } + + private static ZipOutputTest write100KFiles(final Zip64Mode mode) { + return new 
ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + if (mode != Zip64Mode.AsNeeded) { + zos.setUseZip64(mode); + } + write100KFilesToStream(zos); + try (RandomAccessFile a = new RandomAccessFile(f, "r")) { + final long end = a.length(); + + // validate "end of central directory" is at + // the end of the file and contains the magic + // value 0xFFFF as "number of entries". + a.seek(end + - 22 /* length of EOCD without file comment */); + final byte[] eocd = new byte[12]; + a.readFully(eocd); + assertArrayEquals(new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 5, 6, + // disk numbers + 0, 0, 0, 0, + // entries + (byte) 0xff, (byte) 0xff, + (byte) 0xff, (byte) 0xff, + }, eocd); + + // validate "Zip64 end of central directory + // locator" is right in front of the EOCD and + // the location of the "Zip64 end of central + // directory record" seems correct + final long expectedZ64EocdOffset = end - 22 /* eocd.length */ + - 20 /* z64 eocd locator.length */ + - 56 /* z64 eocd without extensible data sector */; + final byte[] loc = + ZipEightByteInteger.getBytes(expectedZ64EocdOffset); + a.seek(end - 22 - 20); + final byte[] z64EocdLoc = new byte[20]; + a.readFully(z64EocdLoc); + assertArrayEquals(new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 6, 7, + // disk numbers + 0, 0, 0, 0, + // location of Zip64 EOCD, + loc[0], loc[1], loc[2], loc[3], + loc[4], loc[5], loc[6], loc[7], + // total number of disks + 1, 0, 0, 0, + }, z64EocdLoc); + + // validate "Zip64 end of central directory + // record" is where it is supposed to be, the + // known values are fine and read the location + // of the central directory from it + a.seek(expectedZ64EocdOffset); + final byte[] z64EocdStart = new byte[40]; + a.readFully(z64EocdStart); + assertArrayEquals(new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 6, 6, + // size of z64 EOCD + 44, 0, 0, 0, + 0, 0, 0, 0, + // version made by + 45, 0, + // version needed to extract + 45, 0, + // disk numbers + 0, 0, 0, 0, + 0, 0, 0, 0, + // number of entries 100k = 0x186A0 + (byte) 0xA0, (byte) 0x86, 1, 0, + 0, 0, 0, 0, + (byte) 0xA0, (byte) 0x86, 1, 0, + 0, 0, 0, 0, + }, z64EocdStart); + a.seek(expectedZ64EocdOffset + 48 /* skip size */); + final byte[] cdOffset = new byte[8]; + a.readFully(cdOffset); + final long cdLoc = ZipEightByteInteger.getLongValue(cdOffset); + + // finally verify there really is a central + // directory entry where the Zip64 EOCD claims + a.seek(cdLoc); + final byte[] sig = new byte[4]; + a.readFully(sig); + assertArrayEquals(new byte[] { + (byte) 0x50, (byte) 0x4b, 1, 2, + }, sig); + } + } + }; + } + + @Test public void write100KFilesFile() throws Throwable { + withTemporaryArchive("write100KFilesFile", write100KFiles(), true); + } + + @Test public void write100KFilesStream() throws Throwable { + withTemporaryArchive("write100KFilesStream", write100KFiles(), false); + } + + @Test public void write100KFilesFileModeAlways() throws Throwable { + withTemporaryArchive("write100KFilesFileModeAlways", + write100KFiles(Zip64Mode.Always), true); + } + + @Test public void write100KFilesStreamModeAlways() throws Throwable { + withTemporaryArchive("write100KFilesStreamModeAlways", + write100KFiles(Zip64Mode.Always), false); + } + + private static final ZipOutputTest write100KFilesModeNever = + new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + zos.setUseZip64(Zip64Mode.Never); + try { + write100KFilesToStream(zos); + 
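+ // (100,000 entries exceed the 16-bit entry count of the classic
+ // end-of-central-directory record, so the write above is expected to fail
+ // with Zip64RequiredException before fail() is reached.)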
fail("expected a Zip64RequiredException"); + } catch (final Zip64RequiredException ex) { + assertEquals(Zip64RequiredException.TOO_MANY_ENTRIES_MESSAGE, + ex.getMessage()); + } + } + }; + + @Test public void write100KFilesFileModeNever() throws Throwable { + withTemporaryArchive("write100KFilesFileModeNever", + write100KFilesModeNever, true); + } + + @Test public void write100KFilesStreamModeNever() throws Throwable { + withTemporaryArchive("write100KFilesStreamModeNever", + write100KFilesModeNever, false); + } + + @Test public void readSelfGenerated100KFilesUsingZipFile() + throws Throwable { + withTemporaryArchive("readSelfGenerated100KFilesUsingZipFile()", + new ZipOutputTest() { + @Override + public void test(final File f, + final ZipArchiveOutputStream zos) + throws IOException { + write100KFilesToStream(zos); + read100KFilesUsingZipFileImpl(f); + } + }, + true); + } + + private static ZipOutputTest write3EntriesCreatingBigArchive() { + return write3EntriesCreatingBigArchive(Zip64Mode.AsNeeded); + } + + /* + * Individual sizes don't require ZIP64 but the offset of the + * third entry is bigger than 0xFFFFFFFF so a ZIP64 extended + * information is needed inside the central directory. + * + * Creates a temporary archive of approx 5GB in size + */ + private static ZipOutputTest + write3EntriesCreatingBigArchive(final Zip64Mode mode) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + if (mode != Zip64Mode.AsNeeded) { + zos.setUseZip64(mode); + } + write3EntriesCreatingBigArchiveToStream(zos); + + try (RandomAccessFile a = new RandomAccessFile(f, "r")) { + getLengthAndPositionAtCentralDirectory(a); + // skip first two entries + a.skipBytes(2 * 47 /* CD entry of file with + file name length 1 and no + extra data */ + + 2 * (mode == Zip64Mode.Always ? 28 : 0) + /* ZIP64 extra fields if mode is Always */ + ); + + // grab third entry, verify offset is + // 0xFFFFFFFF and it has a ZIP64 extended + // information extra field + final byte[] header = new byte[12]; + a.readFully(header); + assertArrayEquals("CDH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 1, 2, + // version made by + 45, 0, + // version needed to extract + 45, 0, + // GPB (EFS bit) + 0, 8, + // method + 0, 0 + }, header); + // ignore timestamp, CRC, compressed size + a.skipBytes(12); + // Original Size + final byte[] originalSize = new byte[4]; + a.readFully(originalSize); + if (mode == Zip64Mode.Always) { + assertArrayEquals("CDH original size", new byte[] { + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + }, originalSize); + } else { + assertArrayEquals("CDH original size", new byte[] { + 1, 0, 0, 0 + }, originalSize); + } + final byte[] rest = new byte[19]; + a.readFully(rest); + assertArrayEquals("CDH rest", new byte[] { + // file name length + 1, 0, + // extra field length + (byte) (mode == Zip64Mode.Always? 
28 : 12), 0, + // comment length + 0, 0, + // disk number + 0, 0, + // attributes + 0, 0, + 0, 0, 0, 0, + // offset + (byte) 0xFF, (byte) 0xFF, + (byte) 0xFF, (byte) 0xFF, + // file name + (byte) '2' + }, rest); + if (mode == Zip64Mode.Always) { + final byte[] extra = new byte[12]; + a.readFully(extra); + assertArrayEquals("CDH extra", new byte[] { + // Header-ID + 1, 0, + // size + 24, 0, + // Original Size + 1, 0, 0, 0, 0, 0, 0, 0, + }, extra); + // skip compressed size + a.skipBytes(8); + } else { + final byte[] extra = new byte[4]; + a.readFully(extra); + assertArrayEquals("CDH extra", new byte[] { + // Header-ID + 1, 0, + // size + 8, 0, + }, extra); + } + + // read offset of LFH + final byte[] offset = new byte[8]; + a.readFully(offset); + // verify there is a LFH where the CD claims it + a.seek(ZipEightByteInteger.getLongValue(offset)); + final byte[] sig = new byte[4]; + a.readFully(sig); + assertArrayEquals("LFH signature", new byte[] { + (byte) 0x50, (byte) 0x4b, 3, 4, + }, sig); + } + } + }; + } + + @Test public void write3EntriesCreatingBigArchiveFile() throws Throwable { + withTemporaryArchive("write3EntriesCreatingBigArchiveFile", + write3EntriesCreatingBigArchive(), + true); + } + + @Test public void write3EntriesCreatingBigArchiveStream() throws Throwable { + withTemporaryArchive("write3EntriesCreatingBigArchiveStream", + write3EntriesCreatingBigArchive(), + false); + } + + @Test public void write3EntriesCreatingBigArchiveFileModeAlways() + throws Throwable { + withTemporaryArchive("write3EntriesCreatingBigArchiveFileModeAlways", + write3EntriesCreatingBigArchive(Zip64Mode.Always), + true); + } + + @Test public void write3EntriesCreatingBigArchiveStreamModeAlways() + throws Throwable { + withTemporaryArchive("write3EntriesCreatingBigArchiveStreamModeAlways", + write3EntriesCreatingBigArchive(Zip64Mode.Always), + false); + } + + private static final ZipOutputTest write3EntriesCreatingBigArchiveModeNever = + new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + zos.setUseZip64(Zip64Mode.Never); + try { + write3EntriesCreatingBigArchiveToStream(zos); + fail("expected a Zip64RequiredException"); + } catch (final Zip64RequiredException ex) { + assertEquals(Zip64RequiredException.ARCHIVE_TOO_BIG_MESSAGE, + ex.getMessage()); + } + } + }; + + @Test public void write3EntriesCreatingBigArchiveFileModeNever() + throws Throwable { + withTemporaryArchive("write3EntriesCreatingBigArchiveFileModeNever", + write3EntriesCreatingBigArchiveModeNever, + true); + } + + @Test public void write3EntriesCreatingBigArchiveStreamModeNever() + throws Throwable { + withTemporaryArchive("write3EntriesCreatingBigArchiveStreamModeNever", + write3EntriesCreatingBigArchiveModeNever, + false); + } + + @Test public void read3EntriesCreatingBigArchiveFileUsingZipFile() + throws Throwable { + withTemporaryArchive("read3EntriesCreatingBigArchiveFileUsingZipFile", + new ZipOutputTest() { + @Override + public void test(final File f, + final ZipArchiveOutputStream zos) + throws IOException { + write3EntriesCreatingBigArchiveToStream(zos); + ZipFile zf = null; + try { + zf = new ZipFile(f); + int idx = 0; + for (final Enumeration<ZipArchiveEntry> e = + zf.getEntriesInPhysicalOrder(); + e.hasMoreElements(); ) { + final ZipArchiveEntry zae = e.nextElement(); + assertEquals(String.valueOf(idx), + zae.getName()); + if (idx++ < 2) { + assertEquals(FIVE_BILLION / 2, + zae.getSize()); + } else { + assertEquals(1, + zae.getSize()); + try (InputStream i 
= zf.getInputStream(zae)) { + assertNotNull(i); + assertEquals(42, i.read()); + } + } + } + } finally { + ZipFile.closeQuietly(zf); + } + } + }, + true); + } + + private static ZipOutputTest writeBigStoredEntry(final boolean knownSize) { + return writeBigStoredEntry(knownSize, Zip64Mode.AsNeeded); + } + + /* + * One entry of length 5 billion bytes, written without + * compression. + * + * No Compression => sizes are stored directly inside the LFH. No + * Data Descriptor at all. + * + * Creates a temporary archive of approx 5GB in size + */ + private static ZipOutputTest writeBigStoredEntry(final boolean knownSize, + final Zip64Mode mode) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + if (mode != Zip64Mode.AsNeeded) { + zos.setUseZip64(mode); + } + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + if (knownSize) { + zae.setSize(FIVE_BILLION); + zae.setCrc(0x5c316f50L); + } + zae.setMethod(ZipEntry.STORED); + zos.putArchiveEntry(zae); + for (int j = 0; j < FIVE_BILLION / 1000 / 1000; j++) { + zos.write(buf); + } + zos.closeArchiveEntry(); + zos.close(); + + try (RandomAccessFile a = new RandomAccessFile(f, "r")) { + getLengthAndPositionAtCentralDirectory(a); + + // grab first entry, verify sizes are 0xFFFFFFFF + // and it has a ZIP64 extended information extra + // field + byte[] header = new byte[12]; + a.readFully(header); + assertArrayEquals("CDH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 1, 2, + // version made by + 45, 0, + // version needed to extract + 45, 0, + // GPB (EFS bit) + 0, 8, + // method + 0, 0 + }, header); + // ignore timestamp + a.skipBytes(4); + byte[] rest = new byte[26]; + a.readFully(rest); + assertArrayEquals("CDH rest", new byte[] { + // CRC + (byte) 0x50, (byte) 0x6F, (byte) 0x31, (byte) 0x5c, + // Compressed Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // Original Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name length + 1, 0, + // extra field length + (byte) (mode == Zip64Mode.Always? 28 : 20), 0, + // comment length + 0, 0, + // disk number + 0, 0, + // attributes + 0, 0, + 0, 0, 0, 0, + }, rest); + byte[] offset = new byte[4]; + a.readFully(offset); + if (mode == Zip64Mode.Always) { + assertArrayEquals("offset", new byte[] { + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + }, offset); + } else { + assertArrayEquals("offset", new byte[] { + 0, 0, 0, 0, + }, offset); + } + assertEquals('0', a.read()); + final byte[] extra = new byte[20]; + a.readFully(extra); + // 5e9 == 0x12A05F200 + assertArrayEquals("CDH extra", new byte[] { + // Header-ID + 1, 0, + // size of extra + (byte) (mode == Zip64Mode.Always? 
24 : 16), 0, + // original size + 0, (byte) 0xF2, 5, (byte) 0x2A, + 1, 0, 0, 0, + // compressed size + 0, (byte) 0xF2, 5, (byte) 0x2A, + 1, 0, 0, 0, + }, extra); + if (mode == Zip64Mode.Always) { + offset = new byte[8]; + a.readFully(offset); + assertArrayEquals("extra offset", new byte[] { + 0, 0, 0, 0, 0, 0, 0, 0, + }, offset); + } + + // and now validate local file header + a.seek(0); + header = new byte[10]; + a.readFully(header); + assertArrayEquals("LFH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 3, 4, + // version needed to extract + 45, 0, + // GPB (EFS bit) + 0, 8, + // method + 0, 0 + }, header); + // ignore timestamp + a.skipBytes(4); + rest = new byte[17]; + a.readFully(rest); + assertArrayEquals("LFH rest", new byte[] { + // CRC + (byte) 0x50, (byte) 0x6F, (byte) 0x31, (byte) 0x5c, + // Compressed Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // Original Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name length + 1, 0, + // extra field length + 20, 0, + // file name + (byte) '0' + }, rest); + a.readFully(extra); + // 5e9 == 0x12A05F200 + assertArrayEquals("LFH extra", new byte[] { + // Header-ID + 1, 0, + // size of extra + 16, 0, + // original size + 0, (byte) 0xF2, 5, (byte) 0x2A, + 1, 0, 0, 0, + // compressed size + 0, (byte) 0xF2, 5, (byte) 0x2A, + 1, 0, 0, 0, + }, extra); + } + } + }; + } + + /* + * No Compression + Stream => sizes must be known before data is + * written. + */ + @Test public void writeBigStoredEntryToStream() throws Throwable { + withTemporaryArchive("writeBigStoredEntryToStream", + writeBigStoredEntry(true), + false); + } + + @Test public void writeBigStoredEntryKnownSizeToFile() throws Throwable { + withTemporaryArchive("writeBigStoredEntryKnownSizeToFile", + writeBigStoredEntry(true), + true); + } + + @Test public void writeBigStoredEntryUnnownSizeToFile() throws Throwable { + withTemporaryArchive("writeBigStoredEntryUnknownSizeToFile", + writeBigStoredEntry(false), + true); + } + + @Test public void writeBigStoredEntryToStreamModeAlways() throws Throwable { + withTemporaryArchive("writeBigStoredEntryToStreamModeAlways", + writeBigStoredEntry(true, Zip64Mode.Always), + false); + } + + @Test public void writeBigStoredEntryKnownSizeToFileModeAlways() + throws Throwable { + withTemporaryArchive("writeBigStoredEntryKnownSizeToFileModeAlways", + writeBigStoredEntry(true, Zip64Mode.Always), + true); + } + + @Test public void writeBigStoredEntryUnnownSizeToFileModeAlways() + throws Throwable { + withTemporaryArchive("writeBigStoredEntryUnknownSizeToFileModeAlways", + writeBigStoredEntry(false, Zip64Mode.Always), + true); + } + + private static ZipOutputTest + writeBigStoredEntryModeNever(final boolean knownSize) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + zos.setUseZip64(Zip64Mode.Never); + try { + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + if (knownSize) { + zae.setSize(FIVE_BILLION); + zae.setCrc(0x5c316f50L); + } + zae.setMethod(ZipEntry.STORED); + zos.putArchiveEntry(zae); + for (int j = 0; j < FIVE_BILLION / 1000 / 1000; j++) { + zos.write(buf); + } + zos.closeArchiveEntry(); + fail("expected a Zip64RequiredException"); + } catch (final Zip64RequiredException ex) { + assertTrue(ex.getMessage().startsWith("0's size")); + } + } + }; + } + + @Test public void writeBigStoredEntryToStreamModeNever() throws Throwable { + 
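+        // A 5-billion-byte stored entry cannot be represented in the 32-bit size fields
+        // (max 0xFFFFFFFF, i.e. 4 GiB - 1), so with ZIP64 forbidden the write must fail.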
withTemporaryArchive("writeBigStoredEntryToStreamModeNever", + writeBigStoredEntryModeNever(true), + false); + } + + @Test public void writeBigStoredEntryKnownSizeToFileModeNever() + throws Throwable { + withTemporaryArchive("writeBigStoredEntryKnownSizeToFileModeNever", + writeBigStoredEntryModeNever(true), + true); + } + + @Test public void writeBigStoredEntryUnnownSizeToFileModeNever() + throws Throwable { + withTemporaryArchive("writeBigStoredEntryUnknownSizeToFileModeNever", + writeBigStoredEntryModeNever(false), + true); + } + + /* + * One entry of length 5 billion bytes, written with + * compression to a stream. + * + * Compression + Stream => sizes are set to 0 in LFH and ZIP64 + * entry, real values are inside the data descriptor. + * + * Creates a temporary archive of approx 4MB in size + */ + private static ZipOutputTest + writeBigDeflatedEntryToStream(final boolean knownSize, + final Zip64Mode mode) { + return new ZipOutputTest() { + @Override + public void test(final File f, + final ZipArchiveOutputStream zos) + throws IOException { + if (mode != Zip64Mode.AsNeeded) { + zos.setUseZip64(mode); + } + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + if (knownSize) { + zae.setSize(FIVE_BILLION); + } + zae.setMethod(ZipEntry.DEFLATED); + zos.putArchiveEntry(zae); + for (int j = 0; j < FIVE_BILLION / 1000 / 1000; j++) { + zos.write(buf); + } + zos.closeArchiveEntry(); + zos.close(); + + try (RandomAccessFile a = new RandomAccessFile(f, "r")) { + getLengthAndPositionAtCentralDirectory(a); + + final long cfhPos = a.getFilePointer(); + // grab first entry, verify + // sizes are 0xFFFFFFFF and + // it has a ZIP64 extended + // information extra field + byte[] header = new byte[12]; + a.readFully(header); + assertArrayEquals("CDH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 1, 2, + // version made by + 45, 0, + // version needed to extract + 45, 0, + // GPB (EFS + Data Descriptor) + 8, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + byte[] rest = new byte[26]; + a.readFully(rest); + assertArrayEquals("CDH rest", new byte[] { + // CRC + (byte) 0x50, (byte) 0x6F, (byte) 0x31, (byte) 0x5c, + // Compressed Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // Original Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name length + 1, 0, + // extra field length + (byte) (mode == Zip64Mode.Always? 28 : 20), 0, + // comment length + 0, 0, + // disk number + 0, 0, + // attributes + 0, 0, + 0, 0, 0, 0, + }, rest); + byte[] offset = new byte[4]; + a.readFully(offset); + if (mode == Zip64Mode.Always) { + assertArrayEquals("offset", new byte[] { + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + }, offset); + } else { + assertArrayEquals("offset", new byte[] { + 0, 0, 0, 0, + }, offset); + } + assertEquals('0', a.read()); + byte[] extra = new byte[12]; + a.readFully(extra); + // 5e9 == 0x12A05F200 + assertArrayEquals("CDH extra", new byte[] { + // Header-ID + 1, 0, + // size of extra + (byte) (mode == Zip64Mode.Always? 
24 : 16), 0, + // original size + 0, (byte) 0xF2, 5, (byte) 0x2A, + 1, 0, 0, 0, + }, extra); + if (mode == Zip64Mode.Always) { + // skip compressed size + a.skipBytes(8); + offset = new byte[8]; + a.readFully(offset); + assertArrayEquals("extra offset", new byte[] { + 0, 0, 0, 0, 0, 0, 0, 0, + }, offset); + } + + // validate data descriptor + a.seek(cfhPos - 24); + byte[] dd = new byte[8]; + a.readFully(dd); + assertArrayEquals("DD", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 7, 8, + // CRC + (byte) 0x50, (byte) 0x6F, (byte) 0x31, (byte) 0x5c, + }, dd); + // skip compressed size + a.skipBytes(8); + dd = new byte[8]; + a.readFully(dd); + assertArrayEquals("DD sizes", new byte[] { + // original size + 0, (byte) 0xF2, 5, (byte) 0x2A, + 1, 0, 0, 0, + }, dd); + + // and now validate local file header + a.seek(0); + header = new byte[10]; + a.readFully(header); + assertArrayEquals("LFH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 3, 4, + // version needed to extract + 45, 0, + // GPB (EFS + Data Descriptor) + 8, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + rest = new byte[17]; + a.readFully(rest); + assertArrayEquals("LFH rest", new byte[] { + // CRC + 0, 0, 0, 0, + // Compressed Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // Original Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name length + 1, 0, + // extra field length + 20, 0, + // file name + (byte) '0' + }, rest); + extra = new byte[20]; + a.readFully(extra); + assertArrayEquals("LFH extra", new byte[] { + // Header-ID + 1, 0, + // size of extra + 16, 0, + // original size + 0, 0, 0, 0, + 0, 0, 0, 0, + // compressed size + 0, 0, 0, 0, + 0, 0, 0, 0, + }, extra); + } + } + }; + } + + @Test public void writeBigDeflatedEntryKnownSizeToStream() + throws Throwable { + withTemporaryArchive("writeBigDeflatedEntryKnownSizeToStream", + writeBigDeflatedEntryToStream(true, + Zip64Mode.AsNeeded), + false); + } + + @Test public void writeBigDeflatedEntryKnownSizeToStreamModeAlways() + throws Throwable { + withTemporaryArchive("writeBigDeflatedEntryKnownSizeToStreamModeAlways", + writeBigDeflatedEntryToStream(true, + Zip64Mode.Always), + false); + } + + @Test public void writeBigDeflatedEntryUnknownSizeToStreamModeAlways() + throws Throwable { + withTemporaryArchive("writeBigDeflatedEntryUnknownSizeToStreamModeAlways", + writeBigDeflatedEntryToStream(false, + Zip64Mode.Always), + false); + } + + private static ZipOutputTest + writeBigDeflatedEntryUnknownSizeToStream(final Zip64Mode mode) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + try { + if (mode != Zip64Mode.AsNeeded) { + zos.setUseZip64(mode); + } + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + zae.setMethod(ZipEntry.DEFLATED); + zos.putArchiveEntry(zae); + for (int j = 0; j < FIVE_BILLION / 1000 / 1000; j++) { + zos.write(buf); + } + zos.closeArchiveEntry(); + fail("expected a Zip64RequiredException"); + } catch (final Zip64RequiredException ex) { + assertTrue(ex.getMessage().startsWith("0's size")); + } + } + }; + } + + @Test public void writeBigDeflatedEntryUnknownSizeToStream() + throws Throwable { + withTemporaryArchive("writeBigDeflatedEntryUnknownSizeToStream", + writeBigDeflatedEntryUnknownSizeToStream(Zip64Mode + .AsNeeded), + false); + } + + @Test public void writeBigDeflatedEntryUnknownSizeToStreamModeNever() + throws Throwable { + 
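+        // With a non-seekable stream and an unknown size the LFH has already been written
+        // without ZIP64 fields, so once the data grows past 4 GiB the entry can no longer
+        // be represented; both AsNeeded (above) and Never (here) expect Zip64RequiredException.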
withTemporaryArchive("writeBigDeflatedEntryUnknownSizeToStreamModeNever", + writeBigDeflatedEntryUnknownSizeToStream(Zip64Mode + .Never), + false); + } + + private static ZipOutputTest + writeBigDeflatedEntryToFile(final boolean knownSize) { + return writeBigDeflatedEntryToFile(knownSize, Zip64Mode.AsNeeded); + } + + /* + * One entry of length 5 billion bytes, written with + * compression to a file. + * + * Writing to a file => sizes are stored directly inside the LFH. + * No Data Descriptor at all. + * + * Creates a temporary archive of approx 4MB in size + */ + private static ZipOutputTest + writeBigDeflatedEntryToFile(final boolean knownSize, + final Zip64Mode mode) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + if (mode != Zip64Mode.AsNeeded) { + zos.setUseZip64(mode); + } + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + if (knownSize) { + zae.setSize(FIVE_BILLION); + } + zae.setMethod(ZipEntry.DEFLATED); + zos.putArchiveEntry(zae); + for (int j = 0; + j < FIVE_BILLION / 1000 / 1000; + j++) { + zos.write(buf); + } + zos.closeArchiveEntry(); + zos.close(); + + try (RandomAccessFile a = new RandomAccessFile(f, "r")) { + getLengthAndPositionAtCentralDirectory(a); + + // grab first entry, verify + // sizes are 0xFFFFFFFF and + // it has a ZIP64 extended + // information extra field + byte[] header = new byte[12]; + a.readFully(header); + assertArrayEquals("CDH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 1, 2, + // version made by + 45, 0, + // version needed to extract + 45, 0, + // GPB (EFS + *no* Data Descriptor) + 0, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + byte[] rest = new byte[26]; + a.readFully(rest); + assertArrayEquals("CDH rest", new byte[] { + // CRC + (byte) 0x50, (byte) 0x6F, (byte) 0x31, (byte) 0x5c, + // Compressed Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // Original Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name length + 1, 0, + // extra field length + (byte) (mode == Zip64Mode.Always? 28 : 20), 0, + // comment length + 0, 0, + // disk number + 0, 0, + // attributes + 0, 0, + 0, 0, 0, 0, + }, rest); + byte[] offset = new byte[4]; + a.readFully(offset); + if (mode == Zip64Mode.Always) { + assertArrayEquals("offset", new byte[] { + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + }, offset); + } else { + assertArrayEquals("offset", new byte[] { + 0, 0, 0, 0, + }, offset); + } + assertEquals('0', a.read()); + byte[] extra = new byte[12]; + a.readFully(extra); + // 5e9 == 0x12A05F200 + assertArrayEquals("CDH extra", new byte[] { + // Header-ID + 1, 0, + // size of extra + (byte) (mode == Zip64Mode.Always? 
24 : 16), 0, + // original size + 0, (byte) 0xF2, 5, (byte) 0x2A, + 1, 0, 0, 0, + }, extra); + if (mode == Zip64Mode.Always) { + // skip compressed size + a.skipBytes(8); + offset = new byte[8]; + a.readFully(offset); + assertArrayEquals("extra offset", new byte[] { + 0, 0, 0, 0, 0, 0, 0, 0, + }, offset); + } + + // and now validate local file header + a.seek(0); + header = new byte[10]; + a.readFully(header); + assertArrayEquals("LFH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 3, 4, + // version needed to extract + 45, 0, + // GPB (EFS bit, no DD) + 0, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + rest = new byte[17]; + a.readFully(rest); + assertArrayEquals(new byte[] { + // CRC + (byte) 0x50, (byte) 0x6F, (byte) 0x31, (byte) 0x5c, + // Compressed Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // Original Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name length + 1, 0, + // extra field length + 20, 0, + // file name + (byte) '0' + }, rest); + extra = new byte[12]; + a.readFully(extra); + assertArrayEquals(new byte[] { + // Header-ID + 1, 0, + // size of extra + 16, 0, + // original size + 0, (byte) 0xF2, 5, (byte) 0x2A, + 1, 0, 0, 0, + // skip compressed size + }, extra); + } + } + }; + } + + @Test public void writeBigDeflatedEntryKnownSizeToFile() + throws Throwable { + withTemporaryArchive("writeBigDeflatedEntryKnownSizeToFile", + writeBigDeflatedEntryToFile(true), + true); + } + + @Test public void writeBigDeflatedEntryUnknownSizeToFile() + throws Throwable { + withTemporaryArchive("writeBigDeflatedEntryUnknownSizeToFile", + writeBigDeflatedEntryToFile(false), + true); + } + + @Test public void writeBigDeflatedEntryKnownSizeToFileModeAlways() + throws Throwable { + withTemporaryArchive("writeBigDeflatedEntryKnownSizeToFileModeAlways", + writeBigDeflatedEntryToFile(true, Zip64Mode.Always), + true); + } + + @Test public void writeBigDeflatedEntryUnknownSizeToFileModeAlways() + throws Throwable { + withTemporaryArchive("writeBigDeflatedEntryUnknownSizeToFileModeAlways", + writeBigDeflatedEntryToFile(false, + Zip64Mode.Always), + true); + } + + @Test public void writeBigDeflatedEntryKnownSizeToStreamModeNever() + throws Throwable { + withTemporaryArchive("writeBigDeflatedEntryKnownSizeToStreamModeNever", + new ZipOutputTest() { + @Override + public void test(final File f, + final ZipArchiveOutputStream zos) + throws IOException { + zos.setUseZip64(Zip64Mode.Never); + try { + final ZipArchiveEntry zae = + new ZipArchiveEntry("0"); + zae.setSize(FIVE_BILLION); + zae.setMethod(ZipEntry.DEFLATED); + zos.putArchiveEntry(zae); + fail("expected a" + + " Zip64RequiredException"); + } catch (final Zip64RequiredException ex) { + assertTrue(ex.getMessage() + .startsWith("0's size")); + } + } + }, + false); + } + + /* + * One entry of length 5 billion bytes, written with + * compression to a file. + * + * Writing to a file => sizes are stored directly inside the LFH. + * No Data Descriptor at all. 
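+     * (Under Zip64Mode.Never this variant is expected to fail with a
+     * Zip64RequiredException, since 5 billion bytes exceed the 32-bit size fields.)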
+ * + * Creates a temporary archive of approx 4MB in size + */ + private static ZipOutputTest + writeBigDeflatedEntryToFileModeNever(final boolean knownSize) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + zos.setUseZip64(Zip64Mode.Never); + try { + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + if (knownSize) { + zae.setSize(FIVE_BILLION); + } + zae.setMethod(ZipEntry.DEFLATED); + zos.putArchiveEntry(zae); + for (int j = 0; + j < FIVE_BILLION / 1000 / 1000; + j++) { + zos.write(buf); + } + zos.closeArchiveEntry(); + fail("expected a Zip64RequiredException"); + } catch (final Zip64RequiredException ex) { + assertTrue(ex.getMessage().startsWith("0's size")); + } + } + }; + } + + @Test public void writeBigDeflatedEntryKnownSizeToFileModeNever() + throws Throwable { + withTemporaryArchive("writeBigDeflatedEntryKnownSizeToFileModeNever", + writeBigDeflatedEntryToFileModeNever(true), + true); + } + + @Test public void writeBigDeflatedEntryUnknownSizeToFileModeNever() + throws Throwable { + withTemporaryArchive("writeBigDeflatedEntryUnknownSizeToFileModeNever", + writeBigDeflatedEntryToFileModeNever(false), + true); + } + + private static ZipOutputTest writeSmallStoredEntry(final boolean knownSize) { + return writeSmallStoredEntry(knownSize, Zip64Mode.AsNeeded); + } + + /* + * One entry of length 1 million bytes, written without compression. + * + * No Compression => sizes are stored directly inside the LFH. No + * Data Descriptor at all. Shouldn't contain any ZIP64 extra + * field if size was known. + * + * Creates a temporary archive of approx 1MB in size + */ + private static ZipOutputTest writeSmallStoredEntry(final boolean knownSize, + final Zip64Mode mode) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + if (mode != Zip64Mode.AsNeeded) { + zos.setUseZip64(mode); + } + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + if (knownSize) { + zae.setSize(ONE_MILLION); + zae.setCrc(0x1279CB9EL); + } + zae.setMethod(ZipEntry.STORED); + zos.putArchiveEntry(zae); + zos.write(buf); + zos.closeArchiveEntry(); + zos.close(); + + try (RandomAccessFile a = new RandomAccessFile(f, "r")) { + getLengthAndPositionAtCentralDirectory(a); + + // grab first CF entry, verify sizes are 1e6 and it + // has no ZIP64 extended information extra field + // at all + byte[] header = new byte[12]; + a.readFully(header); + assertArrayEquals("CDH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 1, 2, + // version made by + 20, 0, + // version needed to extract + 10, 0, + // GPB (EFS bit) + 0, 8, + // method + 0, 0 + }, header); + // ignore timestamp + a.skipBytes(4); + byte[] rest = new byte[31]; + a.readFully(rest); + // 1e6 == 0xF4240 + assertArrayEquals("CDH rest", new byte[] { + // CRC + (byte) 0x9E, (byte) 0xCB, (byte) 0x79, (byte) 0x12, + // Compressed Size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + // Original Size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + // file name length + 1, 0, + // extra field length + 0, 0, + // comment length + 0, 0, + // disk number + 0, 0, + // attributes + 0, 0, + 0, 0, 0, 0, + // offset + 0, 0, 0, 0, + // file name + (byte) '0' + }, rest); + + // and now validate local file header: this one + // has a ZIP64 extra field if and only if size was + // unknown and mode was not Never or the mode was + // 
Always (regardless of size) + final boolean hasExtra = mode == Zip64Mode.Always + || (mode == Zip64Mode.AsNeeded && !knownSize); + a.seek(0); + header = new byte[10]; + a.readFully(header); + assertArrayEquals("LFH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 3, 4, + // version needed to extract + 10, 0, + // GPB (EFS bit) + 0, 8, + // method + 0, 0 + }, header); + // ignore timestamp + a.skipBytes(4); + rest = new byte[17]; + a.readFully(rest); + // 1e6 == 0xF4240 + assertArrayEquals("LFH rest", new byte[] { + // CRC + (byte) 0x9E, (byte) 0xCB, (byte) 0x79, (byte) 0x12, + // Compressed Size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + // Original Size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + // file name length + 1, 0, + // extra field length + (byte) (!hasExtra ? 0 : 20), 0, + // file name + (byte) '0' + }, rest); + if (hasExtra) { + final byte[] extra = new byte[20]; + a.readFully(extra); + assertArrayEquals("ZIP64 extra field", new byte[] { + // Header-ID + 1, 0, + // size of extra + 16, 0, + // original size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + 0, 0, 0, 0, + // compressed size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + 0, 0, 0, 0, + }, extra); + } + } + } + }; + } + + @Test public void writeSmallStoredEntryToStream() throws Throwable { + withTemporaryArchive("writeSmallStoredEntryToStream", + writeSmallStoredEntry(true), + false); + } + + @Test public void writeSmallStoredEntryKnownSizeToFile() throws Throwable { + withTemporaryArchive("writeSmallStoredEntryKnownSizeToFile", + writeSmallStoredEntry(true), + true); + } + + @Test public void writeSmallStoredEntryUnnownSizeToFile() throws Throwable { + withTemporaryArchive("writeSmallStoredEntryUnknownSizeToFile", + writeSmallStoredEntry(false), + true); + } + + @Test public void writeSmallStoredEntryToStreamModeNever() throws Throwable { + withTemporaryArchive("writeSmallStoredEntryToStreamModeNever", + writeSmallStoredEntry(true, Zip64Mode.Never), + false); + } + + @Test public void writeSmallStoredEntryKnownSizeToFileModeNever() + throws Throwable { + withTemporaryArchive("writeSmallStoredEntryKnownSizeToFileModeNever", + writeSmallStoredEntry(true, Zip64Mode.Never), + true); + } + + @Test public void writeSmallStoredEntryUnnownSizeToFileModeNever() + throws Throwable { + withTemporaryArchive("writeSmallStoredEntryUnknownSizeToFileModeNever", + writeSmallStoredEntry(false, Zip64Mode.Never), + true); + } + + /* + * One entry of length 1 million bytes, written without compression. + * + * No Compression => sizes are stored directly inside the LFH. No + * Data Descriptor at all. 
Contains ZIP64 extra fields because + * mode is Always + * + * Creates a temporary archive of approx 1MB in size + */ + private static ZipOutputTest + writeSmallStoredEntryModeAlways(final boolean knownSize) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + zos.setUseZip64(Zip64Mode.Always); + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + if (knownSize) { + zae.setSize(ONE_MILLION); + zae.setCrc(0x1279CB9EL); + } + zae.setMethod(ZipEntry.STORED); + zos.putArchiveEntry(zae); + zos.write(buf); + zos.closeArchiveEntry(); + zos.close(); + + try (RandomAccessFile a = new RandomAccessFile(f, "r")) { + getLengthAndPositionAtCentralDirectory(a); + + // grab first CF entry, verify sizes are 1e6 and it + // has an empty ZIP64 extended information extra field + byte[] header = new byte[12]; + a.readFully(header); + assertArrayEquals("CDH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 1, 2, + // version made by + 45, 0, + // version needed to extract + 45, 0, + // GPB (EFS bit) + 0, 8, + // method + 0, 0 + }, header); + // ignore timestamp + a.skipBytes(4); + byte[] rest = new byte[31]; + a.readFully(rest); + // 1e6 == 0xF4240 + assertArrayEquals("CDH rest", new byte[] { + // CRC + (byte) 0x9E, (byte) 0xCB, (byte) 0x79, (byte) 0x12, + // Compressed Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // Original Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name length + 1, 0, + // extra field length + 28, 0, + // comment length + 0, 0, + // disk number + 0, 0, + // attributes + 0, 0, + 0, 0, 0, 0, + // offset + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name + (byte) '0' + }, rest); + + byte[] extra = new byte[28]; + a.readFully(extra); + assertArrayEquals("CDH extra", new byte[] { + // Header-ID + 1, 0, + // size of extra + 24, 0, + // original size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + 0, 0, 0, 0, + // compressed size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }, extra); + + // and now validate local file header: this one + // has a ZIP64 extra field as the mode was + // Always + a.seek(0); + header = new byte[10]; + a.readFully(header); + assertArrayEquals("LFH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 3, 4, + // version needed to extract + 45, 0, + // GPB (EFS bit) + 0, 8, + // method + 0, 0 + }, header); + // ignore timestamp + a.skipBytes(4); + rest = new byte[17]; + a.readFully(rest); + // 1e6 == 0xF4240 + assertArrayEquals("LFH rest", new byte[] { + // CRC + (byte) 0x9E, (byte) 0xCB, (byte) 0x79, (byte) 0x12, + // Compressed Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // Original Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name length + 1, 0, + // extra field length + 20, 0, + // file name + (byte) '0' + }, rest); + + extra = new byte[20]; + a.readFully(extra); + assertArrayEquals("LFH extra", new byte[] { + // Header-ID + 1, 0, + // size of extra + 16, 0, + // original size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + 0, 0, 0, 0, + // compressed size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + 0, 0, 0, 0, + }, extra); + } + } + }; + } + + @Test public void writeSmallStoredEntryToStreamModeAlways() + throws Throwable { + withTemporaryArchive("writeSmallStoredEntryToStreamModeAlways", + writeSmallStoredEntryModeAlways(true), + false); + } + + @Test public void 
writeSmallStoredEntryKnownSizeToFileModeAlways() + throws Throwable { + withTemporaryArchive("writeSmallStoredEntryKnownSizeToFileModeAlways", + writeSmallStoredEntryModeAlways(true), + true); + } + + @Test public void writeSmallStoredEntryUnnownSizeToFileModeAlways() + throws Throwable { + withTemporaryArchive("writeSmallStoredEntryUnknownSizeToFileModeAlways", + writeSmallStoredEntryModeAlways(false), + true); + } + + /* + * One entry of length 1 million bytes, written with compression + * to a stream. + * + * Compression + Stream => sizes are set to 0 in LFH, real values + * are inside the data descriptor. No ZIP64 extra field at all. + */ + private static ZipOutputTest + writeSmallDeflatedEntryToStream(final boolean knownSize, + final Zip64Mode mode) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + if (mode != Zip64Mode.AsNeeded) { + zos.setUseZip64(mode); + } + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + if (knownSize) { + zae.setSize(ONE_MILLION); + } + zae.setMethod(ZipEntry.DEFLATED); + zos.putArchiveEntry(zae); + zos.write(buf); + zos.closeArchiveEntry(); + zos.close(); + + try (RandomAccessFile a = new RandomAccessFile(f, "r")) { + getLengthAndPositionAtCentralDirectory(a); + + final long cfhPos = a.getFilePointer(); + // grab first entry, verify sizes are not + // 0xFFFFFFF and it has no ZIP64 extended + // information extra field + byte[] header = new byte[12]; + a.readFully(header); + assertArrayEquals(new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 1, 2, + // version made by + 20, 0, + // version needed to extract + 20, 0, + // GPB (EFS + Data Descriptor) + 8, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + final byte[] crc = new byte[4]; + a.readFully(crc); + assertArrayEquals(new byte[] { + (byte) 0x9E, (byte) 0xCB, + (byte) 0x79, (byte) 0x12, + }, crc); + // skip compressed size + a.skipBytes(4); + byte[] rest = new byte[23]; + a.readFully(rest); + assertArrayEquals(new byte[] { + // Original Size + (byte) 0x40, (byte) 0x42, + (byte) 0x0F, 0, + // file name length + 1, 0, + // extra field length + 0, 0, + // comment length + 0, 0, + // disk number + 0, 0, + // attributes + 0, 0, + 0, 0, 0, 0, + // offset + 0, 0, 0, 0, + // file name + (byte) '0' + }, rest); + + // validate data descriptor + a.seek(cfhPos - 16); + byte[] dd = new byte[8]; + a.readFully(dd); + assertArrayEquals(new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 7, 8, + // CRC + (byte) 0x9E, (byte) 0xCB, (byte) 0x79, (byte) 0x12, + }, dd); + // skip uncompressed size + a.skipBytes(4); + dd = new byte[4]; + a.readFully(dd); + assertArrayEquals(new byte[] { + // original size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + }, dd); + + // and now validate local file header + a.seek(0); + header = new byte[10]; + a.readFully(header); + assertArrayEquals(new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 3, 4, + // version needed to extract + 20, 0, + // GPB (EFS + Data Descriptor) + 8, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + rest = new byte[17]; + a.readFully(rest); + assertArrayEquals(new byte[] { + // CRC + 0, 0, 0, 0, + // Compressed Size + 0, 0, 0, 0, + // Original Size + 0, 0, 0, 0, + // file name length + 1, 0, + // extra field length + 0, 0, + // file name + (byte) '0' + }, rest); + } + } + }; + + } + + @Test public void writeSmallDeflatedEntryKnownSizeToStream() + throws Throwable { + 
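+        // The factory above seeks to cfhPos - 16 because a classic (non-ZIP64) data
+        // descriptor is 16 bytes: PK 0x07 0x08 signature, CRC, 4-byte compressed size
+        // and 4-byte original size.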
withTemporaryArchive("writeSmallDeflatedEntryKnownSizeToStream", + writeSmallDeflatedEntryToStream(true, + Zip64Mode.AsNeeded), + false); + } + + @Test public void writeSmallDeflatedEntryKnownSizeToStreamModeNever() + throws Throwable { + withTemporaryArchive("writeSmallDeflatedEntryKnownSizeToStreamModeNever", + writeSmallDeflatedEntryToStream(true, + Zip64Mode.Never), + false); + } + + @Test public void writeSmallDeflatedEntryUnknownSizeToStream() + throws Throwable { + withTemporaryArchive("writeSmallDeflatedEntryUnknownSizeToStream", + writeSmallDeflatedEntryToStream(false, + Zip64Mode.AsNeeded), + false); + } + + @Test public void writeSmallDeflatedEntryUnknownSizeToStreamModeNever() + throws Throwable { + withTemporaryArchive("writeSmallDeflatedEntryUnknownSizeToStreamModeNever", + writeSmallDeflatedEntryToStream(false, + Zip64Mode.Never), + false); + } + + /* + * One entry of length 1 million bytes, written with compression + * to a stream. + * + * Compression + Stream => sizes are set to 0 in LFH, real values + * are inside the data descriptor. ZIP64 extra field as mode is Always. + */ + private static ZipOutputTest + writeSmallDeflatedEntryToStreamModeAlways(final boolean knownSize) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + zos.setUseZip64(Zip64Mode.Always); + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + if (knownSize) { + zae.setSize(ONE_MILLION); + } + zae.setMethod(ZipEntry.DEFLATED); + zos.putArchiveEntry(zae); + zos.write(buf); + zos.closeArchiveEntry(); + zos.close(); + + try (RandomAccessFile a = new RandomAccessFile(f, "r")) { + getLengthAndPositionAtCentralDirectory(a); + + final long cfhPos = a.getFilePointer(); + // grab first entry, verify sizes are not + // 0xFFFFFFF and it has an empty ZIP64 extended + // information extra field + byte[] header = new byte[12]; + a.readFully(header); + assertArrayEquals("CDH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 1, 2, + // version made by + 45, 0, + // version needed to extract + 45, 0, + // GPB (EFS + Data Descriptor) + 8, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + final byte[] crc = new byte[4]; + a.readFully(crc); + assertArrayEquals(new byte[] { + (byte) 0x9E, (byte) 0xCB, + (byte) 0x79, (byte) 0x12, + }, crc); + // skip compressed size + a.skipBytes(4); + byte[] rest = new byte[23]; + a.readFully(rest); + assertArrayEquals("CDH rest", new byte[] { + // Original Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name length + 1, 0, + // extra field length + 28, 0, + // comment length + 0, 0, + // disk number + 0, 0, + // attributes + 0, 0, + 0, 0, 0, 0, + // offset + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name + (byte) '0' + }, rest); + byte[] extra = new byte[12]; + a.readFully(extra); + assertArrayEquals("CDH extra", new byte[] { + // Header-ID + 1, 0, + // size of extra + 24, 0, + // original size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + 0, 0, 0, 0, + }, extra); + // skip compressed size + a.skipBytes(8); + byte[] offset = new byte[8]; + a.readFully(offset); + assertArrayEquals("extra offset", new byte[] { + 0, 0, 0, 0, 0, 0, 0, 0, + }, offset); + + // validate data descriptor + a.seek(cfhPos - 24); + byte[] dd = new byte[8]; + a.readFully(dd); + assertArrayEquals("DD", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 7, 8, + // CRC + (byte) 0x9E, (byte) 0xCB, (byte) 0x79, (byte) 
0x12, + }, dd); + // skip compressed size + a.skipBytes(8); + dd = new byte[8]; + a.readFully(dd); + assertArrayEquals("DD size", new byte[] { + // original size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + 0, 0, 0, 0 + }, dd); + + // and now validate local file header + a.seek(0); + header = new byte[10]; + a.readFully(header); + assertArrayEquals("LFH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 3, 4, + // version needed to extract + 45, 0, + // GPB (EFS + Data Descriptor) + 8, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + rest = new byte[17]; + a.readFully(rest); + assertArrayEquals("LFH rest", new byte[] { + // CRC + 0, 0, 0, 0, + // Compressed Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // Original Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name length + 1, 0, + // extra field length + 20, 0, + // file name + (byte) '0' + }, rest); + + extra = new byte[20]; + a.readFully(extra); + assertArrayEquals("LFH extra", new byte[] { + // Header-ID + 1, 0, + // size of extra + 16, 0, + // original size + 0, 0, 0, 0, + 0, 0, 0, 0, + // compressed size + 0, 0, 0, 0, + 0, 0, 0, 0, + }, extra); + } + } + }; + + } + + @Test public void writeSmallDeflatedEntryKnownSizeToStreamModeAlways() + throws Throwable { + withTemporaryArchive("writeSmallDeflatedEntryKnownSizeToStreamModeAlways", + writeSmallDeflatedEntryToStreamModeAlways(true), + false); + } + + @Test public void writeSmallDeflatedEntryUnknownSizeToStreamModeAlways() + throws Throwable { + withTemporaryArchive("writeSmallDeflatedEntryUnknownSizeToStreamModeAlways", + writeSmallDeflatedEntryToStreamModeAlways(false), + false); + } + + private static ZipOutputTest writeSmallDeflatedEntryToFile(final boolean knownSize) { + return writeSmallDeflatedEntryToFile(knownSize, Zip64Mode.AsNeeded); + } + + /* + * One entry of length 1 million bytes, written with compression + * to a file. + * + * Writing to a file => sizes are stored directly inside the LFH. + * No Data Descriptor at all. Shouldn't contain any ZIP64 extra + * field if size was known. 
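+     * (With an unknown size and mode AsNeeded the LFH is expected to carry a 20-byte
+     * ZIP64 extra field even though the final sizes fit into 32 bits.)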
+ */ + private static ZipOutputTest + writeSmallDeflatedEntryToFile(final boolean knownSize, + final Zip64Mode mode) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + if (mode != Zip64Mode.AsNeeded) { + zos.setUseZip64(mode); + } + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + if (knownSize) { + zae.setSize(ONE_MILLION); + } + zae.setMethod(ZipEntry.DEFLATED); + zos.putArchiveEntry(zae); + zos.write(buf); + zos.closeArchiveEntry(); + zos.close(); + + try (RandomAccessFile a = new RandomAccessFile(f, "r")) { + getLengthAndPositionAtCentralDirectory(a); + + // grab first CD entry, verify sizes are not + // 0xFFFFFFFF and it has a no ZIP64 extended + // information extra field + byte[] header = new byte[12]; + a.readFully(header); + assertArrayEquals(new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 1, 2, + // version made by + 20, 0, + // version needed to extract + 20, 0, + // GPB (EFS + *no* Data Descriptor) + 0, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + byte[] crc = new byte[4]; + a.readFully(crc); + assertArrayEquals(new byte[] { + (byte) 0x9E, (byte) 0xCB, + (byte) 0x79, (byte) 0x12, + }, crc); + // skip compressed size + a.skipBytes(4); + byte[] rest = new byte[23]; + a.readFully(rest); + assertArrayEquals(new byte[] { + // Original Size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + // file name length + 1, 0, + // extra field length + 0, 0, + // comment length + 0, 0, + // disk number + 0, 0, + // attributes + 0, 0, + 0, 0, 0, 0, + // offset + 0, 0, 0, 0, + // file name + (byte) '0' + }, rest); + + // and now validate local file header + a.seek(0); + header = new byte[10]; + a.readFully(header); + assertArrayEquals(new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 3, 4, + // version needed to extract + 20, 0, + // GPB (EFS bit, no DD) + 0, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + crc = new byte[4]; + a.readFully(crc); + assertArrayEquals(new byte[] { + (byte) 0x9E, (byte) 0xCB, + (byte) 0x79, (byte) 0x12, + }, crc); + // skip compressed size + a.skipBytes(4); + rest = new byte[9]; + a.readFully(rest); + + final boolean hasExtra = + mode == Zip64Mode.AsNeeded && !knownSize; + + assertArrayEquals(new byte[] { + // Original Size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + // file name length + 1, 0, + // extra field length + (byte) (!hasExtra ? 
0 : 20), 0, + // file name + (byte) '0' + }, rest); + if (hasExtra) { + final byte[] extra = new byte[12]; + a.readFully(extra); + assertArrayEquals(new byte[] { + // Header-ID + 1, 0, + // size of extra + 16, 0, + // original size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + 0, 0, 0, 0, + // don't know the + // compressed size, + // don't want to + // hard-code it + }, extra); + } + } + } + }; + } + + @Test public void writeSmallDeflatedEntryKnownSizeToFile() + throws Throwable { + withTemporaryArchive("writeSmallDeflatedEntryKnownSizeToFile", + writeSmallDeflatedEntryToFile(true), + true); + } + + @Test public void writeSmallDeflatedEntryUnknownSizeToFile() + throws Throwable { + withTemporaryArchive("writeSmallDeflatedEntryUnknownSizeToFile", + writeSmallDeflatedEntryToFile(false), + true); + } + + @Test public void writeSmallDeflatedEntryKnownSizeToFileModeNever() + throws Throwable { + withTemporaryArchive("writeSmallDeflatedEntryKnownSizeToFileModeNever", + writeSmallDeflatedEntryToFile(true, + Zip64Mode.Never), + true); + } + + @Test public void writeSmallDeflatedEntryUnknownSizeToFileModeNever() + throws Throwable { + withTemporaryArchive("writeSmallDeflatedEntryUnknownSizeToFileModeNever", + writeSmallDeflatedEntryToFile(false, + Zip64Mode.Never), + true); + } + + /* + * One entry of length 1 million bytes, written with compression + * to a file. + * + * Writing to a file => sizes are stored directly inside the LFH. + * No Data Descriptor at all. Must contain ZIP64 extra field as + * mode is Always. + */ + private static ZipOutputTest + writeSmallDeflatedEntryToFileModeAlways(final boolean knownSize) { + return new ZipOutputTest() { + @Override + public void test(final File f, final ZipArchiveOutputStream zos) + throws IOException { + zos.setUseZip64(Zip64Mode.Always); + final byte[] buf = new byte[ONE_MILLION]; + final ZipArchiveEntry zae = new ZipArchiveEntry("0"); + if (knownSize) { + zae.setSize(ONE_MILLION); + } + zae.setMethod(ZipEntry.DEFLATED); + zos.putArchiveEntry(zae); + zos.write(buf); + zos.closeArchiveEntry(); + zos.close(); + + try (RandomAccessFile a = new RandomAccessFile(f, "r")) { + getLengthAndPositionAtCentralDirectory(a); + + // grab first CD entry, verify sizes are not + // 0xFFFFFFFF and it has a an empty ZIP64 extended + // information extra field + byte[] header = new byte[12]; + a.readFully(header); + assertArrayEquals("CDH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 1, 2, + // version made by + 45, 0, + // version needed to extract + 45, 0, + // GPB (EFS + *no* Data Descriptor) + 0, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + byte[] crc = new byte[4]; + a.readFully(crc); + assertArrayEquals("CDH CRC", new byte[] { + (byte) 0x9E, (byte) 0xCB, (byte) 0x79, (byte) 0x12, + }, crc); + // skip compressed size + a.skipBytes(4); + byte[] rest = new byte[23]; + a.readFully(rest); + assertArrayEquals("CDH rest", new byte[] { + // Original Size + (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, + // file name length + 1, 0, + // extra field length + 28, 0, + // comment length + 0, 0, + // disk number + 0, 0, + // attributes + 0, 0, + 0, 0, 0, 0, + // offset + (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, + // file name + (byte) '0' + }, rest); + byte[] extra = new byte[12]; + a.readFully(extra); + assertArrayEquals("CDH extra", new byte[] { + // Header-ID + 1, 0, + // size of extra + 24, 0, + // original size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + 0, 0, 0, 0, + }, extra); + // skip compressed size + 
a.skipBytes(8); + byte[] offset = new byte[8]; + a.readFully(offset); + assertArrayEquals("extra offset", new byte[] { + 0, 0, 0, 0, 0, 0, 0, 0, + }, offset); + + // and now validate local file header + a.seek(0); + header = new byte[10]; + a.readFully(header); + assertArrayEquals("LFH start", new byte[] { + // sig + (byte) 0x50, (byte) 0x4b, 3, 4, + // version needed to extract + 45, 0, + // GPB (EFS bit, no DD) + 0, 8, + // method + 8, 0, + }, header); + // ignore timestamp + a.skipBytes(4); + crc = new byte[4]; + a.readFully(crc); + assertArrayEquals("LFH CRC", new byte[] { + (byte) 0x9E, (byte) 0xCB, + (byte) 0x79, (byte) 0x12, + }, crc); + rest = new byte[13]; + a.readFully(rest); + + assertArrayEquals("LFH rest", new byte[] { + // Compressed Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // Original Size + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + // file name length + 1, 0, + // extra field length + 20, 0, + // file name + (byte) '0' + }, rest); + + extra = new byte[12]; + a.readFully(extra); + assertArrayEquals("LFH extra", new byte[] { + // Header-ID + 1, 0, + // size of extra + 16, 0, + // original size + (byte) 0x40, (byte) 0x42, (byte) 0x0F, 0, + 0, 0, 0, 0, + // don't know the + // compressed size, + // don't want to + // hard-code it + }, extra); + } + } + }; + } + + @Test public void writeSmallDeflatedEntryKnownSizeToFileModeAlways() + throws Throwable { + withTemporaryArchive("writeSmallDeflatedEntryKnownSizeToFileModeAlways", + writeSmallDeflatedEntryToFileModeAlways(true), + true); + } + + @Test public void writeSmallDeflatedEntryUnknownSizeToFileModeAlways() + throws Throwable { + withTemporaryArchive("writeSmallDeflatedEntryUnknownSizeToFileModeAlways", + writeSmallDeflatedEntryToFileModeAlways(false), + true); + } + + static interface ZipOutputTest { + void test(File f, ZipArchiveOutputStream zos) throws IOException; + } + + private static void withTemporaryArchive(final String testName, + final ZipOutputTest test, + final boolean useRandomAccessFile) + throws Throwable { + final File f = getTempFile(testName); + BufferedOutputStream os = null; + final ZipArchiveOutputStream zos = useRandomAccessFile + ? 
new ZipArchiveOutputStream(f) + : new ZipArchiveOutputStream(os = new BufferedOutputStream(new FileOutputStream(f))); + try { + test.test(f, zos); + } catch (final IOException ex) { + System.err.println("Failed to write archive because of: " + + ex.getMessage() + + " - likely not enough disk space."); + assumeTrue(false); + } finally { + try { + zos.destroy(); + } finally { + if (os != null) { + os.close(); + } + AbstractTestCase.tryHardToDelete(f); + } + } + } + + private static File get5GBZerosFile() throws Throwable { + return getFile("5GB_of_Zeros.zip"); + } + + private static File get5GBZerosFileGeneratedBy7ZIP() throws Throwable { + return getFile("5GB_of_Zeros_7ZIP.zip"); + } + + private static File get5GBZerosFileGeneratedByJava7Jar() throws Throwable { + return getFile("5GB_of_Zeros_jar.zip"); + } + + private static File get5GBZerosFileGeneratedByWinZIP() throws Throwable { + return getFile("5GB_of_Zeros_WinZip.zip"); + } + + private static File get5GBZerosFileGeneratedByPKZip() throws Throwable { + return getFile("5GB_of_Zeros_PKZip.zip"); + } + + private static File get100KFileFile() throws Throwable { + return getFile("100k_Files.zip"); + } + + private static File get100KFileFileGeneratedBy7ZIP() throws Throwable { + return getFile("100k_Files_7ZIP.zip"); + } + + private static File get100KFileFileGeneratedByWinCF() throws Throwable { + return getFile("100k_Files_WindowsCompressedFolders.zip"); + } + + private static File get100KFileFileGeneratedByJava7Jar() throws Throwable { + return getFile("100k_Files_jar.zip"); + } + + private static File get100KFileFileGeneratedByWinZIP() throws Throwable { + return getFile("100k_Files_WinZIP.zip"); + } + + private static File get100KFileFileGeneratedByPKZip() throws Throwable { + return getFile("100k_Files_PKZip.zip"); + } + + private static File getTempFile(final String testName) throws Throwable { + final File f = File.createTempFile("commons-compress-" + testName, ".zip"); + f.deleteOnExit(); + return f; + } + + private static void read5GBOfZerosImpl(final File f, final String expectedName) + throws IOException { + final FileInputStream fin = new FileInputStream(f); + ZipArchiveInputStream zin = null; + try { + zin = new ZipArchiveInputStream(fin); + ZipArchiveEntry zae = zin.getNextZipEntry(); + while (zae.isDirectory()) { + zae = zin.getNextZipEntry(); + } + assertEquals(expectedName, zae.getName()); + final byte[] buf = new byte[1024 * 1024]; + long read = 0; + final Random r = new Random(System.currentTimeMillis()); + int readNow; + while ((readNow = zin.read(buf, 0, buf.length)) > 0) { + // testing all bytes for a value of 0 is going to take + // too long, just pick a few ones randomly + for (int i = 0; i < 1024; i++) { + final int idx = r.nextInt(readNow); + assertEquals("testing byte " + (read + idx), 0, buf[idx]); + } + read += readNow; + } + assertEquals(FIVE_BILLION, read); + assertNull(zin.getNextZipEntry()); + assertEquals(FIVE_BILLION, zae.getSize()); + } finally { + if (zin != null) { + zin.close(); + } + fin.close(); // fin cannot be null here + } + } + + private static void read5GBOfZerosUsingZipFileImpl(final File f, + final String expectedName) + throws IOException { + ZipFile zf = null; + try { + zf = new ZipFile(f); + final Enumeration<ZipArchiveEntry> e = zf.getEntries(); + assertTrue(e.hasMoreElements()); + ZipArchiveEntry zae = e.nextElement(); + while (zae.isDirectory()) { + zae = e.nextElement(); + } + assertEquals(expectedName, zae.getName()); + assertEquals(FIVE_BILLION, zae.getSize()); + final byte[] buf = 
new byte[1024 * 1024]; + long read = 0; + final Random r = new Random(System.currentTimeMillis()); + int readNow; + try (InputStream zin = zf.getInputStream(zae)) { + while ((readNow = zin.read(buf, 0, buf.length)) > 0) { + // testing all bytes for a value of 0 is going to take + // too long, just pick a few ones randomly + for (int i = 0; i < 1024; i++) { + final int idx = r.nextInt(readNow); + assertEquals("testing byte " + (read + idx), 0, buf[idx]); + } + read += readNow; + } + } + assertEquals(FIVE_BILLION, read); + assertFalse(e.hasMoreElements()); + } finally { + ZipFile.closeQuietly(zf); + } + } + + private static void read100KFilesImpl(final File f) throws IOException { + final FileInputStream fin = new FileInputStream(f); + ZipArchiveInputStream zin = null; + try { + zin = new ZipArchiveInputStream(fin); + int files = 0; + ZipArchiveEntry zae = null; + while ((zae = zin.getNextZipEntry()) != null) { + if (!zae.isDirectory()) { + files++; + assertEquals(0, zae.getSize()); + } + } + assertEquals(ONE_HUNDRED_THOUSAND, files); + } finally { + if (zin != null) { + zin.close(); + } + fin.close(); + } + } + + private static void read100KFilesUsingZipFileImpl(final File f) + throws IOException { + ZipFile zf = null; + try { + zf = new ZipFile(f); + int files = 0; + for (final Enumeration<ZipArchiveEntry> e = zf.getEntries(); e.hasMoreElements(); ) { + final ZipArchiveEntry zae = e.nextElement(); + if (!zae.isDirectory()) { + files++; + assertEquals(0, zae.getSize()); + } + } + assertEquals(ONE_HUNDRED_THOUSAND, files); + } finally { + ZipFile.closeQuietly(zf); + } + } + + private static long getLengthAndPositionAtCentralDirectory(final RandomAccessFile a) + throws IOException { + final long end = a.length(); + a.seek(end - 22 - 20); + final byte[] sig = new byte[4]; + a.readFully(sig); + if (sig[0] != (byte) 0x50 || sig[1] != (byte) 0x4b + || sig[2] != 6 || sig[3] != 7) { + // not a ZIP64 archive + return getLengthAndPositionAtCentralDirectory32(a, end); + } + + final long cdOffsetLoc = end - 22 - 20 - 56 + 48; + // seek to central directory locator + a.seek(cdOffsetLoc); + final byte[] cdOffset = new byte[8]; + a.readFully(cdOffset); + a.seek(ZipEightByteInteger.getLongValue(cdOffset)); + return end; + } + + private static long getLengthAndPositionAtCentralDirectory32(final RandomAccessFile a, final long end) + throws IOException { + a.seek(end - 22 + 16); + final byte[] cdOffset = new byte[4]; + a.readFully(cdOffset); + a.seek(ZipLong.getValue(cdOffset)); + return end; + } + + private static void write100KFilesToStream(final ZipArchiveOutputStream zos) + throws IOException { + for (int i = 0; i < ONE_HUNDRED_THOUSAND; i++) { + final ZipArchiveEntry zae = new ZipArchiveEntry(String.valueOf(i)); + zae.setSize(0); + zos.putArchiveEntry(zae); + zos.closeArchiveEntry(); + } + zos.close(); + } + + private static void + write3EntriesCreatingBigArchiveToStream(final ZipArchiveOutputStream zos) + throws IOException { + final byte[] buf = new byte[ONE_MILLION]; + ZipArchiveEntry zae = null; + for (int i = 0; i < 2; i++) { + zae = new ZipArchiveEntry(String.valueOf(i)); + zae.setSize(FIVE_BILLION / 2); + zae.setMethod(ZipEntry.STORED); + zae.setCrc(0x8a408f16L); + zos.putArchiveEntry(zae); + for (int j = 0; j < FIVE_BILLION / 2 / 1000 / 1000; + j++) { + zos.write(buf); + } + zos.closeArchiveEntry(); + } + zae = new ZipArchiveEntry(String.valueOf(2)); + zae.setSize(1); + zae.setMethod(ZipEntry.STORED); + zae.setCrc(0x9b9265bL); + zos.putArchiveEntry(zae); + zos.write(new byte[] { 42 }); + 
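+            // The two 2.5 GB stored entries above push the archive past 4 GiB, so this
+            // final one-byte entry starts at an offset that no longer fits into the
+            // 32-bit central directory offset field.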
zos.closeArchiveEntry(); + zos.close(); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryTest.java new file mode 100644 index 000000000..7bdf54cd7 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntryTest.java @@ -0,0 +1,293 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.junit.Assert.*; + +import java.io.ByteArrayOutputStream; +import java.util.zip.ZipEntry; + +import org.junit.Test; + +/** + * JUnit testcases for org.apache.commons.compress.archivers.zip.ZipEntry. + * + */ +public class ZipArchiveEntryTest { + + /** + * test handling of extra fields + */ + @Test + public void testExtraFields() { + final AsiExtraField a = new AsiExtraField(); + a.setDirectory(true); + a.setMode(0755); + final UnrecognizedExtraField u = new UnrecognizedExtraField(); + u.setHeaderId(ExtraFieldUtilsTest.UNRECOGNIZED_HEADER); + u.setLocalFileDataData(new byte[0]); + + final ZipArchiveEntry ze = new ZipArchiveEntry("test/"); + ze.setExtraFields(new ZipExtraField[] {a, u}); + final byte[] data1 = ze.getExtra(); + ZipExtraField[] result = ze.getExtraFields(); + assertEquals("first pass", 2, result.length); + assertSame(a, result[0]); + assertSame(u, result[1]); + + final UnrecognizedExtraField u2 = new UnrecognizedExtraField(); + u2.setHeaderId(ExtraFieldUtilsTest.UNRECOGNIZED_HEADER); + u2.setLocalFileDataData(new byte[] {1}); + + ze.addExtraField(u2); + final byte[] data2 = ze.getExtra(); + result = ze.getExtraFields(); + assertEquals("second pass", 2, result.length); + assertSame(a, result[0]); + assertSame(u2, result[1]); + assertEquals("length second pass", data1.length+1, data2.length); + + final UnrecognizedExtraField u3 = new UnrecognizedExtraField(); + u3.setHeaderId(new ZipShort(2)); + u3.setLocalFileDataData(new byte[] {1}); + ze.addExtraField(u3); + result = ze.getExtraFields(); + assertEquals("third pass", 3, result.length); + + ze.removeExtraField(ExtraFieldUtilsTest.UNRECOGNIZED_HEADER); + final byte[] data3 = ze.getExtra(); + result = ze.getExtraFields(); + assertEquals("fourth pass", 2, result.length); + assertSame(a, result[0]); + assertSame(u3, result[1]); + assertEquals("length fourth pass", data2.length, data3.length); + + try { + ze.removeExtraField(ExtraFieldUtilsTest.UNRECOGNIZED_HEADER); + fail("should be no such element"); + } catch (final java.util.NoSuchElementException nse) { + } + } + + /** + * test handling of extra fields via central directory + */ + @Test + public void testExtraFieldMerging() { + final AsiExtraField a = 
new AsiExtraField(); + a.setDirectory(true); + a.setMode(0755); + final UnrecognizedExtraField u = new UnrecognizedExtraField(); + u.setHeaderId(ExtraFieldUtilsTest.UNRECOGNIZED_HEADER); + u.setLocalFileDataData(new byte[0]); + + final ZipArchiveEntry ze = new ZipArchiveEntry("test/"); + ze.setExtraFields(new ZipExtraField[] {a, u}); + + // merge + // Header-ID 1 + length 1 + one byte of data + final byte[] b = ExtraFieldUtilsTest.UNRECOGNIZED_HEADER.getBytes(); + ze.setCentralDirectoryExtra(new byte[] {b[0], b[1], 1, 0, 127}); + + ZipExtraField[] result = ze.getExtraFields(); + assertEquals("first pass", 2, result.length); + assertSame(a, result[0]); + assertEquals(ExtraFieldUtilsTest.UNRECOGNIZED_HEADER, + result[1].getHeaderId()); + assertEquals(new ZipShort(0), result[1].getLocalFileDataLength()); + assertEquals(new ZipShort(1), result[1].getCentralDirectoryLength()); + + // add new + // Header-ID 2 + length 0 + ze.setCentralDirectoryExtra(new byte[] {2, 0, 0, 0}); + + result = ze.getExtraFields(); + assertEquals("second pass", 3, result.length); + + // merge + // Header-ID 2 + length 1 + one byte of data + ze.setExtra(new byte[] {2, 0, 1, 0, 127}); + + result = ze.getExtraFields(); + assertEquals("third pass", 3, result.length); + assertSame(a, result[0]); + assertEquals(new ZipShort(2), result[2].getHeaderId()); + assertEquals(new ZipShort(1), result[2].getLocalFileDataLength()); + assertEquals(new ZipShort(0), result[2].getCentralDirectoryLength()); + } + + /** + * test handling of extra fields + */ + @Test + public void testAddAsFirstExtraField() { + final AsiExtraField a = new AsiExtraField(); + a.setDirectory(true); + a.setMode(0755); + final UnrecognizedExtraField u = new UnrecognizedExtraField(); + u.setHeaderId(ExtraFieldUtilsTest.UNRECOGNIZED_HEADER); + u.setLocalFileDataData(new byte[0]); + + final ZipArchiveEntry ze = new ZipArchiveEntry("test/"); + ze.setExtraFields(new ZipExtraField[] {a, u}); + final byte[] data1 = ze.getExtra(); + + final UnrecognizedExtraField u2 = new UnrecognizedExtraField(); + u2.setHeaderId(ExtraFieldUtilsTest.UNRECOGNIZED_HEADER); + u2.setLocalFileDataData(new byte[] {1}); + + ze.addAsFirstExtraField(u2); + final byte[] data2 = ze.getExtra(); + ZipExtraField[] result = ze.getExtraFields(); + assertEquals("second pass", 2, result.length); + assertSame(u2, result[0]); + assertSame(a, result[1]); + assertEquals("length second pass", data1.length + 1, data2.length); + + final UnrecognizedExtraField u3 = new UnrecognizedExtraField(); + u3.setHeaderId(new ZipShort(2)); + u3.setLocalFileDataData(new byte[] {1}); + ze.addAsFirstExtraField(u3); + result = ze.getExtraFields(); + assertEquals("third pass", 3, result.length); + assertSame(u3, result[0]); + assertSame(u2, result[1]); + assertSame(a, result[2]); + } + + @Test + public void testUnixMode() { + ZipArchiveEntry ze = new ZipArchiveEntry("foo"); + assertEquals(0, ze.getPlatform()); + ze.setUnixMode(0755); + assertEquals(3, ze.getPlatform()); + assertEquals(0755, + (ze.getExternalAttributes() >> 16) & 0xFFFF); + assertEquals(0, ze.getExternalAttributes() & 0xFFFF); + + ze.setUnixMode(0444); + assertEquals(3, ze.getPlatform()); + assertEquals(0444, + (ze.getExternalAttributes() >> 16) & 0xFFFF); + assertEquals(1, ze.getExternalAttributes() & 0xFFFF); + + ze = new ZipArchiveEntry("foo/"); + assertEquals(0, ze.getPlatform()); + ze.setUnixMode(0777); + assertEquals(3, ze.getPlatform()); + assertEquals(0777, + (ze.getExternalAttributes() >> 16) & 0xFFFF); + assertEquals(0x10, ze.getExternalAttributes() & 
0xFFFF); + + ze.setUnixMode(0577); + assertEquals(3, ze.getPlatform()); + assertEquals(0577, + (ze.getExternalAttributes() >> 16) & 0xFFFF); + assertEquals(0x11, ze.getExternalAttributes() & 0xFFFF); + } + + /** + * Test case for + * <a href="https://issues.apache.org/jira/browse/COMPRESS-93" + * >COMPRESS-93</a>. + */ + @Test + public void testCompressionMethod() throws Exception { + final ZipArchiveOutputStream zos = + new ZipArchiveOutputStream(new ByteArrayOutputStream()); + final ZipArchiveEntry entry = new ZipArchiveEntry("foo"); + assertEquals(-1, entry.getMethod()); + assertFalse(zos.canWriteEntryData(entry)); + + entry.setMethod(ZipEntry.STORED); + assertEquals(ZipEntry.STORED, entry.getMethod()); + assertTrue(zos.canWriteEntryData(entry)); + + entry.setMethod(ZipEntry.DEFLATED); + assertEquals(ZipEntry.DEFLATED, entry.getMethod()); + assertTrue(zos.canWriteEntryData(entry)); + + // Test the unsupported "imploded" compression method (6) + entry.setMethod(6); + assertEquals(6, entry.getMethod()); + assertFalse(zos.canWriteEntryData(entry)); + zos.close(); + } + + /** + * Test case for + * <a href="https://issues.apache.org/jira/browse/COMPRESS-94" + * >COMPRESS-94</a>. + */ + @Test + public void testNotEquals() { + final ZipArchiveEntry entry1 = new ZipArchiveEntry("foo"); + final ZipArchiveEntry entry2 = new ZipArchiveEntry("bar"); + assertFalse(entry1.equals(entry2)); + } + + /** + * Tests comment's influence on equals comparisons. + * @see "https://issues.apache.org/jira/browse/COMPRESS-187" + */ + @Test + public void testNullCommentEqualsEmptyComment() { + final ZipArchiveEntry entry1 = new ZipArchiveEntry("foo"); + final ZipArchiveEntry entry2 = new ZipArchiveEntry("foo"); + final ZipArchiveEntry entry3 = new ZipArchiveEntry("foo"); + entry1.setComment(null); + entry2.setComment(""); + entry3.setComment("bar"); + assertEquals(entry1, entry2); + assertFalse(entry1.equals(entry3)); + assertFalse(entry2.equals(entry3)); + } + + @Test + public void testCopyConstructor() throws Exception { + final ZipArchiveEntry archiveEntry = new ZipArchiveEntry("fred"); + archiveEntry.setUnixMode(0664); + archiveEntry.setMethod(ZipEntry.DEFLATED); + archiveEntry.getGeneralPurposeBit().useStrongEncryption(true); + final ZipArchiveEntry copy = new ZipArchiveEntry(archiveEntry); + assertEquals(archiveEntry, copy); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-379" + */ + @Test + public void isUnixSymlinkIsFalseIfMoreThanOneFlagIsSet() throws Exception { + try (ZipFile zf = new ZipFile(getFile("COMPRESS-379.jar"))) { + ZipArchiveEntry ze = zf.getEntry("META-INF/maven/"); + assertFalse(ze.isUnixSymlink()); + } + } + + @Test + public void testIsUnixSymlink() { + ZipArchiveEntry ze = new ZipArchiveEntry("foo"); + ze.setUnixMode(UnixStat.LINK_FLAG); + assertTrue(ze.isUnixSymlink()); + ze.setUnixMode(UnixStat.LINK_FLAG | UnixStat.DIR_FLAG); + assertFalse(ze.isUnixSymlink()); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStreamTest.java new file mode 100644 index 000000000..b9395fbfd --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStreamTest.java @@ -0,0 +1,516 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.EOFException; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.zip.ZipException; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class ZipArchiveInputStreamTest { + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-176" + */ + @Test + public void winzipBackSlashWorkaround() throws Exception { + ZipArchiveInputStream in = null; + try { + in = new ZipArchiveInputStream(new FileInputStream(getFile("test-winzip.zip"))); + ZipArchiveEntry zae = in.getNextZipEntry(); + zae = in.getNextZipEntry(); + zae = in.getNextZipEntry(); + assertEquals("\u00e4/", zae.getName()); + } finally { + if (in != null) { + in.close(); + } + } + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-189" + */ + @Test + public void properUseOfInflater() throws Exception { + ZipFile zf = null; + ZipArchiveInputStream in = null; + try { + zf = new ZipFile(getFile("COMPRESS-189.zip")); + final ZipArchiveEntry zae = zf.getEntry("USD0558682-20080101.ZIP"); + in = new ZipArchiveInputStream(new BufferedInputStream(zf.getInputStream(zae))); + ZipArchiveEntry innerEntry; + while ((innerEntry = in.getNextZipEntry()) != null) { + if (innerEntry.getName().endsWith("XML")) { + assertTrue(0 < in.read()); + } + } + } finally { + if (zf != null) { + zf.close(); + } + if (in != null) { + in.close(); + } + } + } + + @Test + public void shouldConsumeArchiveCompletely() throws Exception { + final InputStream is = ZipArchiveInputStreamTest.class + .getResourceAsStream("/archive_with_trailer.zip"); + final ZipArchiveInputStream zip = new ZipArchiveInputStream(is); + while (zip.getNextZipEntry() != null) { + // just consume the archive + } + final byte[] expected = new byte[] { + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', '\n' + }; + final byte[] actual = new byte[expected.length]; + is.read(actual); + assertArrayEquals(expected, actual); + zip.close(); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-219" + */ + @Test + public void shouldReadNestedZip() throws IOException { + ZipArchiveInputStream in = null; + try { + in = new ZipArchiveInputStream(new FileInputStream(getFile("COMPRESS-219.zip"))); + extractZipInputStream(in); + } finally { + if (in != null) { + in.close(); + } + } + } + + private void 
extractZipInputStream(final ZipArchiveInputStream in) + throws IOException { + ZipArchiveEntry zae = in.getNextZipEntry(); + while (zae != null) { + if (zae.getName().endsWith(".zip")) { + extractZipInputStream(new ZipArchiveInputStream(in)); + } + zae = in.getNextZipEntry(); + } + } + + @Test + public void testUnshrinkEntry() throws Exception { + final ZipArchiveInputStream in = new ZipArchiveInputStream(new FileInputStream(getFile("SHRUNK.ZIP"))); + + ZipArchiveEntry entry = in.getNextZipEntry(); + assertEquals("method", ZipMethod.UNSHRINKING.getCode(), entry.getMethod()); + assertTrue(in.canReadEntryData(entry)); + + FileInputStream original = new FileInputStream(getFile("test1.xml")); + try { + assertArrayEquals(IOUtils.toByteArray(original), IOUtils.toByteArray(in)); + } finally { + original.close(); + } + + entry = in.getNextZipEntry(); + assertEquals("method", ZipMethod.UNSHRINKING.getCode(), entry.getMethod()); + assertTrue(in.canReadEntryData(entry)); + + original = new FileInputStream(getFile("test2.xml")); + try { + assertArrayEquals(IOUtils.toByteArray(original), IOUtils.toByteArray(in)); + } finally { + original.close(); + } + } + + + /** + * Test case for + * <a href="https://issues.apache.org/jira/browse/COMPRESS-264" + * >COMPRESS-264</a>. + */ + @Test + public void testReadingOfFirstStoredEntry() throws Exception { + + try (ZipArchiveInputStream in = new ZipArchiveInputStream(new FileInputStream(getFile("COMPRESS-264.zip")))) { + final ZipArchiveEntry ze = in.getNextZipEntry(); + assertEquals(5, ze.getSize()); + assertArrayEquals(new byte[] { 'd', 'a', 't', 'a', '\n' }, + IOUtils.toByteArray(in)); + } + } + + /** + * Test case for + * <a href="https://issues.apache.org/jira/browse/COMPRESS-351" + * >COMPRESS-351</a>. + */ + @Test + public void testMessageWithCorruptFileName() throws Exception { + try (ZipArchiveInputStream in = new ZipArchiveInputStream(new FileInputStream(getFile("COMPRESS-351.zip")))) { + ZipArchiveEntry ze = in.getNextZipEntry(); + while (ze != null) { + ze = in.getNextZipEntry(); + } + fail("expected EOFException"); + } catch (final EOFException ex) { + final String m = ex.getMessage(); + assertTrue(m.startsWith("Truncated ZIP entry: ?2016")); // the first character is not printable + } + } + + @Test + public void testUnzipBZip2CompressedEntry() throws Exception { + + try (ZipArchiveInputStream in = new ZipArchiveInputStream(new FileInputStream(getFile("bzip2-zip.zip")))) { + final ZipArchiveEntry ze = in.getNextZipEntry(); + assertEquals(42, ze.getSize()); + final byte[] expected = new byte[42]; + Arrays.fill(expected, (byte) 'a'); + assertArrayEquals(expected, IOUtils.toByteArray(in)); + } + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-380" + */ + @Test + public void readDeflate64CompressedStream() throws Exception { + final File input = getFile("COMPRESS-380/COMPRESS-380-input"); + final File archive = getFile("COMPRESS-380/COMPRESS-380.zip"); + try (FileInputStream in = new FileInputStream(input); + ZipArchiveInputStream zin = new ZipArchiveInputStream(new FileInputStream(archive))) { + byte[] orig = IOUtils.toByteArray(in); + ZipArchiveEntry e = zin.getNextZipEntry(); + byte[] fromZip = IOUtils.toByteArray(zin); + assertArrayEquals(orig, fromZip); + } + } + + @Test + public void readDeflate64CompressedStreamWithDataDescriptor() throws Exception { + // this is a copy of bla.jar with META-INF/MANIFEST.MF's method manually changed to ENHANCED_DEFLATED + final File archive = getFile("COMPRESS-380/COMPRESS-380-dd.zip"); + try 
(ZipArchiveInputStream zin = new ZipArchiveInputStream(new FileInputStream(archive))) { + ZipArchiveEntry e = zin.getNextZipEntry(); + assertEquals(-1, e.getSize()); + assertEquals(ZipMethod.ENHANCED_DEFLATED.getCode(), e.getMethod()); + byte[] fromZip = IOUtils.toByteArray(zin); + byte[] expected = new byte[] { + 'M', 'a', 'n', 'i', 'f', 'e', 's', 't', '-', 'V', 'e', 'r', 's', 'i', 'o', 'n', ':', ' ', '1', '.', '0', + '\r', '\n', '\r', '\n' + }; + assertArrayEquals(expected, fromZip); + zin.getNextZipEntry(); + assertEquals(25, e.getSize()); + } + } + + /** + * Test case for + * <a href="https://issues.apache.org/jira/browse/COMPRESS-364" + * >COMPRESS-364</a>. + */ + @Test + public void testWithBytesAfterData() throws Exception { + final int expectedNumEntries = 2; + final InputStream is = ZipArchiveInputStreamTest.class + .getResourceAsStream("/archive_with_bytes_after_data.zip"); + final ZipArchiveInputStream zip = new ZipArchiveInputStream(is); + + try { + int actualNumEntries = 0; + ZipArchiveEntry zae = zip.getNextZipEntry(); + while (zae != null) { + actualNumEntries++; + readEntry(zip, zae); + zae = zip.getNextZipEntry(); + } + assertEquals(expectedNumEntries, actualNumEntries); + } finally { + zip.close(); + } + } + + /** + * <code>getNextZipEntry()</code> should throw a <code>ZipException</code> rather than return + * <code>null</code> when an unexpected structure is encountered. + */ + @Test + public void testThrowOnInvalidEntry() throws Exception { + final InputStream is = ZipArchiveInputStreamTest.class + .getResourceAsStream("/invalid-zip.zip"); + final ZipArchiveInputStream zip = new ZipArchiveInputStream(is); + + try { + zip.getNextZipEntry(); + fail("IOException expected"); + } catch (ZipException expected) { + assertTrue(expected.getMessage().contains("Unexpected record signature")); + } finally { + zip.close(); + } + } + + /** + * Test correct population of header and data offsets. 
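+ * (mixed.zip is expected to contain one deflated and one stored entry at the offsets asserted below.)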
+ */ + @Test + public void testOffsets() throws Exception { + // mixed.zip contains both inflated and stored files + try (InputStream archiveStream = ZipArchiveInputStream.class.getResourceAsStream("/mixed.zip"); + ZipArchiveInputStream zipStream = new ZipArchiveInputStream((archiveStream)) + ) { + ZipArchiveEntry inflatedEntry = zipStream.getNextZipEntry(); + Assert.assertEquals("inflated.txt", inflatedEntry.getName()); + Assert.assertEquals(0x0000, inflatedEntry.getLocalHeaderOffset()); + Assert.assertEquals(0x0046, inflatedEntry.getDataOffset()); + ZipArchiveEntry storedEntry = zipStream.getNextZipEntry(); + Assert.assertEquals("stored.txt", storedEntry.getName()); + Assert.assertEquals(0x5892, storedEntry.getLocalHeaderOffset()); + Assert.assertEquals(0x58d6, storedEntry.getDataOffset()); + Assert.assertNull(zipStream.getNextZipEntry()); + } + } + + @Test + public void nameSourceDefaultsToName() throws Exception { + nameSource("bla.zip", "test1.xml", ZipArchiveEntry.NameSource.NAME); + } + + @Test + public void nameSourceIsSetToUnicodeExtraField() throws Exception { + nameSource("utf8-winzip-test.zip", "\u20AC_for_Dollar.txt", + ZipArchiveEntry.NameSource.UNICODE_EXTRA_FIELD); + } + + @Test + public void nameSourceIsSetToEFS() throws Exception { + nameSource("utf8-7zip-test.zip", "\u20AC_for_Dollar.txt", 3, + ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG); + } + + @Test + public void properlyMarksEntriesAsUnreadableIfUncompressedSizeIsUnknown() throws Exception { + // we never read any data + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new ByteArrayInputStream(new byte[0]))) { + ZipArchiveEntry e = new ZipArchiveEntry("test"); + e.setMethod(ZipMethod.DEFLATED.getCode()); + assertTrue(zis.canReadEntryData(e)); + e.setMethod(ZipMethod.ENHANCED_DEFLATED.getCode()); + assertTrue(zis.canReadEntryData(e)); + e.setMethod(ZipMethod.BZIP2.getCode()); + assertFalse(zis.canReadEntryData(e)); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingDeflate() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("bla.zip")); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingStore() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("COMPRESS-264.zip")); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingUnshrink() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("SHRUNK.ZIP")); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingExplode() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("imploding-8Kdict-3trees.zip")); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingDeflate64() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("COMPRESS-380/COMPRESS-380.zip")); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingBzip2() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("bzip2-zip.zip")); + } + + private void singleByteReadConsistentlyReturnsMinusOneAtEof(File file) throws Exception { + try (FileInputStream in = new FileInputStream(file); + ZipArchiveInputStream archive = new ZipArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read()); + assertEquals(-1, archive.read()); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingDeflate() throws Exception { + 
multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("bla.zip")); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingStore() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("COMPRESS-264.zip")); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingUnshrink() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("SHRUNK.ZIP")); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingExplode() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("imploding-8Kdict-3trees.zip")); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingDeflate64() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("COMPRESS-380/COMPRESS-380.zip")); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingBzip2() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("bzip2-zip.zip")); + } + + private void multiByteReadConsistentlyReturnsMinusOneAtEof(File file) throws Exception { + byte[] buf = new byte[2]; + try (FileInputStream in = new FileInputStream(getFile("bla.zip")); + ZipArchiveInputStream archive = new ZipArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + IOUtils.toByteArray(archive); + assertEquals(-1, archive.read(buf)); + assertEquals(-1, archive.read(buf)); + } + } + + @Test + public void singleByteReadThrowsAtEofForCorruptedStoredEntry() throws Exception { + byte[] content; + try (FileInputStream fs = new FileInputStream(getFile("COMPRESS-264.zip"))) { + content = IOUtils.toByteArray(fs); + } + // make size much bigger than entry's real size + for (int i = 17; i < 26; i++) { + content[i] = (byte) 0xff; + } + try (ByteArrayInputStream in = new ByteArrayInputStream(content); + ZipArchiveInputStream archive = new ZipArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + try { + IOUtils.toByteArray(archive); + fail("expected exception"); + } catch (IOException ex) { + assertEquals("Truncated ZIP file", ex.getMessage()); + } + try { + archive.read(); + fail("expected exception"); + } catch (IOException ex) { + assertEquals("Truncated ZIP file", ex.getMessage()); + } + try { + archive.read(); + fail("expected exception"); + } catch (IOException ex) { + assertEquals("Truncated ZIP file", ex.getMessage()); + } + } + } + + @Test + public void multiByteReadThrowsAtEofForCorruptedStoredEntry() throws Exception { + byte[] content; + try (FileInputStream fs = new FileInputStream(getFile("COMPRESS-264.zip"))) { + content = IOUtils.toByteArray(fs); + } + // make size much bigger than entry's real size + for (int i = 17; i < 26; i++) { + content[i] = (byte) 0xff; + } + byte[] buf = new byte[2]; + try (ByteArrayInputStream in = new ByteArrayInputStream(content); + ZipArchiveInputStream archive = new ZipArchiveInputStream(in)) { + ArchiveEntry e = archive.getNextEntry(); + try { + IOUtils.toByteArray(archive); + fail("expected exception"); + } catch (IOException ex) { + assertEquals("Truncated ZIP file", ex.getMessage()); + } + try { + archive.read(buf); + fail("expected exception"); + } catch (IOException ex) { + assertEquals("Truncated ZIP file", ex.getMessage()); + } + try { + archive.read(buf); + fail("expected exception"); + } catch (IOException ex) { + assertEquals("Truncated ZIP file", ex.getMessage()); + } + } + } + + private static byte[] readEntry(ZipArchiveInputStream zip, ZipArchiveEntry zae) throws IOException { + final int len = 
(int)zae.getSize(); + final byte[] buff = new byte[len]; + zip.read(buff, 0, len); + + return buff; + } + + private static void nameSource(String archive, String entry, ZipArchiveEntry.NameSource expected) throws Exception { + nameSource(archive, entry, 1, expected); + } + + private static void nameSource(String archive, String entry, int entryNo, ZipArchiveEntry.NameSource expected) + throws Exception { + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new FileInputStream(getFile(archive)))) { + ZipArchiveEntry ze; + do { + ze = zis.getNextZipEntry(); + } while (--entryNo > 0); + assertEquals(entry, ze.getName()); + assertEquals(expected, ze.getNameSource()); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipClassCoverageTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipClassCoverageTest.java new file mode 100644 index 000000000..0595877da --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipClassCoverageTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import org.hamcrest.core.IsInstanceOf; +import org.junit.Test; + +public class ZipClassCoverageTest { + + @Test + public void testConstructZip64RequiredException() { + Zip64RequiredException e = new Zip64RequiredException("critique of pure"); + assertNotNull(e); + } + @Test + public void testMessageException() { + ZipArchiveEntry ze = new ZipArchiveEntry("hello"); + String entryTooBigMessage = Zip64RequiredException.getEntryTooBigMessage(ze); + assertEquals("hello's size exceeds the limit of 4GByte.", + entryTooBigMessage); + } + + @Test + public void testConstantConstructor() + throws NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException { + Class<ZipConstants> clazz = ZipConstants.class; + Constructor<ZipConstants> constructor = clazz.getDeclaredConstructor(); + assertFalse(constructor.isAccessible()); + constructor.setAccessible(true); + Object o = constructor.newInstance(); + assertThat(o, IsInstanceOf.instanceOf(clazz)); + constructor.setAccessible(false); + + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEightByteIntegerTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEightByteIntegerTest.java new file mode 100644 index 000000000..e38f81997 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEightByteIntegerTest.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import java.math.BigInteger; + +import org.junit.Test; + +/** + * JUnit testcases for org.apache.commons.compress.archivers.zip.ZipEightByteInteger. + * + */ +public class ZipEightByteIntegerTest { + + /** + * Test conversion to bytes. 
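+ * ZIP stores integers in little-endian byte order, so the least significant byte of the value comes first.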
+ */ + @Test + public void testLongToBytes() { + final ZipEightByteInteger zl = new ZipEightByteInteger(0xAB12345678l); + final byte[] result = zl.getBytes(); + assertEquals("length getBytes", 8, result.length); + assertEquals("first byte getBytes", 0x78, result[0]); + assertEquals("second byte getBytes", 0x56, result[1]); + assertEquals("third byte getBytes", 0x34, result[2]); + assertEquals("fourth byte getBytes", 0x12, result[3]); + assertEquals("fifth byte getBytes", (byte) 0xAB, result[4]); + assertEquals("sixth byte getBytes", 0, result[5]); + assertEquals("seventh byte getBytes", 0, result[6]); + assertEquals("eighth byte getBytes", 0, result[7]); + } + + /** + * Test conversion from bytes. + */ + @Test + public void testLongFromBytes() { + final byte[] val = new byte[] {0x78, 0x56, 0x34, 0x12, (byte) 0xAB, 0x00, 0x00, 0x00}; + final ZipEightByteInteger zl = new ZipEightByteInteger(val); + assertEquals("longValue from bytes", 0xAB12345678l, zl.getLongValue()); + } + + /** + * Test conversion to bytes. + */ + @Test + public void testBIToBytes() { + final ZipEightByteInteger zl = + new ZipEightByteInteger(BigInteger.valueOf(Long.MAX_VALUE) + .shiftLeft(1)); + final byte[] result = zl.getBytes(); + assertEquals("length getBytes", 8, result.length); + assertEquals("first byte getBytes", (byte) 0xFE, result[0]); + assertEquals("second byte getBytes", (byte) 0xFF, result[1]); + assertEquals("third byte getBytes", (byte) 0xFF, result[2]); + assertEquals("fourth byte getBytes", (byte) 0xFF, result[3]); + assertEquals("fifth byte getBytes", (byte) 0xFF, result[4]); + assertEquals("sixth byte getBytes", (byte) 0xFF, result[5]); + assertEquals("seventh byte getBytes", (byte) 0xFF, result[6]); + assertEquals("eighth byte getBytes", (byte) 0xFF, result[7]); + } + + /** + * Test conversion from bytes. + */ + @Test + public void testBIFromBytes() { + final byte[] val = new byte[] {(byte) 0xFE, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF}; + final ZipEightByteInteger zl = new ZipEightByteInteger(val); + assertEquals("value from bytes", + BigInteger.valueOf(Long.MAX_VALUE).shiftLeft(1), + zl.getValue()); + } + + /** + * Test the contract of the equals method. + */ + @Test + public void testEquals() { + final ZipEightByteInteger zl = new ZipEightByteInteger(0x12345678); + final ZipEightByteInteger zl2 = new ZipEightByteInteger(0x12345678); + final ZipEightByteInteger zl3 = new ZipEightByteInteger(0x87654321); + + assertTrue("reflexive", zl.equals(zl)); + + assertTrue("works", zl.equals(zl2)); + assertTrue("works, part two", !zl.equals(zl3)); + + assertTrue("symmetric", zl2.equals(zl)); + + assertTrue("null handling", !zl.equals(null)); + assertTrue("non ZipEightByteInteger handling", !zl.equals(new Integer(0x1234))); + } + + /** + * Test sign handling. 
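+ * An all-ones bit pattern is expected to be read back as the unsigned value 2^64 - 1 rather than as -1.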
+ */ + @Test + public void testSign() { + final ZipEightByteInteger zl = new ZipEightByteInteger(new byte[] {(byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF}); + assertEquals(BigInteger.valueOf(Long.MAX_VALUE).shiftLeft(1).setBit(0), + zl.getValue()); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java new file mode 100644 index 000000000..1f798d0bc --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test zip encodings. + */ +public class ZipEncodingTest { + + private static final String UNENC_STRING = "\u2016"; + + // stress test for internal grow method. 
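+ // every character below is unmappable in the tested code pages, so each is expected to expand to a six-byte %Uxxxx escape.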
+ private static final String BAD_STRING = + "\u2016\u2015\u2016\u2015\u2016\u2015\u2016\u2015\u2016\u2015\u2016"; + + private static final String BAD_STRING_ENC = + "%U2016%U2015%U2016%U2015%U2016%U2015%U2016%U2015%U2016%U2015%U2016"; + + @Test + public void testNothingToMakeCoverallsHappier() { + Object o = new ZipEncodingHelper() { + }; + assertNotNull(o); + } + + @Test + public void testGetNonexistentEncodng() throws IOException { + ZipEncoding ze = ZipEncodingHelper.getZipEncoding("I-am-a-banana"); + assertNotNull(ze); + if (ze instanceof CharsetAccessor) { + CharsetAccessor hasCharset = (CharsetAccessor) ze; + Assert.assertEquals(Charset.defaultCharset(), hasCharset.getCharset()); + } + } + + @Test + public void testIsUTF8() throws IOException { + assertTrue(ZipEncodingHelper.isUTF8("UTF-8")); + assertTrue(ZipEncodingHelper.isUTF8("UTF8")); + Assert.assertEquals(Charset.defaultCharset().name().equals("UTF-8"), ZipEncodingHelper.isUTF8(null)); + } + + @Test + public void testSimpleCp437Encoding() throws IOException { + doSimpleEncodingsTest(437); + } + + @Test + public void testSimpleCp850Encoding() throws IOException { + doSimpleEncodingsTest(850); + } + + @Test + public void testEbcidic() throws IOException { + + doSimpleEncodingTest("IBM1047", null); + } + + + private void doSimpleEncodingsTest(int n) throws IOException { + + doSimpleEncodingTest("Cp" + n, null); + doSimpleEncodingTest("cp" + n, null); + doSimpleEncodingTest("CP" + n, null); + doSimpleEncodingTest("IBM" + n, null); + doSimpleEncodingTest("ibm" + n, null); + } + + @Test + public void testNioCp1252Encoding() throws IOException { + // CP1252 has some undefined code points, these are + // the defined ones + // retrieved by + // awk '/^0x/ && NF>2 {print $1;}' CP1252.TXT + final byte[] b = + new byte[] { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + (byte) 0x80, (byte) 0x82, (byte) 0x83, (byte) 0x84, + (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, + (byte) 0x89, (byte) 0x8A, (byte) 0x8B, (byte) 0x8C, + (byte) 0x8E, (byte) 0x91, (byte) 0x92, (byte) 0x93, + (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, + (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9B, + (byte) 0x9C, (byte) 0x9E, (byte) 0x9F, (byte) 0xA0, + (byte) 0xA1, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, + (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, + (byte) 0xA9, (byte) 0xAA, (byte) 0xAB, (byte) 0xAC, + (byte) 0xAD, (byte) 0xAE, (byte) 0xAF, (byte) 0xB0, + (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, + (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, (byte) 0xB8, + (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, + (byte) 0xBD, (byte) 0xBE, (byte) 0xBF, (byte) 0xC0, + (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, + (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, (byte) 0xC8, + (byte) 0xC9, (byte) 0xCA, 
(byte) 0xCB, (byte) 0xCC, + (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, (byte) 0xD0, + (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, + (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, (byte) 0xD8, + (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, + (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, (byte) 0xE0, + (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, + (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, (byte) 0xE8, + (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, + (byte) 0xED, (byte) 0xEE, (byte) 0xEF, (byte) 0xF0, + (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, + (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, (byte) 0xF8, + (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, + (byte) 0xFD, (byte) 0xFE, (byte) 0xFF }; + + doSimpleEncodingTest("Cp1252",b); + } + + private static void assertEquals(final byte[] expected, final ByteBuffer actual) { + + Assert.assertEquals(expected.length, actual.limit()); + + for (final byte anExpected : expected) { + final byte a = actual.get(); + Assert.assertEquals(anExpected, a); + } + + } + + private void doSimpleEncodingTest(final String name, byte[] testBytes) + throws IOException { + + final ZipEncoding enc = ZipEncodingHelper.getZipEncoding(name); + assertThat(enc, IsInstanceOf.instanceOf(NioZipEncoding.class)); + if (testBytes == null) { + + testBytes = new byte[256]; + for (int i = 0; i < 256; ++i) { + testBytes[i] = (byte) i; + } + } + + final String decoded = enc.decode(testBytes); + + assertTrue(enc.canEncode(decoded)); + + final ByteBuffer encoded = enc.encode(decoded); + + assertEquals(testBytes, encoded); + + assertFalse(enc.canEncode(UNENC_STRING)); + assertEquals("%U2016".getBytes(name), enc.encode(UNENC_STRING)); + assertFalse(enc.canEncode(BAD_STRING)); + assertEquals(BAD_STRING_ENC.getBytes(name), enc.encode(BAD_STRING)); + } + +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java new file mode 100644 index 000000000..cb6d1975e --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java @@ -0,0 +1,802 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.junit.Assert.*; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.zip.CRC32; +import java.util.zip.ZipEntry; + +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; +import org.junit.After; +import org.junit.Assert; +import org.junit.Test; + +public class ZipFileTest { + private ZipFile zf = null; + + @After + public void tearDown() { + ZipFile.closeQuietly(zf); + } + + @Test + public void testCDOrder() throws Exception { + readOrderTest(); + final ArrayList<ZipArchiveEntry> l = Collections.list(zf.getEntries()); + assertEntryName(l, 0, "AbstractUnicodeExtraField"); + assertEntryName(l, 1, "AsiExtraField"); + assertEntryName(l, 2, "ExtraFieldUtils"); + assertEntryName(l, 3, "FallbackZipEncoding"); + assertEntryName(l, 4, "GeneralPurposeBit"); + assertEntryName(l, 5, "JarMarker"); + assertEntryName(l, 6, "NioZipEncoding"); + assertEntryName(l, 7, "Simple8BitZipEncoding"); + assertEntryName(l, 8, "UnicodeCommentExtraField"); + assertEntryName(l, 9, "UnicodePathExtraField"); + assertEntryName(l, 10, "UnixStat"); + assertEntryName(l, 11, "UnparseableExtraFieldData"); + assertEntryName(l, 12, "UnrecognizedExtraField"); + assertEntryName(l, 13, "ZipArchiveEntry"); + assertEntryName(l, 14, "ZipArchiveInputStream"); + assertEntryName(l, 15, "ZipArchiveOutputStream"); + assertEntryName(l, 16, "ZipEncoding"); + assertEntryName(l, 17, "ZipEncodingHelper"); + assertEntryName(l, 18, "ZipExtraField"); + assertEntryName(l, 19, "ZipUtil"); + assertEntryName(l, 20, "ZipLong"); + assertEntryName(l, 21, "ZipShort"); + assertEntryName(l, 22, "ZipFile"); + } + + @Test + public void testCDOrderInMemory() throws Exception { + byte[] data = null; + try (FileInputStream fis = new FileInputStream(getFile("ordertest.zip"))) { + data = IOUtils.toByteArray(fis); + } + + zf = new ZipFile(new SeekableInMemoryByteChannel(data), ZipEncodingHelper.UTF8); + final ArrayList<ZipArchiveEntry> l = Collections.list(zf.getEntries()); + assertEntryName(l, 0, "AbstractUnicodeExtraField"); + assertEntryName(l, 1, "AsiExtraField"); + assertEntryName(l, 2, "ExtraFieldUtils"); + assertEntryName(l, 3, "FallbackZipEncoding"); + assertEntryName(l, 4, "GeneralPurposeBit"); + assertEntryName(l, 5, "JarMarker"); + assertEntryName(l, 6, "NioZipEncoding"); + assertEntryName(l, 7, "Simple8BitZipEncoding"); + assertEntryName(l, 8, "UnicodeCommentExtraField"); + assertEntryName(l, 9, "UnicodePathExtraField"); + assertEntryName(l, 10, "UnixStat"); + assertEntryName(l, 11, "UnparseableExtraFieldData"); + assertEntryName(l, 12, "UnrecognizedExtraField"); + assertEntryName(l, 13, "ZipArchiveEntry"); + assertEntryName(l, 14, "ZipArchiveInputStream"); + assertEntryName(l, 15, "ZipArchiveOutputStream"); + assertEntryName(l, 16, "ZipEncoding"); + assertEntryName(l, 17, "ZipEncodingHelper"); + assertEntryName(l, 18, "ZipExtraField"); + assertEntryName(l, 19, "ZipUtil"); + 
assertEntryName(l, 20, "ZipLong"); + assertEntryName(l, 21, "ZipShort"); + assertEntryName(l, 22, "ZipFile"); + } + + @Test + public void testPhysicalOrder() throws Exception { + readOrderTest(); + final ArrayList<ZipArchiveEntry> l = Collections.list(zf.getEntriesInPhysicalOrder()); + assertEntryName(l, 0, "AbstractUnicodeExtraField"); + assertEntryName(l, 1, "AsiExtraField"); + assertEntryName(l, 2, "ExtraFieldUtils"); + assertEntryName(l, 3, "FallbackZipEncoding"); + assertEntryName(l, 4, "GeneralPurposeBit"); + assertEntryName(l, 5, "JarMarker"); + assertEntryName(l, 6, "NioZipEncoding"); + assertEntryName(l, 7, "Simple8BitZipEncoding"); + assertEntryName(l, 8, "UnicodeCommentExtraField"); + assertEntryName(l, 9, "UnicodePathExtraField"); + assertEntryName(l, 10, "UnixStat"); + assertEntryName(l, 11, "UnparseableExtraFieldData"); + assertEntryName(l, 12, "UnrecognizedExtraField"); + assertEntryName(l, 13, "ZipArchiveEntry"); + assertEntryName(l, 14, "ZipArchiveInputStream"); + assertEntryName(l, 15, "ZipArchiveOutputStream"); + assertEntryName(l, 16, "ZipEncoding"); + assertEntryName(l, 17, "ZipEncodingHelper"); + assertEntryName(l, 18, "ZipExtraField"); + assertEntryName(l, 19, "ZipFile"); + assertEntryName(l, 20, "ZipLong"); + assertEntryName(l, 21, "ZipShort"); + assertEntryName(l, 22, "ZipUtil"); + } + + @Test + public void testDoubleClose() throws Exception { + readOrderTest(); + zf.close(); + try { + zf.close(); + } catch (final Exception ex) { + fail("Caught exception of second close"); + } + } + + @Test + public void testReadingOfStoredEntry() throws Exception { + final File f = File.createTempFile("commons-compress-zipfiletest", ".zip"); + f.deleteOnExit(); + OutputStream o = null; + InputStream i = null; + try { + o = new FileOutputStream(f); + final ZipArchiveOutputStream zo = new ZipArchiveOutputStream(o); + ZipArchiveEntry ze = new ZipArchiveEntry("foo"); + ze.setMethod(ZipEntry.STORED); + ze.setSize(4); + ze.setCrc(0xb63cfbcdl); + zo.putArchiveEntry(ze); + zo.write(new byte[] { 1, 2, 3, 4 }); + zo.closeArchiveEntry(); + zo.close(); + o.close(); + o = null; + + zf = new ZipFile(f); + ze = zf.getEntry("foo"); + assertNotNull(ze); + i = zf.getInputStream(ze); + final byte[] b = new byte[4]; + assertEquals(4, i.read(b)); + assertEquals(-1, i.read()); + } finally { + if (o != null) { + o.close(); + } + if (i != null) { + i.close(); + } + f.delete(); + } + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-176" + */ + @Test + public void testWinzipBackSlashWorkaround() throws Exception { + final File archive = getFile("test-winzip.zip"); + zf = new ZipFile(archive); + assertNull(zf.getEntry("\u00e4\\\u00fc.txt")); + assertNotNull(zf.getEntry("\u00e4/\u00fc.txt")); + } + + /** + * Test case for + * <a href="https://issues.apache.org/jira/browse/COMPRESS-208" + * >COMPRESS-208</a>. + */ + @Test + public void testSkipsPK00Prefix() throws Exception { + final File archive = getFile("COMPRESS-208.zip"); + zf = new ZipFile(archive); + assertNotNull(zf.getEntry("test1.xml")); + assertNotNull(zf.getEntry("test2.xml")); + } + + @Test + public void testUnixSymlinkSampleFile() throws Exception { + final String entryPrefix = "COMPRESS-214_unix_symlinks/"; + final TreeMap<String, String> expectedVals = new TreeMap<>(); + + // I threw in some Japanese characters to keep things interesting. 
+ expectedVals.put(entryPrefix + "link1", "../COMPRESS-214_unix_symlinks/./a/b/c/../../../\uF999"); + expectedVals.put(entryPrefix + "link2", "../COMPRESS-214_unix_symlinks/./a/b/c/../../../g"); + expectedVals.put(entryPrefix + "link3", "../COMPRESS-214_unix_symlinks/././a/b/c/../../../\u76F4\u6A39"); + expectedVals.put(entryPrefix + "link4", "\u82B1\u5B50/\u745B\u5B50"); + expectedVals.put(entryPrefix + "\uF999", "./\u82B1\u5B50/\u745B\u5B50/\u5897\u8C37/\uF999"); + expectedVals.put(entryPrefix + "g", "./a/b/c/d/e/f/g"); + expectedVals.put(entryPrefix + "\u76F4\u6A39", "./g"); + + // Notice how a directory link might contain a trailing slash, or it might not. + // Also note: symlinks are always stored as files, even if they link to directories. + expectedVals.put(entryPrefix + "link5", "../COMPRESS-214_unix_symlinks/././a/b"); + expectedVals.put(entryPrefix + "link6", "../COMPRESS-214_unix_symlinks/././a/b/"); + + // I looked into creating a test with hard links, but zip does not appear to + // support hard links, so nevermind. + + final File archive = getFile("COMPRESS-214_unix_symlinks.zip"); + + zf = new ZipFile(archive); + final Enumeration<ZipArchiveEntry> en = zf.getEntries(); + while (en.hasMoreElements()) { + final ZipArchiveEntry zae = en.nextElement(); + final String link = zf.getUnixSymlink(zae); + if (zae.isUnixSymlink()) { + final String name = zae.getName(); + final String expected = expectedVals.get(name); + assertEquals(expected, link); + } else { + // Should be null if it's not a symlink! + assertNull(link); + } + } + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-227" + */ + @Test + public void testDuplicateEntry() throws Exception { + final File archive = getFile("COMPRESS-227.zip"); + zf = new ZipFile(archive); + + final ZipArchiveEntry ze = zf.getEntry("test1.txt"); + assertNotNull(ze); + assertNotNull(zf.getInputStream(ze)); + + int numberOfEntries = 0; + for (final ZipArchiveEntry entry : zf.getEntries("test1.txt")) { + numberOfEntries++; + assertNotNull(zf.getInputStream(entry)); + } + assertEquals(2, numberOfEntries); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-228" + */ + @Test + public void testExcessDataInZip64ExtraField() throws Exception { + final File archive = getFile("COMPRESS-228.zip"); + zf = new ZipFile(archive); + // actually, if we get here, the test already has passed + + final ZipArchiveEntry ze = zf.getEntry("src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java"); + assertEquals(26101, ze.getSize()); + } + + @Test + public void testUnshrinking() throws Exception { + zf = new ZipFile(getFile("SHRUNK.ZIP")); + ZipArchiveEntry test = zf.getEntry("TEST1.XML"); + FileInputStream original = new FileInputStream(getFile("test1.xml")); + try { + assertArrayEquals(IOUtils.toByteArray(original), + IOUtils.toByteArray(zf.getInputStream(test))); + } finally { + original.close(); + } + test = zf.getEntry("TEST2.XML"); + original = new FileInputStream(getFile("test2.xml")); + try { + assertArrayEquals(IOUtils.toByteArray(original), + IOUtils.toByteArray(zf.getInputStream(test))); + } finally { + original.close(); + } + } + + /** + * Test case for + * <a href="https://issues.apache.org/jira/browse/COMPRESS-264" + * >COMPRESS-264</a>. 
+ */ + @Test + public void testReadingOfFirstStoredEntry() throws Exception { + final File archive = getFile("COMPRESS-264.zip"); + zf = new ZipFile(archive); + final ZipArchiveEntry ze = zf.getEntry("test.txt"); + assertEquals(5, ze.getSize()); + assertArrayEquals(new byte[] {'d', 'a', 't', 'a', '\n'}, + IOUtils.toByteArray(zf.getInputStream(ze))); + } + + @Test + public void testUnzipBZip2CompressedEntry() throws Exception { + final File archive = getFile("bzip2-zip.zip"); + zf = new ZipFile(archive); + final ZipArchiveEntry ze = zf.getEntry("lots-of-as"); + assertEquals(42, ze.getSize()); + final byte[] expected = new byte[42]; + Arrays.fill(expected , (byte)'a'); + assertArrayEquals(expected, IOUtils.toByteArray(zf.getInputStream(ze))); + } + + @Test + public void testConcurrentReadSeekable() throws Exception { + // mixed.zip contains both inflated and stored files + byte[] data = null; + try (FileInputStream fis = new FileInputStream(getFile("mixed.zip"))) { + data = IOUtils.toByteArray(fis); + } + zf = new ZipFile(new SeekableInMemoryByteChannel(data), ZipEncodingHelper.UTF8); + + final Map<String, byte[]> content = new HashMap<String, byte[]>(); + for (ZipArchiveEntry entry: Collections.list(zf.getEntries())) { + content.put(entry.getName(), IOUtils.toByteArray(zf.getInputStream(entry))); + } + + final AtomicInteger passedCount = new AtomicInteger(); + Runnable run = new Runnable() { + @Override + public void run() { + for (ZipArchiveEntry entry: Collections.list(zf.getEntries())) { + assertAllReadMethods(content.get(entry.getName()), zf, entry); + } + passedCount.incrementAndGet(); + } + }; + Thread t0 = new Thread(run); + Thread t1 = new Thread(run); + t0.start(); + t1.start(); + t0.join(); + t1.join(); + assertEquals(2, passedCount.get()); + } + + @Test + public void testConcurrentReadFile() throws Exception { + // mixed.zip contains both inflated and stored files + final File archive = getFile("mixed.zip"); + zf = new ZipFile(archive); + + final Map<String, byte[]> content = new HashMap<String, byte[]>(); + for (ZipArchiveEntry entry: Collections.list(zf.getEntries())) { + content.put(entry.getName(), IOUtils.toByteArray(zf.getInputStream(entry))); + } + + final AtomicInteger passedCount = new AtomicInteger(); + Runnable run = new Runnable() { + @Override + public void run() { + for (ZipArchiveEntry entry: Collections.list(zf.getEntries())) { + assertAllReadMethods(content.get(entry.getName()), zf, entry); + } + passedCount.incrementAndGet(); + } + }; + Thread t0 = new Thread(run); + Thread t1 = new Thread(run); + t0.start(); + t1.start(); + t0.join(); + t1.join(); + assertEquals(2, passedCount.get()); + } + + /** + * Test correct population of header and data offsets. + */ + @Test + public void testOffsets() throws Exception { + // mixed.zip contains both inflated and stored files + final File archive = getFile("mixed.zip"); + try (ZipFile zf = new ZipFile(archive)) { + ZipArchiveEntry inflatedEntry = zf.getEntry("inflated.txt"); + Assert.assertEquals(0x0000, inflatedEntry.getLocalHeaderOffset()); + Assert.assertEquals(0x0046, inflatedEntry.getDataOffset()); + Assert.assertTrue(inflatedEntry.isStreamContiguous()); + ZipArchiveEntry storedEntry = zf.getEntry("stored.txt"); + Assert.assertEquals(0x5892, storedEntry.getLocalHeaderOffset()); + Assert.assertEquals(0x58d6, storedEntry.getDataOffset()); + Assert.assertTrue(inflatedEntry.isStreamContiguous()); + } + } + + /** + * Test correct population of header and data offsets when they are written after stream. 
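+ * (the archive is written to a non-seekable ByteArrayOutputStream, so the deflated entry's sizes can only be recorded after its data has been written.)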
+ */ + @Test + public void testDelayedOffsetsAndSizes() throws Exception { + ByteArrayOutputStream zipContent = new ByteArrayOutputStream(); + try (ZipArchiveOutputStream zipOutput = new ZipArchiveOutputStream(zipContent)) { + ZipArchiveEntry inflatedEntry = new ZipArchiveEntry("inflated.txt"); + inflatedEntry.setMethod(ZipEntry.DEFLATED); + zipOutput.putArchiveEntry(inflatedEntry); + zipOutput.write("Hello Deflated\n".getBytes()); + zipOutput.closeArchiveEntry(); + + byte[] storedContent = "Hello Stored\n".getBytes(); + ZipArchiveEntry storedEntry = new ZipArchiveEntry("stored.txt"); + storedEntry.setMethod(ZipEntry.STORED); + storedEntry.setSize(storedContent.length); + storedEntry.setCrc(calculateCrc32(storedContent)); + zipOutput.putArchiveEntry(storedEntry); + zipOutput.write("Hello Stored\n".getBytes()); + zipOutput.closeArchiveEntry(); + + } + + try (ZipFile zf = new ZipFile(new SeekableInMemoryByteChannel(zipContent.toByteArray()))) { + ZipArchiveEntry inflatedEntry = zf.getEntry("inflated.txt"); + Assert.assertNotEquals(-1L, inflatedEntry.getLocalHeaderOffset()); + Assert.assertNotEquals(-1L, inflatedEntry.getDataOffset()); + Assert.assertTrue(inflatedEntry.isStreamContiguous()); + Assert.assertNotEquals(-1L, inflatedEntry.getCompressedSize()); + Assert.assertNotEquals(-1L, inflatedEntry.getSize()); + ZipArchiveEntry storedEntry = zf.getEntry("stored.txt"); + Assert.assertNotEquals(-1L, storedEntry.getLocalHeaderOffset()); + Assert.assertNotEquals(-1L, storedEntry.getDataOffset()); + Assert.assertTrue(inflatedEntry.isStreamContiguous()); + Assert.assertNotEquals(-1L, storedEntry.getCompressedSize()); + Assert.assertNotEquals(-1L, storedEntry.getSize()); + } + } + + /** + * Test entries alignment. + */ + @Test + public void testEntryAlignment() throws Exception { + SeekableInMemoryByteChannel zipContent = new SeekableInMemoryByteChannel(); + try (ZipArchiveOutputStream zipOutput = new ZipArchiveOutputStream(zipContent)) { + ZipArchiveEntry inflatedEntry = new ZipArchiveEntry("inflated.txt"); + inflatedEntry.setMethod(ZipEntry.DEFLATED); + inflatedEntry.setAlignment(1024); + zipOutput.putArchiveEntry(inflatedEntry); + zipOutput.write("Hello Deflated\n".getBytes(Charset.forName("UTF-8"))); + zipOutput.closeArchiveEntry(); + + ZipArchiveEntry storedEntry = new ZipArchiveEntry("stored.txt"); + storedEntry.setMethod(ZipEntry.STORED); + storedEntry.setAlignment(1024); + zipOutput.putArchiveEntry(storedEntry); + zipOutput.write("Hello Stored\n".getBytes(Charset.forName("UTF-8"))); + zipOutput.closeArchiveEntry(); + + ZipArchiveEntry storedEntry2 = new ZipArchiveEntry("stored2.txt"); + storedEntry2.setMethod(ZipEntry.STORED); + storedEntry2.setAlignment(1024); + storedEntry2.addExtraField(new ResourceAlignmentExtraField(1)); + zipOutput.putArchiveEntry(storedEntry2); + zipOutput.write("Hello overload-alignment Stored\n".getBytes(Charset.forName("UTF-8"))); + zipOutput.closeArchiveEntry(); + + ZipArchiveEntry storedEntry3 = new ZipArchiveEntry("stored3.txt"); + storedEntry3.setMethod(ZipEntry.STORED); + storedEntry3.addExtraField(new ResourceAlignmentExtraField(1024)); + zipOutput.putArchiveEntry(storedEntry3); + zipOutput.write("Hello copy-alignment Stored\n".getBytes(Charset.forName("UTF-8"))); + zipOutput.closeArchiveEntry(); + + } + + try (ZipFile zf = new ZipFile(new SeekableInMemoryByteChannel( + Arrays.copyOfRange(zipContent.array(), 0, (int)zipContent.size()) + ))) { + ZipArchiveEntry inflatedEntry = zf.getEntry("inflated.txt"); + ResourceAlignmentExtraField inflatedAlignmentEx = + 
(ResourceAlignmentExtraField)inflatedEntry.getExtraField(ResourceAlignmentExtraField.ID); + assertNotEquals(-1L, inflatedEntry.getCompressedSize()); + assertNotEquals(-1L, inflatedEntry.getSize()); + assertEquals(0L, inflatedEntry.getDataOffset()%1024); + assertNotNull(inflatedAlignmentEx); + assertEquals(1024, inflatedAlignmentEx.getAlignment()); + assertFalse(inflatedAlignmentEx.allowMethodChange()); + try (InputStream stream = zf.getInputStream(inflatedEntry)) { + Assert.assertEquals("Hello Deflated\n", + new String(IOUtils.toByteArray(stream), Charset.forName("UTF-8"))); + } + ZipArchiveEntry storedEntry = zf.getEntry("stored.txt"); + ResourceAlignmentExtraField storedAlignmentEx = + (ResourceAlignmentExtraField)storedEntry.getExtraField(ResourceAlignmentExtraField.ID); + assertNotEquals(-1L, storedEntry.getCompressedSize()); + assertNotEquals(-1L, storedEntry.getSize()); + assertEquals(0L, storedEntry.getDataOffset()%1024); + assertNotNull(storedAlignmentEx); + assertEquals(1024, storedAlignmentEx.getAlignment()); + assertFalse(storedAlignmentEx.allowMethodChange()); + try (InputStream stream = zf.getInputStream(storedEntry)) { + Assert.assertEquals("Hello Stored\n", + new String(IOUtils.toByteArray(stream), Charset.forName("UTF-8"))); + } + + ZipArchiveEntry storedEntry2 = zf.getEntry("stored2.txt"); + ResourceAlignmentExtraField stored2AlignmentEx = + (ResourceAlignmentExtraField)storedEntry2.getExtraField(ResourceAlignmentExtraField.ID); + assertNotEquals(-1L, storedEntry2.getCompressedSize()); + assertNotEquals(-1L, storedEntry2.getSize()); + assertEquals(0L, storedEntry2.getDataOffset()%1024); + assertNotNull(stored2AlignmentEx); + assertEquals(1024, stored2AlignmentEx.getAlignment()); + assertFalse(stored2AlignmentEx.allowMethodChange()); + try (InputStream stream = zf.getInputStream(storedEntry2)) { + Assert.assertEquals("Hello overload-alignment Stored\n", + new String(IOUtils.toByteArray(stream), Charset.forName("UTF-8"))); + } + + ZipArchiveEntry storedEntry3 = zf.getEntry("stored3.txt"); + ResourceAlignmentExtraField stored3AlignmentEx = + (ResourceAlignmentExtraField)storedEntry3.getExtraField(ResourceAlignmentExtraField.ID); + assertNotEquals(-1L, storedEntry3.getCompressedSize()); + assertNotEquals(-1L, storedEntry3.getSize()); + assertEquals(0L, storedEntry3.getDataOffset()%1024); + assertNotNull(stored3AlignmentEx); + assertEquals(1024, stored3AlignmentEx.getAlignment()); + assertFalse(stored3AlignmentEx.allowMethodChange()); + try (InputStream stream = zf.getInputStream(storedEntry3)) { + Assert.assertEquals("Hello copy-alignment Stored\n", + new String(IOUtils.toByteArray(stream), Charset.forName("UTF-8"))); + } + } + } + + /** + * Test too big alignment, resulting into exceeding extra field limit. + */ + @Test(expected = IllegalArgumentException.class) + public void testEntryAlignmentExceed() throws Exception { + SeekableInMemoryByteChannel zipContent = new SeekableInMemoryByteChannel(); + try (ZipArchiveOutputStream zipOutput = new ZipArchiveOutputStream(zipContent)) { + ZipArchiveEntry inflatedEntry = new ZipArchiveEntry("inflated.txt"); + inflatedEntry.setMethod(ZipEntry.STORED); + inflatedEntry.setAlignment(0x20000); + } + } + + /** + * Test non power of 2 alignment. 
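+ * setAlignment is expected to reject any value that is not a power of two.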
+ */ + @Test(expected = IllegalArgumentException.class) + public void testInvalidAlignment() throws Exception { + ZipArchiveEntry entry = new ZipArchiveEntry("dummy"); + entry.setAlignment(3); + } + + @Test + public void nameSourceDefaultsToName() throws Exception { + nameSource("bla.zip", "test1.xml", ZipArchiveEntry.NameSource.NAME); + } + + @Test + public void nameSourceIsSetToUnicodeExtraField() throws Exception { + nameSource("utf8-winzip-test.zip", "\u20AC_for_Dollar.txt", + ZipArchiveEntry.NameSource.UNICODE_EXTRA_FIELD); + } + + @Test + public void nameSourceIsSetToEFS() throws Exception { + nameSource("utf8-7zip-test.zip", "\u20AC_for_Dollar.txt", + ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG); + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-380" + */ + @Test + public void readDeflate64CompressedStream() throws Exception { + final File input = getFile("COMPRESS-380/COMPRESS-380-input"); + final File archive = getFile("COMPRESS-380/COMPRESS-380.zip"); + try (FileInputStream in = new FileInputStream(input); + ZipFile zf = new ZipFile(archive)) { + byte[] orig = IOUtils.toByteArray(in); + ZipArchiveEntry e = zf.getEntry("input2"); + try (InputStream s = zf.getInputStream(e)) { + byte[] fromZip = IOUtils.toByteArray(s); + assertArrayEquals(orig, fromZip); + } + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingDeflate() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("bla.zip")); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingStore() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("COMPRESS-264.zip")); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingUnshrink() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("SHRUNK.ZIP")); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingExplode() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("imploding-8Kdict-3trees.zip")); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingDeflate64() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("COMPRESS-380/COMPRESS-380.zip")); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofUsingBzip2() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(getFile("bzip2-zip.zip")); + } + + private void singleByteReadConsistentlyReturnsMinusOneAtEof(File file) throws Exception { + try (ZipFile archive = new ZipFile(file)) { + ZipArchiveEntry e = archive.getEntries().nextElement(); + try (InputStream is = archive.getInputStream(e)) { + IOUtils.toByteArray(is); + assertEquals(-1, is.read()); + assertEquals(-1, is.read()); + } + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingDeflate() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("bla.zip")); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingStore() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("COMPRESS-264.zip")); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingUnshrink() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("SHRUNK.ZIP")); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingExplode() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("imploding-8Kdict-3trees.zip")); + } + + @Test + public void 
multiByteReadConsistentlyReturnsMinusOneAtEofUsingDeflate64() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("COMPRESS-380/COMPRESS-380.zip")); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofUsingBzip2() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(getFile("bzip2-zip.zip")); + } + + private void multiByteReadConsistentlyReturnsMinusOneAtEof(File file) throws Exception { + byte[] buf = new byte[2]; + try (ZipFile archive = new ZipFile(file)) { + ZipArchiveEntry e = archive.getEntries().nextElement(); + try (InputStream is = archive.getInputStream(e)) { + IOUtils.toByteArray(is); + assertEquals(-1, is.read(buf)); + assertEquals(-1, is.read(buf)); + } + } + } + + private void assertAllReadMethods(byte[] expected, ZipFile zipFile, ZipArchiveEntry entry) { + // simple IOUtil read + try (InputStream stream = zf.getInputStream(entry)) { + byte[] full = IOUtils.toByteArray(stream); + assertArrayEquals(expected, full); + } + catch (IOException ex) { + throw new RuntimeException(ex); + } + + // big buffer at the beginning and then chunks by IOUtils read + try (InputStream stream = zf.getInputStream(entry)) { + byte[] full; + byte[] bytes = new byte[0x40000]; + int read = stream.read(bytes); + if (read < 0) { + full = new byte[0]; + } + else { + full = readStreamRest(bytes, read, stream); + } + assertArrayEquals(expected, full); + } + catch (IOException ex) { + throw new RuntimeException(ex); + } + + // small chunk / single byte and big buffer then + try (InputStream stream = zf.getInputStream(entry)) { + byte[] full; + int single = stream.read(); + if (single < 0) { + full = new byte[0]; + } + else { + byte[] big = new byte[0x40000]; + big[0] = (byte)single; + int read = stream.read(big, 1, big.length-1); + if (read < 0) { + full = new byte[]{ (byte)single }; + } + else { + full = readStreamRest(big, read+1, stream); + } + } + assertArrayEquals(expected, full); + } + catch (IOException ex) { + throw new RuntimeException(ex); + } + } + + /** + * Utility to append the rest of the stream to already read data. + */ + private byte[] readStreamRest(byte[] beginning, int length, InputStream stream) throws IOException { + byte[] rest = IOUtils.toByteArray(stream); + byte[] full = new byte[length+rest.length]; + System.arraycopy(beginning, 0, full, 0, length); + System.arraycopy(rest, 0, full, length, rest.length); + return full; + } + + private long calculateCrc32(byte[] content) { + CRC32 crc = new CRC32(); + crc.update(content); + return crc.getValue(); + } + + /* + * ordertest.zip has been handcrafted. + * + * It contains enough files so any random coincidence of + * entries.keySet() and central directory order would be unlikely + * - in fact testCDOrder fails in svn revision 920284. + * + * The central directory has ZipFile and ZipUtil swapped so + * central directory order is different from entry data order. 
+ */ + private void readOrderTest() throws Exception { + final File archive = getFile("ordertest.zip"); + zf = new ZipFile(archive); + } + + private static void assertEntryName(final ArrayList<ZipArchiveEntry> entries, + final int index, + final String expectedName) { + final ZipArchiveEntry ze = entries.get(index); + assertEquals("src/main/java/org/apache/commons/compress/archivers/zip/" + + expectedName + ".java", + ze.getName()); + } + + private static void nameSource(String archive, String entry, ZipArchiveEntry.NameSource expected) throws Exception { + try (ZipFile zf = new ZipFile(getFile(archive))) { + ZipArchiveEntry ze = zf.getEntry(entry); + assertEquals(entry, ze.getName()); + assertEquals(expected, ze.getNameSource()); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipLongTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipLongTest.java new file mode 100644 index 000000000..75709fd5a --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipLongTest.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import org.junit.Test; + +import static org.junit.Assert.*; + +/** + * JUnit testcases for org.apache.commons.compress.archivers.zip.ZipLong. + * + */ +public class ZipLongTest { + + /** + * Test conversion to bytes. + */ + @Test + public void testToBytes() { + final ZipLong zl = new ZipLong(0x12345678); + final byte[] result = zl.getBytes(); + assertEquals("length getBytes", 4, result.length); + assertEquals("first byte getBytes", 0x78, result[0]); + assertEquals("second byte getBytes", 0x56, result[1]); + assertEquals("third byte getBytes", 0x34, result[2]); + assertEquals("fourth byte getBytes", 0x12, result[3]); + } + + /** + * Test conversion to bytes. + */ + @Test + public void testPut() { + final byte[] arr = new byte[5]; + ZipLong.putLong(0x12345678, arr, 1); + assertEquals("first byte getBytes", 0x78, arr[1]); + assertEquals("second byte getBytes", 0x56, arr[2]); + assertEquals("third byte getBytes", 0x34, arr[3]); + assertEquals("fourth byte getBytes", 0x12, arr[4]); + } + + /** + * Test conversion from bytes. + */ + @Test + public void testFromBytes() { + final byte[] val = new byte[] {0x78, 0x56, 0x34, 0x12}; + final ZipLong zl = new ZipLong(val); + assertEquals("value from bytes", 0x12345678, zl.getValue()); + } + + /** + * Test the contract of the equals method. 
+ */ + @Test + public void testEquals() { + final ZipLong zl = new ZipLong(0x12345678); + final ZipLong zl2 = new ZipLong(0x12345678); + final ZipLong zl3 = new ZipLong(0x87654321); + + assertTrue("reflexive", zl.equals(zl)); + + assertTrue("works", zl.equals(zl2)); + assertTrue("works, part two", !zl.equals(zl3)); + + assertTrue("symmetric", zl2.equals(zl)); + + assertTrue("null handling", !zl.equals(null)); + assertTrue("non ZipLong handling", !zl.equals(new Integer(0x1234))); + } + + /** + * Test sign handling. + */ + @Test + public void testSign() { + ZipLong zl = new ZipLong(new byte[] {(byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF}); + assertEquals(0x00000000FFFFFFFFl, zl.getValue()); + assertEquals(-1,zl.getIntValue()); + + zl = new ZipLong(0xFFFF_FFFFL); + assertEquals(0x00000000FFFFFFFFl, zl.getValue()); + zl = new ZipLong(0xFFFF_FFFF); + assertEquals(0xFFFF_FFFF_FFFF_FFFFL, zl.getValue()); + + } + + @Test + public void testClone() { + final ZipLong s1 = new ZipLong(42); + final ZipLong s2 = (ZipLong) s1.clone(); + assertNotSame(s1, s2); + assertEquals(s1, s2); + assertEquals(s1.getValue(), s2.getValue()); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipShortTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipShortTest.java new file mode 100644 index 000000000..7354e9779 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipShortTest.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import org.junit.Test; + +/** + * JUnit testcases for org.apache.commons.compress.archivers.zip.ZipShort. + * + */ +public class ZipShortTest { + + /** + * Test conversion to bytes. + */ + @Test + public void testToBytes() { + final ZipShort zs = new ZipShort(0x1234); + final byte[] result = zs.getBytes(); + assertEquals("length getBytes", 2, result.length); + assertEquals("first byte getBytes", 0x34, result[0]); + assertEquals("second byte getBytes", 0x12, result[1]); + } + + + /** + * Test conversion to bytes. + */ + @Test + public void testPut() { + final byte[] arr = new byte[3]; + ZipShort.putShort(0x1234, arr, 1); + assertEquals("first byte getBytes", 0x34, arr[1]); + assertEquals("second byte getBytes", 0x12, arr[2]); + } + + + /** + * Test conversion from bytes. + */ + @Test + public void testFromBytes() { + final byte[] val = new byte[] {0x34, 0x12}; + final ZipShort zs = new ZipShort(val); + assertEquals("value from bytes", 0x1234, zs.getValue()); + } + + /** + * Test the contract of the equals method. 
+ */ + @Test + public void testEquals() { + final ZipShort zs = new ZipShort(0x1234); + final ZipShort zs2 = new ZipShort(0x1234); + final ZipShort zs3 = new ZipShort(0x5678); + + assertTrue("reflexive", zs.equals(zs)); + + assertTrue("works", zs.equals(zs2)); + assertTrue("works, part two", !zs.equals(zs3)); + + assertTrue("symmetric", zs2.equals(zs)); + + assertTrue("null handling", !zs.equals(null)); + assertTrue("non ZipShort handling", !zs.equals(new Integer(0x1234))); + } + + /** + * Test sign handling. + */ + @Test + public void testSign() { + final ZipShort zs = new ZipShort(new byte[] {(byte)0xFF, (byte)0xFF}); + assertEquals(0x0000FFFF, zs.getValue()); + } + + @Test + public void testClone() { + final ZipShort s1 = new ZipShort(42); + final ZipShort s2 = (ZipShort) s1.clone(); + assertNotSame(s1, s2); + assertEquals(s1, s2); + assertEquals(s1.getValue(), s2.getValue()); + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipUtilTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipUtilTest.java new file mode 100644 index 000000000..1ce819f05 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipUtilTest.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import java.math.BigInteger; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Date; +import java.util.TimeZone; + +import org.junit.Before; +import org.junit.Test; + +public class ZipUtilTest { + + private Date time; + private ZipLong zl; + + @Before + public void setUp() throws Exception { + time = new Date(); + final Calendar cal = Calendar.getInstance(); + cal.setTime(time); + final int year = cal.get(Calendar.YEAR); + final int month = cal.get(Calendar.MONTH) + 1; + final long value = ((year - 1980) << 25) + | (month << 21) + | (cal.get(Calendar.DAY_OF_MONTH) << 16) + | (cal.get(Calendar.HOUR_OF_DAY) << 11) + | (cal.get(Calendar.MINUTE) << 5) + | (cal.get(Calendar.SECOND) >> 1); + + final byte[] result = new byte[4]; + result[0] = (byte) ((value & 0xFF)); + result[1] = (byte) ((value & 0xFF00) >> 8); + result[2] = (byte) ((value & 0xFF0000) >> 16); + result[3] = (byte) ((value & 0xFF000000L) >> 24); + zl = new ZipLong(result); + } + + @Test + public void testZipLong() throws Exception { + final ZipLong test = ZipUtil.toDosTime(time); + assertEquals(test.getValue(), zl.getValue()); + } + + @Test + public void testAdjustToLong() { + assertEquals(Integer.MAX_VALUE, + ZipUtil.adjustToLong(Integer.MAX_VALUE)); + assertEquals(((long) Integer.MAX_VALUE) + 1, + ZipUtil.adjustToLong(Integer.MAX_VALUE + 1)); + assertEquals(2 * ((long) Integer.MAX_VALUE), + ZipUtil.adjustToLong(2 * Integer.MAX_VALUE)); + } + + @Test + public void testMinTime(){ + final byte[] b1 = ZipUtil.toDosTime(0); + final byte b10 = b1[0]; // Save the first byte + b1[0]++; // change it + final byte[] b2 = ZipUtil.toDosTime(0); // get the same time + assertEquals(b10,b2[0]); // first byte should still be the same + } + + @Test + public void testOutsideCalendar(){ + final byte[] b1 = ZipUtil.toDosTime(160441200000L); // 1.1..1975 + assertEquals(0, b1[0]); + assertEquals(33, b1[1]); + assertEquals(0, b1[2]); + assertEquals(0, b1[3]); + } + + @Test + public void testInsideCalendar(){ + final TimeZone tz = TimeZone.getDefault(); + final long date = 476096400000L; // 1.1.1985, 10:00 am GMT + final byte[] b1 = ZipUtil.toDosTime(date - tz.getOffset(date)); + assertEquals(0, b1[0]); + assertEquals(72, b1[1]); + assertEquals(65, b1[2]); + assertEquals(10, b1[3]); + } + + @Test + public void testReverse() { + final byte[][] bTest = new byte[6][]; + bTest[0] = new byte[]{}; + bTest[1] = new byte[]{1}; + bTest[2] = new byte[]{1, 2}; + bTest[3] = new byte[]{1, 2, 3}; + bTest[4] = new byte[]{1, 2, 3, 4}; + bTest[5] = new byte[]{1, 2, 3, 4, 5}; + + final byte[][] rTest = new byte[6][]; + rTest[0] = new byte[]{}; + rTest[1] = new byte[]{1}; + rTest[2] = new byte[]{2, 1}; + rTest[3] = new byte[]{3, 2, 1}; + rTest[4] = new byte[]{4, 3, 2, 1}; + rTest[5] = new byte[]{5, 4, 3, 2, 1}; + + assertEquals("test and result arrays are same length", bTest.length, rTest.length); + + for (int i = 0; i < bTest.length; i++) { + final byte[] result = ZipUtil.reverse(bTest[i]); + assertTrue("reverse mutates in-place", bTest[i] == result); + assertTrue("reverse actually reverses", Arrays.equals(rTest[i], result)); + } + } + + @Test + public void testBigToLong() { + final BigInteger big1 = BigInteger.valueOf(1); + final BigInteger big2 = BigInteger.valueOf(Long.MAX_VALUE); + final BigInteger big3 = BigInteger.valueOf(Long.MIN_VALUE); + + assertEquals(1L, ZipUtil.bigToLong(big1)); + assertEquals(Long.MAX_VALUE, ZipUtil.bigToLong(big2)); + 
assertEquals(Long.MIN_VALUE, ZipUtil.bigToLong(big3)); + + final BigInteger big4 = big2.add(big1); + try { + ZipUtil.bigToLong(big4); + fail("Should have thrown IllegalArgumentException"); + } catch (final IllegalArgumentException iae) { + // All is good. + } + + final BigInteger big5 = big3.subtract(big1); + try { + ZipUtil.bigToLong(big5); + fail("ZipUtil.bigToLong(BigInteger) should have thrown IllegalArgumentException"); + } catch (final IllegalArgumentException iae) { + // All is good. + } + } + + @Test + public void testLongToBig() { + final long l0 = 0; + final long l1 = 1; + final long l2 = -1; + final long l3 = Integer.MIN_VALUE; + final long l4 = Long.MAX_VALUE; + final long l5 = Long.MIN_VALUE; + + final BigInteger big0 = ZipUtil.longToBig(l0); + final BigInteger big1 = ZipUtil.longToBig(l1); + final BigInteger big2 = ZipUtil.longToBig(l2); + final BigInteger big3 = ZipUtil.longToBig(l3); + final BigInteger big4 = ZipUtil.longToBig(l4); + + assertEquals(0, big0.longValue()); + assertEquals(1, big1.longValue()); + assertEquals(0xFFFFFFFFL, big2.longValue()); + assertEquals(0x80000000L, big3.longValue()); + assertEquals(Long.MAX_VALUE, big4.longValue()); + + try { + ZipUtil.longToBig(l5); + fail("ZipUtil.longToBig(long) should have thrown IllegalArgumentException"); + } catch (final IllegalArgumentException iae) { + + } + } + + @Test + public void testSignedByteToUnsignedInt() { + // Yay, we can completely test all possible input values in this case! + int expectedVal = 128; + for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) { + final byte b = (byte) i; + assertEquals(expectedVal, ZipUtil.signedByteToUnsignedInt(b)); + expectedVal++; + if (expectedVal == 256) { + expectedVal = 0; + } + } + } + + @Test + public void testUnsignedIntToSignedByte() { + int unsignedVal = 128; + for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) { + final byte expectedVal = (byte) i; + assertEquals(expectedVal, ZipUtil.unsignedIntToSignedByte(unsignedVal)); + unsignedVal++; + if (unsignedVal == 256) { + unsignedVal = 0; + } + } + + try { + ZipUtil.unsignedIntToSignedByte(-1); + fail("ZipUtil.unsignedIntToSignedByte(-1) should have thrown IllegalArgumentException"); + } catch (final IllegalArgumentException iae) { + // All is good. + } + + try { + ZipUtil.unsignedIntToSignedByte(256); + fail("ZipUtil.unsignedIntToSignedByte(256) should have thrown IllegalArgumentException"); + } catch (final IllegalArgumentException iae) { + // All is good. + } + + } + + +} diff --git a/src/test/java/org/apache/commons/compress/changes/ChangeSetTestCase.java b/src/test/java/org/apache/commons/compress/changes/ChangeSetTestCase.java new file mode 100644 index 000000000..9565ae919 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/changes/ChangeSetTestCase.java @@ -0,0 +1,1296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.changes; + +import static org.junit.Assert.*; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.ar.ArArchiveEntry; +import org.apache.commons.compress.archivers.cpio.CpioArchiveEntry; +import org.apache.commons.compress.archivers.jar.JarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.junit.Test; + +/** + * Checks several ChangeSet business logics. + */ +public final class ChangeSetTestCase extends AbstractTestCase { + + // Delete a directory tree + private void archiveListDeleteDir(final String prefix){ + final Iterator<String> it = archiveList.iterator(); + while(it.hasNext()){ + final String entry = it.next(); + if (entry.startsWith(prefix+"/")){ // TODO won't work with folders + it.remove(); + } + } + } + + // Delete a single file + private void archiveListDelete(final String prefix){ + final Iterator<String> it = archiveList.iterator(); + while(it.hasNext()){ + final String entry = it.next(); + if (entry.equals(prefix)){ + it.remove(); + } + } + } + + /** + * Adds an ArchiveEntry with the same name two times. + * Only the latest addition should be found in the ChangeSet, + * the first add should be replaced. + * + * @throws Exception + */ + @Test + public void testAddChangeTwice() throws Exception { + InputStream in = null; + InputStream in2 = null; + try { + in = new FileInputStream(getFile("test.txt")); + in2 = new FileInputStream(getFile("test2.xml")); + + final ArchiveEntry e = new ZipArchiveEntry("test.txt"); + final ArchiveEntry e2 = new ZipArchiveEntry("test.txt"); + + final ChangeSet changes = new ChangeSet(); + changes.add(e, in); + changes.add(e2, in2); + + assertEquals(1, changes.getChanges().size()); + final Change c = changes.getChanges().iterator().next(); + assertEquals(in2, c.getInput()); + } finally { + if (in != null) { + in.close(); + } + if (in2 != null) { + in2.close(); + } + } + } + + /** + * Adds an ArchiveEntry with the same name two times. 
+ * Only the first addition should be found in the ChangeSet, + * the second add should never be added since replace = false + * + * @throws Exception + */ + @Test + public void testAddChangeTwiceWithoutReplace() throws Exception { + InputStream in = null; + InputStream in2 = null; + try { + in = new FileInputStream(getFile("test.txt")); + in2 = new FileInputStream(getFile("test2.xml")); + + final ArchiveEntry e = new ZipArchiveEntry("test.txt"); + final ArchiveEntry e2 = new ZipArchiveEntry("test.txt"); + + final ChangeSet changes = new ChangeSet(); + changes.add(e, in, true); + changes.add(e2, in2, false); + + assertEquals(1, changes.getChanges().size()); + final Change c = changes.getChanges().iterator().next(); + assertEquals(in, c.getInput()); + } finally { + if (in != null) { + in.close(); + } + if (in2 != null) { + in2.close(); + } + } + } + + /** + * Tries to delete the folder "bla" from an archive file. This should result in + * the deletion of bla/*, which actually means bla/test4.xml should be + * removed from the archive. The file something/bla (without ending, named + * like the folder) should not be deleted. + * + * @throws Exception + */ + @Test + public void testDeleteDir() throws Exception { + final String archivename = "cpio"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + changes.deleteDir("bla"); + archiveListDeleteDir("bla"); + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Tries to delete the folder "la" from an archive file. This should result in + * the deletion of la/*, which should not match any files/folders. + * + * @throws Exception + */ + @Test + public void testDeleteDir2() throws Exception { + final String archivename = "cpio"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + changes.deleteDir("la"); + archiveListDeleteDir("la"); + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Tries to delete the folder "test.txt" from an archive file. + * This should not match any files/folders. 
+ * + * @throws Exception + */ + @Test + public void testDeleteDir3() throws Exception { + final String archivename = "cpio"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + changes.deleteDir("test.txt"); + archiveListDeleteDir("test.txt"); + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Tries to delete the file "bla/test5.xml" from an archive. This should + * result in the deletion of "bla/test5.xml". + * + * @throws Exception + */ + @Test + public void testDeleteFile() throws Exception { + final String archivename = "cpio"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + changes.delete("bla/test5.xml"); + archiveListDelete("bla/test5.xml"); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Tries to delete the file "bla" from an archive. This should + * result in the deletion of nothing. + * + * @throws Exception + */ + @Test + public void testDeleteFile2() throws Exception { + final String archivename = "cpio"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + changes.delete("bla"); + //archiveListDelete("bla"); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Tries to delete and then add a file with the same name. 
+ * Should delete test/test3.xml and add test.txt with the name
+ * test/test3.xml
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testDeletePlusAddSame() throws Exception {
+ final String archivename = "zip";
+ final File input = this.createArchive(archivename);
+
+ ArchiveOutputStream out = null;
+ ArchiveInputStream ais = null;
+ final File result = File.createTempFile("test", "."+archivename);
+ result.deleteOnExit();
+
+ File testtxt = null;
+ try {
+
+ final InputStream is = new FileInputStream(input);
+ ais = factory.createArchiveInputStream(archivename, is);
+ out = factory.createArchiveOutputStream(archivename,
+ new FileOutputStream(result));
+
+ final ChangeSet changes = new ChangeSet();
+ changes.delete("test/test3.xml");
+ archiveListDelete("test/test3.xml");
+
+ // Add a file
+ testtxt = getFile("test.txt");
+ final ArchiveEntry entry = out.createArchiveEntry(testtxt, "test/test3.xml");
+ changes.add(entry, new FileInputStream(testtxt));
+ archiveList.add("test/test3.xml");
+
+ final ChangeSetPerformer performer = new ChangeSetPerformer(changes);
+ performer.perform(ais, out);
+ is.close();
+
+ } finally {
+ if (out != null) {
+ out.close();
+ }
+ if (ais != null) {
+ ais.close();
+ }
+ }
+
+ // Checks
+ ArchiveInputStream in = null;
+ File check = null;
+ try {
+ final InputStream is = new FileInputStream(result);
+ final BufferedInputStream buf = new BufferedInputStream(is);
+ in = factory.createArchiveInputStream(buf);
+ check = this.checkArchiveContent(in, archiveList, false);
+ final File test3xml = new File(check,"result/test/test3.xml");
+ assertEquals(testtxt.length(), test3xml.length());
+
+ final BufferedReader reader = new BufferedReader(new FileReader(test3xml));
+ String str;
+ while ((str = reader.readLine()) != null) {
+ // All lines look like this
+ assertEquals("111111111111111111111111111000101011", str);
+ }
+ reader.close();
+ } finally {
+ if (in != null) {
+ in.close();
+ }
+ rmdir(check);
+ }
+ }
+
+ /**
+ * Checks for the correct ChangeSetResults
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testChangeSetResults() throws Exception {
+ final String archivename = "cpio";
+ final File input = this.createArchive(archivename);
+
+ ArchiveOutputStream out = null;
+ ArchiveInputStream ais = null;
+ final File result = File.createTempFile("test", "."+archivename);
+ result.deleteOnExit();
+ try {
+
+ final InputStream is = new FileInputStream(input);
+ ais = factory.createArchiveInputStream(archivename, is);
+ out = factory.createArchiveOutputStream(archivename,
+ new FileOutputStream(result));
+
+ final ChangeSet changes = new ChangeSet();
+ changes.deleteDir("bla");
+ archiveListDeleteDir("bla");
+
+ // Add a file
+ final File file1 = getFile("test.txt");
+ final ArchiveEntry entry = out.createArchiveEntry(file1, "bla/test.txt");
+ changes.add(entry, new FileInputStream(file1));
+ archiveList.add("bla/test.txt");
+
+ final ChangeSetPerformer performer = new ChangeSetPerformer(changes);
+ final ChangeSetResults results = performer.perform(ais, out);
+ is.close();
+
+ // Checks
+ assertEquals(1,results.getAddedFromChangeSet().size());
+ assertEquals("bla/test.txt",results.getAddedFromChangeSet().iterator().next());
+ assertEquals(3,results.getDeleted().size());
+ assertTrue(results.getDeleted().contains("bla/test4.xml"));
+ assertTrue(results.getDeleted().contains("bla/test5.xml"));
+ assertTrue(results.getDeleted().contains("bla/blubber/test6.xml"));
+
+ assertTrue(results.getAddedFromStream().contains("testdata/test1.xml"));
+
assertTrue(results.getAddedFromStream().contains("testdata/test2.xml")); + assertTrue(results.getAddedFromStream().contains("test/test3.xml")); + assertTrue(results.getAddedFromStream().contains("test.txt")); + assertTrue(results.getAddedFromStream().contains("something/bla")); + assertTrue(results.getAddedFromStream().contains("test with spaces.txt")); + assertEquals(6,results.getAddedFromStream().size()); + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Tries to delete a directory with a file and adds a new directory with a + * new file and with the same name. Should delete dir1/* and add + * dir1/test.txt at the end + * + * @throws Exception + */ + @Test + public void testDeletePlusAdd() throws Exception { + final String archivename = "cpio"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + changes.deleteDir("bla"); + archiveListDeleteDir("bla"); + + // Add a file + final File file1 = getFile("test.txt"); + final ArchiveEntry entry = out.createArchiveEntry(file1, "bla/test.txt"); + changes.add(entry, new FileInputStream(file1)); + archiveList.add("bla/test.txt"); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Adds a file to a zip archive. Deletes an other file. + * + * @throws Exception + */ + @Test + public void testDeleteFromAndAddToZip() throws Exception { + final String archivename = "zip"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + + final File file1 = getFile("test.txt"); + final ArchiveEntry entry = new ZipArchiveEntry("blub/test.txt"); + changes.add(entry, new FileInputStream(file1)); + archiveList.add("blub/test.txt"); + + changes.delete("testdata/test1.xml"); + archiveListDelete("testdata/test1.xml"); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Adds a file to a zip archive. Deletes an other file. 
+ * + * @throws Exception + */ + @Test + public void testDeleteFromAndAddToZipUsingZipFilePerform() throws Exception { + final String archivename = "zip"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ZipFile ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + ais = new ZipFile(input); + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + + final File file1 = getFile("test.txt"); + final ArchiveEntry entry = new ZipArchiveEntry("blub/test.txt"); + changes.add(entry, new FileInputStream(file1)); + archiveList.add("blub/test.txt"); + + changes.delete("testdata/test1.xml"); + archiveListDelete("testdata/test1.xml"); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * add blub/test.txt + delete blub Should add blub/test.txt and delete it + * afterwards. In this example, the archive should stay untouched. + * + * @throws Exception + */ + @Test + public void testAddDeleteAdd() throws Exception { + final String archivename = "cpio"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + + final File file1 = getFile("test.txt"); + final ArchiveEntry entry = new CpioArchiveEntry("blub/test.txt"); + changes.add(entry, new FileInputStream(file1)); + archiveList.add("blub/test.txt"); + + changes.deleteDir("blub"); + archiveListDeleteDir("blub"); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * delete bla + add bla/test.txt + delete bla Deletes dir1/* first, then + * suppresses the add of bla.txt because there is a delete operation later. 
+ * + * @throws Exception + */ + @Test + public void testDeleteAddDelete() throws Exception { + final String archivename = "cpio"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + + changes.deleteDir("bla"); + + final File file1 = getFile("test.txt"); + final ArchiveEntry entry = new CpioArchiveEntry("bla/test.txt"); + changes.add(entry, new FileInputStream(file1)); + archiveList.add("bla/test.txt"); + + changes.deleteDir("bla"); + archiveListDeleteDir("bla"); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Simple Delete from a zip file. + * + * @throws Exception + */ + @Test + public void testDeleteFromZip() throws Exception { + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + File temp = null; + try { + final ChangeSet changes = new ChangeSet(); + changes.delete("test2.xml"); + + final File input = getFile("bla.zip"); + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream("zip", is); + + temp = File.createTempFile("test", ".zip"); + temp.deleteOnExit(); + out = factory.createArchiveOutputStream("zip", + new FileOutputStream(temp)); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + final List<String> expected = new ArrayList<>(); + expected.add("test1.xml"); + + this.checkArchiveContent(temp, expected); + } + + /** + * Simple delete from a tar file + * + * @throws Exception + */ + @Test + public void testDeleteFromTar() throws Exception { + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + File temp = null; + try { + final ChangeSet changes = new ChangeSet(); + changes.delete("test2.xml"); + + final File input = getFile("bla.tar"); + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream("tar", is); + + temp = new File(dir, "bla.tar"); + out = factory.createArchiveOutputStream("tar", + new FileOutputStream(temp)); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + final List<String> expected = new ArrayList<>(); + expected.add("test1.xml"); + this.checkArchiveContent(temp, expected); + } + + /** + * Simple delete from a jar file + * + * @throws Exception + */ + @Test + public void testDeleteFromJar() throws Exception { + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + File temp = null; + try { + final ChangeSet changes = new ChangeSet(); + changes.delete("test2.xml"); + changes.deleteDir("META-INF"); + changes.delete(".classpath"); + changes.delete(".project"); + + final File input = getFile("bla.jar"); + final InputStream is = new FileInputStream(input); + ais = 
factory.createArchiveInputStream("jar", is); + + temp = new File(dir, "bla.jar"); + out = factory.createArchiveOutputStream("jar", + new FileOutputStream(temp)); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + final List<String> expected = new ArrayList<>(); + expected.add("test1.xml"); + this.checkArchiveContent(temp, expected); + } + + @Test + public void testDeleteFromAndAddToTar() throws Exception { + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + File temp = null; + try { + final ChangeSet changes = new ChangeSet(); + changes.delete("test2.xml"); + + final File file1 = getFile("test.txt"); + + final TarArchiveEntry entry = new TarArchiveEntry( + "testdata/test.txt"); + entry.setModTime(0); + entry.setSize(file1.length()); + entry.setUserId(0); + entry.setGroupId(0); + entry.setUserName("avalon"); + entry.setGroupName("excalibur"); + entry.setMode(0100000); + + changes.add(entry, new FileInputStream(file1)); + + final File input = getFile("bla.tar"); + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream("tar", is); + + temp = new File(dir, "bla.tar"); + out = factory.createArchiveOutputStream("tar", + new FileOutputStream(temp)); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + final List<String> expected = new ArrayList<>(); + expected.add("test1.xml"); + expected.add("testdata/test.txt"); + final ArchiveInputStream in = factory.createArchiveInputStream("tar", new FileInputStream(temp)); + this.checkArchiveContent(in, expected); + } + + /** + * Delete from a jar file and add another file + * + * @throws Exception + */ + @Test + public void testDeleteFromAndAddToJar() throws Exception { + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + File temp = null; + try { + final ChangeSet changes = new ChangeSet(); + changes.delete("test2.xml"); + changes.deleteDir("META-INF"); + changes.delete(".classpath"); + changes.delete(".project"); + + final File file1 = getFile("test.txt"); + final JarArchiveEntry entry = new JarArchiveEntry("testdata/test.txt"); + changes.add(entry, new FileInputStream(file1)); + + final File input = getFile("bla.jar"); + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream("jar", is); + + temp = new File(dir, "bla.jar"); + out = factory.createArchiveOutputStream("jar", + new FileOutputStream(temp)); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + final List<String> expected = new ArrayList<>(); + expected.add("test1.xml"); + expected.add("testdata/test.txt"); + this.checkArchiveContent(temp, expected); + } + + /** + * Simple delete from an ar file + * + * @throws Exception + */ + @Test + public void testDeleteFromAr() throws Exception { + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + File temp = null; + try { + final ChangeSet changes = new ChangeSet(); + changes.delete("test2.xml"); + + final File input = getFile("bla.ar"); + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream("ar", is); + + temp = new File(dir, "bla.ar"); + 
out = factory.createArchiveOutputStream("ar", + new FileOutputStream(temp)); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + final List<String> expected = new ArrayList<>(); + expected.add("test1.xml"); + this.checkArchiveContent(temp, expected); + } + + /** + * Deletes a file from an AR-archive and adds another + * + * @throws Exception + */ + @Test + public void testDeleteFromAndAddToAr() throws Exception { + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + File temp = null; + try { + final ChangeSet changes = new ChangeSet(); + changes.delete("test2.xml"); + + final File file1 = getFile("test.txt"); + + final ArArchiveEntry entry = new ArArchiveEntry("test.txt", file1 + .length()); + + changes.add(entry, new FileInputStream(file1)); + + final File input = getFile("bla.ar"); + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream("ar", is); + + temp = new File(dir, "bla.ar"); + out = factory.createArchiveOutputStream("ar", + new FileOutputStream(temp)); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + final List<String> expected = new ArrayList<>(); + expected.add("test1.xml"); + expected.add("test.txt"); + this.checkArchiveContent(temp, expected); + } + + /** + * TODO: Move operations are not supported currently + * + * mv dir1/test.text dir2/test.txt + delete dir1 Moves the file to dir2 and + * deletes everything in dir1 + * + * @throws Exception + */ + @Test + public void testRenameAndDelete() throws Exception { + } + + /** + * TODO: Move operations are not supported currently + * + * add dir1/bla.txt + mv dir1/test.text dir2/test.txt + delete dir1 + * + * Add dir1/bla.txt should be surpressed. All other dir1 files will be + * deleted, except dir1/test.text will be moved + * + * @throws Exception + */ + @Test + public void testAddMoveDelete() throws Exception { + } + + /** + * Check can add a file to an empty archive. 
+ * + * @throws Exception + */ + @Test + public void testAddToEmptyArchive() throws Exception { + final String archivename = "zip"; + final File input = this.createEmptyArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + InputStream is = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + final ChangeSet changes = new ChangeSet(); + try { + + is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final File file1 = getFile("test.txt"); + final ArchiveEntry entry = new ZipArchiveEntry("bla/test.txt"); + changes.add(entry, new FileInputStream(file1)); + archiveList.add("bla/test.txt"); + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); // will close is + } else if (is != null){ + is.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Check can delete and add a file to an archive with a single file + * + * @throws Exception + */ + @Test + public void testDeleteAddToOneFileArchive() throws Exception { + final String archivename = "zip"; + final File input = this.createSingleEntryArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + InputStream is = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + final ChangeSet changes = new ChangeSet(); + try { + + is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + changes.delete("test1.xml"); + archiveListDelete("test1.xml"); + + final File file = getFile("test.txt"); + final ArchiveEntry entry = out.createArchiveEntry(file,"bla/test.txt"); + changes.add(entry, new FileInputStream(file)); + archiveList.add("bla/test.txt"); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); // will close is + } else if (is != null){ + is.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Check can add and delete a file to an archive with a single file + * + * @throws Exception + */ + @Test + public void testAddDeleteToOneFileArchive() throws Exception { + final String archivename = "cpio"; + final File input = this.createSingleEntryArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + InputStream is = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + final ChangeSet changes = new ChangeSet(); + try { + + is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + final File file = getFile("test.txt"); + final ArchiveEntry entry = out.createArchiveEntry(file,"bla/test.txt"); + changes.add(entry, new FileInputStream(file)); + archiveList.add("bla/test.txt"); + + changes.delete("test1.xml"); + archiveListDelete("test1.xml"); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + performer.perform(ais, out); + is.close(); + + } finally 
{ + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); // will close is + } else if (is != null){ + is.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Adds a file with the same filename as an existing file from the stream. + * Should lead to a replacement. + * + * @throws Exception + */ + @Test + public void testAddAllreadyExistingWithReplaceTrue() throws Exception { + final String archivename = "zip"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + + final File file1 = getFile("test.txt"); + final ArchiveEntry entry = new ZipArchiveEntry("testdata/test1.xml"); + changes.add(entry, new FileInputStream(file1), true); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + final ChangeSetResults results = performer.perform(ais, out); + assertTrue(results.getAddedFromChangeSet().contains("testdata/test1.xml")); + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + + /** + * Adds a file with the same filename as an existing file from the stream. + * Should lead to a replacement. + * + * @throws Exception + */ + @Test + public void testAddAllreadyExistingWithReplaceFalse() throws Exception { + final String archivename = "zip"; + final File input = this.createArchive(archivename); + + ArchiveOutputStream out = null; + ArchiveInputStream ais = null; + final File result = File.createTempFile("test", "."+archivename); + result.deleteOnExit(); + try { + + final InputStream is = new FileInputStream(input); + ais = factory.createArchiveInputStream(archivename, is); + out = factory.createArchiveOutputStream(archivename, + new FileOutputStream(result)); + + final ChangeSet changes = new ChangeSet(); + + final File file1 = getFile("test.txt"); + final ArchiveEntry entry = new ZipArchiveEntry("testdata/test1.xml"); + changes.add(entry, new FileInputStream(file1), false); + + final ChangeSetPerformer performer = new ChangeSetPerformer(changes); + final ChangeSetResults results = performer.perform(ais, out); + assertTrue(results.getAddedFromStream().contains("testdata/test1.xml")); + assertTrue(results.getAddedFromChangeSet().isEmpty()); + assertTrue(results.getDeleted().isEmpty()); + is.close(); + + } finally { + if (out != null) { + out.close(); + } + if (ais != null) { + ais.close(); + } + } + + this.checkArchiveContent(result, archiveList); + } + +} diff --git a/src/test/java/org/apache/commons/compress/changes/ChangeTest.java b/src/test/java/org/apache/commons/compress/changes/ChangeTest.java new file mode 100644 index 000000000..907041ccb --- /dev/null +++ b/src/test/java/org/apache/commons/compress/changes/ChangeTest.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.changes; + +import org.apache.commons.compress.archivers.memory.MemoryArchiveEntry; +import org.junit.Test; + +import java.io.PipedInputStream; + + +/** + * Unit tests for class {@link Change}. + * + * @date 16.06.2017 + * @see Change + **/ +public class ChangeTest { + + + @Test(expected = NullPointerException.class) + public void testFailsToCreateChangeTakingFourArgumentsThrowsNullPointerExceptionOne() { + + MemoryArchiveEntry memoryArchiveEntry = new MemoryArchiveEntry("x"); + + Change change = new Change(memoryArchiveEntry, null, false); + + } + + + @Test(expected = NullPointerException.class) + public void testFailsToCreateChangeTakingFourArgumentsThrowsNullPointerExceptionTwo() { + + PipedInputStream pipedInputStream = new PipedInputStream(1); + + Change change = new Change(null, pipedInputStream, false); + + } + + + @Test(expected = NullPointerException.class) + public void testFailsToCreateChangeTakingThreeArgumentsThrowsNullPointerException() { + + Change change = new Change(null, (-407)); + + } + + +}
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/compressors/BZip2TestCase.java b/src/test/java/org/apache/commons/compress/compressors/BZip2TestCase.java new file mode 100644 index 000000000..88aca8dec --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/BZip2TestCase.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class BZip2TestCase extends AbstractTestCase { + + @Test + public void testBzipCreation() throws Exception { + File output = null; + final File input = getFile("test.txt"); + { + output = new File(dir, "test.txt.bz2"); + final OutputStream out = new FileOutputStream(output); + final CompressorOutputStream cos = new CompressorStreamFactory().createCompressorOutputStream("bzip2", out); + final FileInputStream in = new FileInputStream(input); + IOUtils.copy(in, cos); + cos.close(); + in.close(); + } + + final File decompressed = new File(dir, "decompressed.txt"); + { + final File toDecompress = output; + final InputStream is = new FileInputStream(toDecompress); + final CompressorInputStream in = + new CompressorStreamFactory().createCompressorInputStream("bzip2", is); + final FileOutputStream os = new FileOutputStream(decompressed); + IOUtils.copy(in, os); + is.close(); + os.close(); + } + + assertEquals(input.length(),decompressed.length()); + } + + @Test + public void testBzip2Unarchive() throws Exception { + final File input = getFile("bla.txt.bz2"); + final File output = new File(dir, "bla.txt"); + final InputStream is = new FileInputStream(input); + final CompressorInputStream in = new CompressorStreamFactory().createCompressorInputStream("bzip2", is); + final FileOutputStream os = new FileOutputStream(output); + IOUtils.copy(in, os); + is.close(); + os.close(); + } + + @Test + public void testConcatenatedStreamsReadFirstOnly() throws Exception { + final File input = getFile("multiple.bz2"); + try (InputStream is = new FileInputStream(input)) { + try (CompressorInputStream in = new CompressorStreamFactory() + .createCompressorInputStream("bzip2", is)) { + assertEquals('a', in.read()); + assertEquals(-1, in.read()); + } + } + } + + @Test + public void testConcatenatedStreamsReadFully() throws Exception { + final File input = getFile("multiple.bz2"); + try (InputStream is = new 
FileInputStream(input)) { + try (CompressorInputStream in = new BZip2CompressorInputStream(is, true)) { + assertEquals('a', in.read()); + assertEquals('b', in.read()); + assertEquals(0, in.available()); + assertEquals(-1, in.read()); + } + } + } + + @Test + public void testCOMPRESS131() throws Exception { + final File input = getFile("COMPRESS-131.bz2"); + try (InputStream is = new FileInputStream(input)) { + try (CompressorInputStream in = new BZip2CompressorInputStream(is, true)) { + int l = 0; + while (in.read() != -1) { + l++; + } + assertEquals(539, l); + } + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/BZip2UtilsTestCase.java b/src/test/java/org/apache/commons/compress/compressors/BZip2UtilsTestCase.java new file mode 100644 index 000000000..27154efac --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/BZip2UtilsTestCase.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors; + +import static org.junit.Assert.*; + +import org.apache.commons.compress.compressors.bzip2.BZip2Utils; +import org.junit.Test; + +public class BZip2UtilsTestCase { + + @Test + public void testIsCompressedFilename() { + assertFalse(BZip2Utils.isCompressedFilename("")); + assertFalse(BZip2Utils.isCompressedFilename(".gz")); + + assertTrue(BZip2Utils.isCompressedFilename("x.tbz2")); + assertTrue(BZip2Utils.isCompressedFilename("x.tbz")); + assertTrue(BZip2Utils.isCompressedFilename("x.bz2")); + assertTrue(BZip2Utils.isCompressedFilename("x.bz")); + + assertFalse(BZip2Utils.isCompressedFilename("xbz2")); + assertFalse(BZip2Utils.isCompressedFilename("xbz")); + + assertTrue(BZip2Utils.isCompressedFilename("x.TBZ2")); + assertTrue(BZip2Utils.isCompressedFilename("x.Tbz2")); + assertTrue(BZip2Utils.isCompressedFilename("x.tbZ2")); + + assertFalse(BZip2Utils.isCompressedFilename("x.bz ")); + assertFalse(BZip2Utils.isCompressedFilename("x.tbz\n")); + assertFalse(BZip2Utils.isCompressedFilename("x.tbz2.y")); + } + + @Test + public void testGetUncompressedFilename() { + assertEquals("", BZip2Utils.getUncompressedFilename("")); + assertEquals(".bz2", BZip2Utils.getUncompressedFilename(".bz2")); + + assertEquals("x.tar", BZip2Utils.getUncompressedFilename("x.tbz2")); + assertEquals("x.tar", BZip2Utils.getUncompressedFilename("x.tbz")); + assertEquals("x", BZip2Utils.getUncompressedFilename("x.bz2")); + assertEquals("x", BZip2Utils.getUncompressedFilename("x.bz")); + + assertEquals("x.tar", BZip2Utils.getUncompressedFilename("x.TBZ2")); + assertEquals("X.tar", BZip2Utils.getUncompressedFilename("X.Tbz2")); + assertEquals("X.tar", BZip2Utils.getUncompressedFilename("X.tbZ2")); + + assertEquals("x.bz ", BZip2Utils.getUncompressedFilename("x.bz ")); + 
assertEquals("x.tbz\n", BZip2Utils.getUncompressedFilename("x.tbz\n")); + assertEquals("x.tbz2.y", BZip2Utils.getUncompressedFilename("x.tbz2.y")); + } + + @Test + public void testGetCompressedFilename() { + assertEquals(".bz2", BZip2Utils.getCompressedFilename("")); + assertEquals(" .bz2", BZip2Utils.getCompressedFilename(" ")); + assertEquals("x.bz2", BZip2Utils.getCompressedFilename("x")); + assertEquals("X.bz2", BZip2Utils.getCompressedFilename("X")); + assertEquals("x.tar.bz2", BZip2Utils.getCompressedFilename("x.tar")); + assertEquals("x.tar.bz2", BZip2Utils.getCompressedFilename("x.TAR")); + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/CompressorServiceLoaderTest.java b/src/test/java/org/apache/commons/compress/compressors/CompressorServiceLoaderTest.java new file mode 100644 index 000000000..d1027ef2b --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/CompressorServiceLoaderTest.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.compressors; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; + +import org.apache.commons.compress.compressors.TestCompressorStreamProvider.InvocationConfirmationException; +import org.junit.Test; + +public class CompressorServiceLoaderTest { + + @Test(expected = InvocationConfirmationException.class) + public void testInputStream() throws CompressorException { + new CompressorStreamFactory().createCompressorInputStream("TestInput1", new ByteArrayInputStream(new byte[] {})); + } + + @Test(expected = InvocationConfirmationException.class) + public void testOutputStream() throws CompressorException { + new CompressorStreamFactory().createCompressorOutputStream("TestOutput1", new ByteArrayOutputStream()); + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/CompressorStreamFactoryRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/CompressorStreamFactoryRoundtripTest.java new file mode 100644 index 000000000..82ff6e898 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/CompressorStreamFactoryRoundtripTest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; + +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class CompressorStreamFactoryRoundtripTest { + + @Parameters(name = "{0}") + public static String[] data() { + return new String[] { // + CompressorStreamFactory.BZIP2, // + CompressorStreamFactory.DEFLATE, // + CompressorStreamFactory.GZIP, // + // CompressorStreamFactory.LZMA, // Not implemented yet + // CompressorStreamFactory.PACK200, // Bug + // CompressorStreamFactory.SNAPPY_FRAMED, // Not implemented yet + // CompressorStreamFactory.SNAPPY_RAW, // Not implemented yet + CompressorStreamFactory.XZ, // + // CompressorStreamFactory.Z, // Not implemented yet + }; + } + + private final String compressorName; + + public CompressorStreamFactoryRoundtripTest(final String compressorName) { + this.compressorName = compressorName; + } + + @Test + public void testCompressorStreamFactoryRoundtrip() throws Exception { + final CompressorStreamProvider factory = new CompressorStreamFactory(); + final ByteArrayOutputStream compressedOs = new ByteArrayOutputStream(); + final CompressorOutputStream compressorOutputStream = factory.createCompressorOutputStream(compressorName, + compressedOs); + final String fixture = "The quick brown fox jumps over the lazy dog"; + compressorOutputStream.write(fixture.getBytes("UTF-8")); + compressorOutputStream.flush(); + compressorOutputStream.close(); + final ByteArrayInputStream is = new ByteArrayInputStream(compressedOs.toByteArray()); + final CompressorInputStream compressorInputStream = factory.createCompressorInputStream(compressorName, is, false); + final ByteArrayOutputStream decompressedOs = new ByteArrayOutputStream(); + IOUtils.copy(compressorInputStream, decompressedOs); + compressorInputStream.close(); + decompressedOs.flush(); + decompressedOs.close(); + Assert.assertEquals(fixture, decompressedOs.toString("UTF-8")); + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/DeflateTestCase.java b/src/test/java/org/apache/commons/compress/compressors/DeflateTestCase.java new file mode 100644 index 000000000..468cb5766 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/DeflateTestCase.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream; +import org.apache.commons.compress.compressors.deflate.DeflateCompressorOutputStream; +import org.apache.commons.compress.compressors.deflate.DeflateParameters; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class DeflateTestCase extends AbstractTestCase { + + /** + * Tests the creation of a DEFLATE archive with zlib header + * + * @throws Exception + */ + @Test + public void testDeflateCreation() throws Exception { + final File input = getFile("test1.xml"); + final File output = new File(dir, "test1.xml.deflatez"); + try (OutputStream out = new FileOutputStream(output)) { + try (CompressorOutputStream cos = new CompressorStreamFactory() + .createCompressorOutputStream("deflate", out)) { + IOUtils.copy(new FileInputStream(input), cos); + } + } + } + + /** + * Tests the creation of a "raw" DEFLATE archive (without zlib header) + * + * @throws Exception + */ + @Test + public void testRawDeflateCreation() throws Exception { + final File input = getFile("test1.xml"); + final File output = new File(dir, "test1.xml.deflate"); + try (OutputStream out = new FileOutputStream(output)) { + final DeflateParameters params = new DeflateParameters(); + params.setWithZlibHeader(false); + try (CompressorOutputStream cos = new DeflateCompressorOutputStream(out, params)) { + IOUtils.copy(new FileInputStream(input), cos); + } + } + } + + /** + * Tests the extraction of a DEFLATE archive with zlib header + * + * @throws Exception + */ + @Test + public void testDeflateUnarchive() throws Exception { + final File input = getFile("bla.tar.deflatez"); + final File output = new File(dir, "bla.tar"); + try (InputStream is = new FileInputStream(input)) { + final CompressorInputStream in = new CompressorStreamFactory() + .createCompressorInputStream("deflate", is); // zlib header is expected by default + FileOutputStream out = null; + try { + out = new FileOutputStream(output); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + } + + /** + * Tests the extraction of a "raw" DEFLATE archive (without zlib header) + * + * @throws Exception + */ + @Test + public void testRawDeflateUnarchive() throws Exception { + final File input = getFile("bla.tar.deflate"); + final File output = new File(dir, "bla.tar"); + try (InputStream is = new FileInputStream(input)) { + final DeflateParameters params = new DeflateParameters(); + params.setWithZlibHeader(false); + final CompressorInputStream in = new DeflateCompressorInputStream(is, params); + FileOutputStream out = null; + try { + out = new FileOutputStream(output); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + } +} diff --git 
a/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java b/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java new file mode 100644 index 000000000..a59bc605a --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.MemoryLimitException; +import org.apache.commons.compress.MockEvilInputStream; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream; +import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; +import org.junit.Test; + +@SuppressWarnings("deprecation") // deliberately tests setDecompressConcatenated +public final class DetectCompressorTestCase { + + final CompressorStreamFactory factory = new CompressorStreamFactory(); + private static final CompressorStreamFactory factoryTrue = new CompressorStreamFactory(true); + private static final CompressorStreamFactory factoryFalse = new CompressorStreamFactory(false); + + // Must be static to allow use in the TestData entries + private static final CompressorStreamFactory factorySetTrue; + private static final CompressorStreamFactory factorySetFalse; + + static { + factorySetTrue = new CompressorStreamFactory(); + factorySetTrue.setDecompressConcatenated(true); + factorySetFalse = new CompressorStreamFactory(); + factorySetFalse.setDecompressConcatenated(false); + } + + static class TestData { + final String fileName; // The multiple file name + final char[] entryNames; // expected entries ... + final CompressorStreamFactory factory; // ... 
when using this factory + final boolean concat; // expected value for decompressConcatenated + TestData(final String name, final char[] names, final CompressorStreamFactory factory, final boolean concat) { + this.fileName = name; + this.entryNames = names; + this.factory = factory; + this.concat = concat; + } + } + + private final TestData[] tests = { + new TestData("multiple.bz2", new char[]{'a','b'}, factoryTrue, true), + new TestData("multiple.bz2", new char[]{'a','b'}, factorySetTrue, true), + new TestData("multiple.bz2", new char[]{'a'}, factoryFalse, false), + new TestData("multiple.bz2", new char[]{'a'}, factorySetFalse, false), + new TestData("multiple.bz2", new char[]{'a'}, factory, false), + + new TestData("multiple.gz", new char[]{'a','b'}, factoryTrue, true), + new TestData("multiple.gz", new char[]{'a','b'}, factorySetTrue, true), + new TestData("multiple.gz", new char[]{'a'}, factoryFalse, false), + new TestData("multiple.gz", new char[]{'a'}, factorySetFalse, false), + new TestData("multiple.gz", new char[]{'a'}, factory, false), + + new TestData("multiple.xz", new char[]{'a','b'}, factoryTrue, true), + new TestData("multiple.xz", new char[]{'a','b'}, factorySetTrue, true), + new TestData("multiple.xz", new char[]{'a'}, factoryFalse, false), + new TestData("multiple.xz", new char[]{'a'}, factorySetFalse, false), + new TestData("multiple.xz", new char[]{'a'}, factory, false), + }; + + @Test + public void testDetection() throws Exception { + final CompressorInputStream bzip2 = getStreamFor("bla.txt.bz2"); + assertNotNull(bzip2); + assertTrue(bzip2 instanceof BZip2CompressorInputStream); + + final CompressorInputStream gzip = getStreamFor("bla.tgz"); + assertNotNull(gzip); + assertTrue(gzip instanceof GzipCompressorInputStream); + + final CompressorInputStream pack200 = getStreamFor("bla.pack"); + assertNotNull(pack200); + assertTrue(pack200 instanceof Pack200CompressorInputStream); + + final CompressorInputStream xz = getStreamFor("bla.tar.xz"); + assertNotNull(xz); + assertTrue(xz instanceof XZCompressorInputStream); + + final CompressorInputStream zlib = getStreamFor("bla.tar.deflatez"); + assertNotNull(zlib); + assertTrue(zlib instanceof DeflateCompressorInputStream); + + final CompressorInputStream zstd = getStreamFor("bla.tar.zst"); + assertNotNull(zstd); + assertTrue(zstd instanceof ZstdCompressorInputStream); + + try { + factory.createCompressorInputStream(new ByteArrayInputStream(new byte[0])); + fail("No exception thrown for an empty input stream"); + } catch (final CompressorException e) { + // expected + } + } + + @Test + public void testDetect() throws Exception { + + assertEquals(CompressorStreamFactory.BZIP2, detect("bla.txt.bz2")); + assertEquals(CompressorStreamFactory.GZIP, detect("bla.tgz")); + assertEquals(CompressorStreamFactory.PACK200, detect("bla.pack")); + assertEquals(CompressorStreamFactory.XZ, detect("bla.tar.xz")); + assertEquals(CompressorStreamFactory.DEFLATE, detect("bla.tar.deflatez")); + assertEquals(CompressorStreamFactory.LZ4_FRAMED, detect("bla.tar.lz4")); + assertEquals(CompressorStreamFactory.LZMA, detect("bla.tar.lzma")); + assertEquals(CompressorStreamFactory.SNAPPY_FRAMED, detect("bla.tar.sz")); + assertEquals(CompressorStreamFactory.Z, detect("bla.tar.Z")); + assertEquals(CompressorStreamFactory.ZSTANDARD, detect("bla.tar.zst")); + + //make sure we don't oom on detect + assertEquals(CompressorStreamFactory.Z, detect("COMPRESS-386")); + assertEquals(CompressorStreamFactory.LZMA, detect("COMPRESS-382")); + + try { + 
CompressorStreamFactory.detect(new BufferedInputStream(new ByteArrayInputStream(new byte[0]))); + fail("shouldn't be able to detect empty stream"); + } catch (CompressorException e) { + assertEquals("No Compressor found for the stream signature.", e.getMessage()); + } + + try { + CompressorStreamFactory.detect(null); + fail("shouldn't be able to detect null stream"); + } catch (IllegalArgumentException e) { + assertEquals("Stream must not be null.", e.getMessage()); + } + + try { + CompressorStreamFactory.detect(new BufferedInputStream(new MockEvilInputStream())); + fail("Expected IOException"); + } catch (CompressorException e) { + assertEquals("IOException while reading signature.", e.getMessage()); + } + + + } + + private String detect(String testFileName) throws IOException, CompressorException { + String name = null; + try (InputStream is = new BufferedInputStream( + new FileInputStream(getFile(testFileName)))) { + name = CompressorStreamFactory.detect(is); + } + return name; + } + + @Test(expected = MemoryLimitException.class) + public void testLZMAMemoryLimit() throws Exception { + getStreamFor("COMPRESS-382", 100); + } + + @Test(expected = MemoryLimitException.class) + public void testZMemoryLimit() throws Exception { + getStreamFor("COMPRESS-386", 100); + } + + @Test(expected = MemoryLimitException.class) + public void testXZMemoryLimitOnRead() throws Exception { + //Even though the file is very small, the memory limit + //has to be quite large (8296 KiB) because of the dictionary size + + //This is triggered on read(); not during initialization. + //This test is here instead of the xz unit test to make sure + //that the parameter is properly passed via the CompressorStreamFactory + try (InputStream compressorIs = getStreamFor("bla.tar.xz", 100)) { + compressorIs.read(); + } + } + + @Test(expected = MemoryLimitException.class) + public void testXZMemoryLimitOnSkip() throws Exception { + try (InputStream compressorIs = getStreamFor("bla.tar.xz", 100)) { + compressorIs.skip(10); + } + } + + private InputStream getStreamFor(final String fileName, final int memoryLimitInKb) throws Exception { + CompressorStreamFactory fac = new CompressorStreamFactory(true, + memoryLimitInKb); + InputStream is = new BufferedInputStream( + new FileInputStream(getFile(fileName))); + try { + return fac.createCompressorInputStream(is); + } catch (CompressorException e) { + if (e.getCause() != null && e.getCause() instanceof Exception) { + //unwrap cause to reveal MemoryLimitException + throw (Exception)e.getCause(); + } else { + throw e; + } + } + + } + + + @Test + public void testOverride() { + CompressorStreamFactory fac = new CompressorStreamFactory(); + assertFalse(fac.getDecompressConcatenated()); + fac.setDecompressConcatenated(true); + assertTrue(fac.getDecompressConcatenated()); + + fac = new CompressorStreamFactory(false); + assertFalse(fac.getDecompressConcatenated()); + try { + fac.setDecompressConcatenated(true); + fail("Expected IllegalStateException"); + } catch (final IllegalStateException ise) { + // expected + } + + fac = new CompressorStreamFactory(true); + assertTrue(fac.getDecompressConcatenated()); + try { + fac.setDecompressConcatenated(true); + fail("Expected IllegalStateException"); + } catch (final IllegalStateException ise) { + // expected + } + } + + @Test + public void testMutiples() throws Exception { + for(int i=0; i <tests.length; i++) { + final TestData test = tests[i]; + final CompressorStreamFactory fac = test.factory; + assertNotNull("Test entry "+i, fac); + 
assertEquals("Test entry "+i, test.concat, fac.getDecompressConcatenated()); + final CompressorInputStream in = getStreamFor(test.fileName, fac); + assertNotNull("Test entry "+i,in); + for (final char entry : test.entryNames) { + assertEquals("Test entry" + i, entry, in.read()); + } + assertEquals(0, in.available()); + assertEquals(-1, in.read()); + } + } + + private CompressorInputStream getStreamFor(final String resource) + throws CompressorException, IOException { + return factory.createCompressorInputStream( + new BufferedInputStream(new FileInputStream( + getFile(resource)))); + } + + private CompressorInputStream getStreamFor(final String resource, final CompressorStreamFactory factory) + throws CompressorException, IOException { + return factory.createCompressorInputStream( + new BufferedInputStream(new FileInputStream( + getFile(resource)))); + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/FramedSnappyTestCase.java b/src/test/java/org/apache/commons/compress/compressors/FramedSnappyTestCase.java new file mode 100644 index 000000000..133c7fe50 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/FramedSnappyTestCase.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors; + +import static org.junit.Assert.*; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Random; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class FramedSnappyTestCase + extends AbstractTestCase { + + @Test + public void testDefaultExtraction() throws Exception { + testUnarchive(new StreamWrapper<CompressorInputStream>() { + @Override + public CompressorInputStream wrap(final InputStream is) throws IOException { + return new FramedSnappyCompressorInputStream(is); + } + }); + } + + @Test + public void testDefaultExtractionViaFactory() throws Exception { + testUnarchive(new StreamWrapper<CompressorInputStream>() { + @Override + public CompressorInputStream wrap(final InputStream is) throws Exception { + return new CompressorStreamFactory() + .createCompressorInputStream(CompressorStreamFactory.SNAPPY_FRAMED, + is); + } + }); + } + + @Test + public void testDefaultExtractionViaFactoryAutodetection() throws Exception { + testUnarchive(new StreamWrapper<CompressorInputStream>() { + @Override + public CompressorInputStream wrap(final InputStream is) throws Exception { + return new CompressorStreamFactory().createCompressorInputStream(is); + } + }); + } + + private void testUnarchive(final StreamWrapper<CompressorInputStream> wrapper) throws Exception { + final File input = getFile("bla.tar.sz"); + final File output = new File(dir, "bla.tar"); + try (FileInputStream is = new FileInputStream(input)) { + // the intermediate BufferedInputStream is there for mark + // support in the autodetection test + final CompressorInputStream in = wrapper.wrap(new BufferedInputStream(is)); + FileOutputStream out = null; + try { + out = new FileOutputStream(output); + IOUtils.copy(in, out); + assertEquals(995, in.getBytesRead()); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + final File original = getFile("bla.tar"); + try (FileInputStream written = new FileInputStream(output)) { + try (FileInputStream orig = new FileInputStream(original)) { + assertArrayEquals(IOUtils.toByteArray(written), + IOUtils.toByteArray(orig)); + } + } + } + + @Test + public void testRoundtrip() throws Exception { + testRoundtrip(getFile("test.txt")); + testRoundtrip(getFile("bla.tar")); + testRoundtrip(getFile("COMPRESS-256.7z")); + } + + @Test + public void testRoundtripWithOneBigWrite() throws Exception { + Random r = new Random(); + File input = new File(dir, "bigChunkTest"); + try (FileOutputStream fs = new FileOutputStream(input)) { + for (int i = 0 ; i < 1 << 17; i++) { + fs.write(r.nextInt(256)); + } + } + long start = System.currentTimeMillis(); + final File outputSz = new File(dir, input.getName() + ".sz"); + try (FileInputStream is = new FileInputStream(input); + FileOutputStream os = new FileOutputStream(outputSz); + CompressorOutputStream sos = new CompressorStreamFactory() + .createCompressorOutputStream("snappy-framed", os)) { + byte[] b = IOUtils.toByteArray(is); + sos.write(b[0]); + sos.write(b, 1, b.length - 1); // must be split into multiple compressed chunks + } + System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() + + ", compressed bytes: " + outputSz.length() 
+ " after " + (System.currentTimeMillis() - start) + "ms"); + try (FileInputStream is = new FileInputStream(input); + CompressorInputStream sis = new CompressorStreamFactory() + .createCompressorInputStream("snappy-framed", new FileInputStream(outputSz))) { + byte[] expected = IOUtils.toByteArray(is); + byte[] actual = IOUtils.toByteArray(sis); + assertArrayEquals(expected, actual); + } + } + + private void testRoundtrip(File input) throws Exception { + long start = System.currentTimeMillis(); + final File outputSz = new File(dir, input.getName() + ".sz"); + try (FileInputStream is = new FileInputStream(input); + FileOutputStream os = new FileOutputStream(outputSz); + CompressorOutputStream sos = new CompressorStreamFactory() + .createCompressorOutputStream("snappy-framed", os)) { + IOUtils.copy(is, sos); + } + System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() + + ", compressed bytes: " + outputSz.length() + " after " + (System.currentTimeMillis() - start) + "ms"); + try (FileInputStream is = new FileInputStream(input); + CompressorInputStream sis = new CompressorStreamFactory() + .createCompressorInputStream("snappy-framed", new FileInputStream(outputSz))) { + byte[] expected = IOUtils.toByteArray(is); + byte[] actual = IOUtils.toByteArray(sis); + assertArrayEquals(expected, actual); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/GZipTestCase.java b/src/test/java/org/apache/commons/compress/compressors/GZipTestCase.java new file mode 100644 index 000000000..9faa874af --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/GZipTestCase.java @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.zip.Deflater; +import java.util.zip.GZIPInputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; +import org.apache.commons.compress.compressors.gzip.GzipParameters; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public final class GZipTestCase extends AbstractTestCase { + + @Test + public void testGzipCreation() throws Exception { + final File input = getFile("test1.xml"); + final File output = new File(dir, "test1.xml.gz"); + try (OutputStream out = new FileOutputStream(output)) { + try (CompressorOutputStream cos = new CompressorStreamFactory() + .createCompressorOutputStream("gz", out)) { + IOUtils.copy(new FileInputStream(input), cos); + } + } + } + + @Test + public void testGzipUnarchive() throws Exception { + final File input = getFile("bla.tgz"); + final File output = new File(dir, "bla.tar"); + try (InputStream is = new FileInputStream(input)) { + final CompressorInputStream in = new CompressorStreamFactory() + .createCompressorInputStream("gz", is); + FileOutputStream out = null; + try { + out = new FileOutputStream(output); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + } + + @Test + public void testConcatenatedStreamsReadFirstOnly() throws Exception { + final File input = getFile("multiple.gz"); + try (InputStream is = new FileInputStream(input)) { + try (CompressorInputStream in = new CompressorStreamFactory() + .createCompressorInputStream("gz", is)) { + assertEquals('a', in.read()); + assertEquals(-1, in.read()); + } + } + } + + @Test + public void testConcatenatedStreamsReadFully() throws Exception { + final File input = getFile("multiple.gz"); + try (InputStream is = new FileInputStream(input)) { + try (CompressorInputStream in = new GzipCompressorInputStream(is, true)) { + assertEquals('a', in.read()); + assertEquals('b', in.read()); + assertEquals(0, in.available()); + assertEquals(-1, in.read()); + } + } + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-84" + */ + @Test + public void testCorruptedInput() throws Exception { + InputStream in = null; + OutputStream out = null; + CompressorInputStream cin = null; + try { + in = new FileInputStream(getFile("bla.tgz")); + out = new ByteArrayOutputStream(); + IOUtils.copy(in, out); + in.close(); + out.close(); + + final byte[] data = ((ByteArrayOutputStream) out).toByteArray(); + in = new ByteArrayInputStream(data, 0, data.length - 1); + cin = new CompressorStreamFactory() + .createCompressorInputStream("gz", in); + out = new ByteArrayOutputStream(); + + try { + IOUtils.copy(cin, out); + fail("Expected an exception"); + } catch (final IOException ioex) { + // the whole point of the test + } + + } finally { + if (out != null) { + out.close(); + } + if (cin != null) { + cin.close(); + } + if (in != null) { + in.close(); + } + } + } + + @Test + public void testInteroperabilityWithGzipCompressorInputStream() throws Exception { + byte[] content; + try (FileInputStream fis = 
new FileInputStream(getFile("test3.xml"))) { + content = IOUtils.toByteArray(fis); + } + + final ByteArrayOutputStream bout = new ByteArrayOutputStream(); + + final GzipParameters parameters = new GzipParameters(); + parameters.setCompressionLevel(Deflater.BEST_COMPRESSION); + parameters.setOperatingSystem(3); + parameters.setFilename("test3.xml"); + parameters.setComment("Test file"); + parameters.setModificationTime(System.currentTimeMillis()); + final GzipCompressorOutputStream out = new GzipCompressorOutputStream(bout, parameters); + out.write(content); + out.flush(); + out.close(); + + final GzipCompressorInputStream in = new GzipCompressorInputStream(new ByteArrayInputStream(bout.toByteArray())); + final byte[] content2 = IOUtils.toByteArray(in); + + Assert.assertArrayEquals("uncompressed content", content, content2); + } + + @Test + public void testInteroperabilityWithGZIPInputStream() throws Exception { + byte[] content; + try (FileInputStream fis = new FileInputStream(getFile("test3.xml"))) { + content = IOUtils.toByteArray(fis); + } + + final ByteArrayOutputStream bout = new ByteArrayOutputStream(); + + final GzipParameters parameters = new GzipParameters(); + parameters.setCompressionLevel(Deflater.BEST_COMPRESSION); + parameters.setOperatingSystem(3); + parameters.setFilename("test3.xml"); + parameters.setComment("Test file"); + parameters.setModificationTime(System.currentTimeMillis()); + final GzipCompressorOutputStream out = new GzipCompressorOutputStream(bout, parameters); + out.write(content); + out.flush(); + out.close(); + + final GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(bout.toByteArray())); + final byte[] content2 = IOUtils.toByteArray(in); + + Assert.assertArrayEquals("uncompressed content", content, content2); + } + + @Test + public void testInvalidCompressionLevel() { + final GzipParameters parameters = new GzipParameters(); + try { + parameters.setCompressionLevel(10); + fail("IllegalArgumentException not thrown"); + } catch (final IllegalArgumentException e) { + // expected + } + + try { + parameters.setCompressionLevel(-5); + fail("IllegalArgumentException not thrown"); + } catch (final IllegalArgumentException e) { + // expected + } + } + + private void testExtraFlags(final int compressionLevel, final int flag) throws Exception { + byte[] content; + try (FileInputStream fis = new FileInputStream(getFile("test3.xml"))) { + content = IOUtils.toByteArray(fis); + } + + final ByteArrayOutputStream bout = new ByteArrayOutputStream(); + + final GzipParameters parameters = new GzipParameters(); + parameters.setCompressionLevel(compressionLevel); + final GzipCompressorOutputStream out = new GzipCompressorOutputStream(bout, parameters); + IOUtils.copy(new ByteArrayInputStream(content), out); + out.flush(); + out.close(); + + assertEquals("extra flags (XFL)", flag, bout.toByteArray()[8]); + } + + @Test + public void testExtraFlagsFastestCompression() throws Exception { + testExtraFlags(Deflater.BEST_SPEED, 4); + } + + @Test + public void testExtraFlagsBestCompression() throws Exception { + testExtraFlags(Deflater.BEST_COMPRESSION, 2); + } + + @Test + public void testExtraFlagsDefaultCompression() throws Exception { + testExtraFlags(Deflater.DEFAULT_COMPRESSION, 0); + } + + @Test + public void testOverWrite() throws Exception { + final GzipCompressorOutputStream out = new GzipCompressorOutputStream(new ByteArrayOutputStream()); + out.close(); + try { + out.write(0); + fail("IOException expected"); + } catch (final IOException e) { + // expected + } 
+ } + + @Test + public void testMetadataRoundTrip() throws Exception { + final ByteArrayOutputStream bout = new ByteArrayOutputStream(); + + final GzipParameters parameters = new GzipParameters(); + parameters.setCompressionLevel(Deflater.BEST_COMPRESSION); + parameters.setModificationTime(123456000); + parameters.setOperatingSystem(13); + parameters.setFilename("test3.xml"); + parameters.setComment("Umlaute möglich?"); + try (GzipCompressorOutputStream out = new GzipCompressorOutputStream(bout, parameters); FileInputStream fis = new FileInputStream(getFile("test3" + + ".xml"))) { + IOUtils.copy(fis, out); + } + + final GzipCompressorInputStream input = + new GzipCompressorInputStream(new ByteArrayInputStream(bout.toByteArray())); + input.close(); + final GzipParameters readParams = input.getMetaData(); + assertEquals(Deflater.BEST_COMPRESSION, readParams.getCompressionLevel()); + assertEquals(123456000, readParams.getModificationTime()); + assertEquals(13, readParams.getOperatingSystem()); + assertEquals("test3.xml", readParams.getFilename()); + assertEquals("Umlaute möglich?", readParams.getComment()); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tgz"); + try (InputStream is = new FileInputStream(input)) { + final GzipCompressorInputStream in = + new GzipCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read()); + Assert.assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tgz"); + byte[] buf = new byte[2]; + try (InputStream is = new FileInputStream(input)) { + final GzipCompressorInputStream in = + new GzipCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read(buf)); + Assert.assertEquals(-1, in.read(buf)); + in.close(); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/GzipUtilsTestCase.java b/src/test/java/org/apache/commons/compress/compressors/GzipUtilsTestCase.java new file mode 100644 index 000000000..aa688b574 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/GzipUtilsTestCase.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors; + +import static org.junit.Assert.*; + +import org.apache.commons.compress.compressors.gzip.GzipUtils; +import org.junit.Test; + +public class GzipUtilsTestCase { + + @Test + public void testIsCompressedFilename() { + assertFalse(GzipUtils.isCompressedFilename("")); + assertFalse(GzipUtils.isCompressedFilename(".gz")); + + assertTrue(GzipUtils.isCompressedFilename("x.tgz")); + assertTrue(GzipUtils.isCompressedFilename("x.taz")); + assertTrue(GzipUtils.isCompressedFilename("x.svgz")); + assertTrue(GzipUtils.isCompressedFilename("x.cpgz")); + assertTrue(GzipUtils.isCompressedFilename("x.wmz")); + assertTrue(GzipUtils.isCompressedFilename("x.emz")); + assertTrue(GzipUtils.isCompressedFilename("x.gz")); + assertTrue(GzipUtils.isCompressedFilename("x.z")); + assertTrue(GzipUtils.isCompressedFilename("x-gz")); + assertTrue(GzipUtils.isCompressedFilename("x-z")); + assertTrue(GzipUtils.isCompressedFilename("x_z")); + + assertFalse(GzipUtils.isCompressedFilename("xxgz")); + assertFalse(GzipUtils.isCompressedFilename("xzz")); + assertFalse(GzipUtils.isCompressedFilename("xaz")); + + assertTrue(GzipUtils.isCompressedFilename("x.SVGZ")); + assertTrue(GzipUtils.isCompressedFilename("x.Svgz")); + assertTrue(GzipUtils.isCompressedFilename("x.svGZ")); + + assertFalse(GzipUtils.isCompressedFilename("x.wmz ")); + assertFalse(GzipUtils.isCompressedFilename("x.wmz\n")); + assertFalse(GzipUtils.isCompressedFilename("x.wmz.y")); + } + + @Test + public void testGetUncompressedFilename() { + assertEquals("", GzipUtils.getUncompressedFilename("")); + assertEquals(".gz", GzipUtils.getUncompressedFilename(".gz")); + + assertEquals("x.tar", GzipUtils.getUncompressedFilename("x.tgz")); + assertEquals("x.tar", GzipUtils.getUncompressedFilename("x.taz")); + assertEquals("x.svg", GzipUtils.getUncompressedFilename("x.svgz")); + assertEquals("x.cpio", GzipUtils.getUncompressedFilename("x.cpgz")); + assertEquals("x.wmf", GzipUtils.getUncompressedFilename("x.wmz")); + assertEquals("x.emf", GzipUtils.getUncompressedFilename("x.emz")); + assertEquals("x", GzipUtils.getUncompressedFilename("x.gz")); + assertEquals("x", GzipUtils.getUncompressedFilename("x.z")); + assertEquals("x", GzipUtils.getUncompressedFilename("x-gz")); + assertEquals("x", GzipUtils.getUncompressedFilename("x-z")); + assertEquals("x", GzipUtils.getUncompressedFilename("x_z")); + + assertEquals("x.svg", GzipUtils.getUncompressedFilename("x.SVGZ")); + assertEquals("X.svg", GzipUtils.getUncompressedFilename("X.SVGZ")); + assertEquals("X.svg", GzipUtils.getUncompressedFilename("X.svGZ")); + + assertEquals("x.wmz ", GzipUtils.getUncompressedFilename("x.wmz ")); + assertEquals("x.wmz\n", GzipUtils.getUncompressedFilename("x.wmz\n")); + assertEquals("x.wmz.y", GzipUtils.getUncompressedFilename("x.wmz.y")); + } + + @Test + public void testGetCompressedFilename() { + assertEquals(".gz", GzipUtils.getCompressedFilename("")); + assertEquals("x.gz", GzipUtils.getCompressedFilename("x")); + + assertEquals("x.tgz", GzipUtils.getCompressedFilename("x.tar")); + assertEquals("x.svgz", GzipUtils.getCompressedFilename("x.svg")); + assertEquals("x.cpgz", GzipUtils.getCompressedFilename("x.cpio")); + assertEquals("x.wmz", GzipUtils.getCompressedFilename("x.wmf")); + assertEquals("x.emz", GzipUtils.getCompressedFilename("x.emf")); + + assertEquals("x.svgz", GzipUtils.getCompressedFilename("x.SVG")); + assertEquals("X.svgz", GzipUtils.getCompressedFilename("X.SVG")); + assertEquals("X.svgz", 
GzipUtils.getCompressedFilename("X.svG")); + + assertEquals("x.wmf .gz", GzipUtils.getCompressedFilename("x.wmf ")); + assertEquals("x.wmf\n.gz", GzipUtils.getCompressedFilename("x.wmf\n")); + assertEquals("x.wmf.y.gz", GzipUtils.getCompressedFilename("x.wmf.y")); + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/LZMATestCase.java b/src/test/java/org/apache/commons/compress/compressors/LZMATestCase.java new file mode 100644 index 000000000..3d2ab027b --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/LZMATestCase.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.lzma.LZMACompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public final class LZMATestCase extends AbstractTestCase { + + @Test + public void lzmaRoundtrip() throws Exception { + final File input = getFile("test1.xml"); + final File compressed = new File(dir, "test1.xml.xz"); + try (OutputStream out = new FileOutputStream(compressed)) { + try (CompressorOutputStream cos = new CompressorStreamFactory() + .createCompressorOutputStream("lzma", out)) { + IOUtils.copy(new FileInputStream(input), cos); + } + } + byte[] orig; + try (InputStream is = new FileInputStream(input)) { + orig = IOUtils.toByteArray(is); + } + byte[] uncompressed; + try (InputStream is = new FileInputStream(compressed); + CompressorInputStream in = new LZMACompressorInputStream(is)) { + uncompressed = IOUtils.toByteArray(in); + } + Assert.assertArrayEquals(orig, uncompressed); + } + + @Test + public void testLZMAUnarchive() throws Exception { + final File input = getFile("bla.tar.lzma"); + final File output = new File(dir, "bla.tar"); + try (InputStream is = new FileInputStream(input)) { + final CompressorInputStream in = new LZMACompressorInputStream(is); + copy(in, output); + } + } + + @Test + public void testLZMAUnarchiveWithAutodetection() throws Exception { + final File input = getFile("bla.tar.lzma"); + final File output = new File(dir, "bla.tar"); + try (InputStream is = new BufferedInputStream(new FileInputStream(input))) { + final CompressorInputStream in = new CompressorStreamFactory() + .createCompressorInputStream(is); + copy(in, output); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tar.lzma"); + 
try (InputStream is = new FileInputStream(input)) { + final LZMACompressorInputStream in = + new LZMACompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read()); + Assert.assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tar.lzma"); + byte[] buf = new byte[2]; + try (InputStream is = new FileInputStream(input)) { + final LZMACompressorInputStream in = + new LZMACompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read(buf)); + Assert.assertEquals(-1, in.read(buf)); + in.close(); + } + } + + private void copy(final InputStream in, final File output) throws IOException { + FileOutputStream out = null; + try { + out = new FileOutputStream(output); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/Pack200TestCase.java b/src/test/java/org/apache/commons/compress/compressors/Pack200TestCase.java new file mode 100644 index 000000000..d346185ae --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/Pack200TestCase.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.ArchiveStreamFactory; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream; +import org.apache.commons.compress.compressors.pack200.Pack200CompressorOutputStream; +import org.apache.commons.compress.compressors.pack200.Pack200Strategy; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class Pack200TestCase extends AbstractTestCase { + + @Test + public void testJarUnarchiveAllInMemory() throws Exception { + jarUnarchiveAll(false, Pack200Strategy.IN_MEMORY); + } + + @Test + public void testJarUnarchiveAllFileArgInMemory() throws Exception { + jarUnarchiveAll(true, Pack200Strategy.IN_MEMORY); + } + + @Test + public void testJarUnarchiveAllTempFile() throws Exception { + jarUnarchiveAll(false, Pack200Strategy.TEMP_FILE); + } + + @Test + public void testJarUnarchiveAllFileTempFile() throws Exception { + jarUnarchiveAll(true, Pack200Strategy.TEMP_FILE); + } + + private void jarUnarchiveAll(final boolean useFile, final Pack200Strategy mode) + throws Exception { + final File input = getFile("bla.pack"); + try (InputStream is = useFile + ? 
new Pack200CompressorInputStream(input, mode) + : new Pack200CompressorInputStream(new FileInputStream(input), + mode)) { + final ArchiveInputStream in = new ArchiveStreamFactory() + .createArchiveInputStream("jar", is); + + ArchiveEntry entry = in.getNextEntry(); + while (entry != null) { + final File archiveEntry = new File(dir, entry.getName()); + archiveEntry.getParentFile().mkdirs(); + if (entry.isDirectory()) { + archiveEntry.mkdir(); + entry = in.getNextEntry(); + continue; + } + final OutputStream out = new FileOutputStream(archiveEntry); + IOUtils.copy(in, out); + out.close(); + entry = in.getNextEntry(); + } + + in.close(); + } + } + + @Test + public void testJarArchiveCreationInMemory() throws Exception { + jarArchiveCreation(Pack200Strategy.IN_MEMORY); + } + + @Test + public void testJarArchiveCreationTempFile() throws Exception { + jarArchiveCreation(Pack200Strategy.TEMP_FILE); + } + + private void jarArchiveCreation(final Pack200Strategy mode) throws Exception { + final File output = new File(dir, "bla.pack"); + + final File file1 = getFile("test1.xml"); + final File file2 = getFile("test2.xml"); + + try (OutputStream out = new Pack200CompressorOutputStream(new FileOutputStream(output), + mode)) { + final ArchiveOutputStream os = new ArchiveStreamFactory() + .createArchiveOutputStream("jar", out); + + os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); + IOUtils.copy(new FileInputStream(file1), os); + os.closeArchiveEntry(); + + os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); + IOUtils.copy(new FileInputStream(file2), os); + os.closeArchiveEntry(); + + os.close(); + } + + try (InputStream is = new Pack200CompressorInputStream(output)) { + final ArchiveInputStream in = new ArchiveStreamFactory() + .createArchiveInputStream("jar", is); + final List<String> files = new ArrayList<>(); + files.add("testdata/test1.xml"); + files.add("testdata/test2.xml"); + checkArchiveContent(in, files); + in.close(); + } + } + + @Test + public void testGoodSignature() throws Exception { + try (InputStream is = new FileInputStream(getFile("bla.pack"))) { + final byte[] sig = new byte[4]; + is.read(sig); + assertTrue(Pack200CompressorInputStream.matches(sig, 4)); + } + } + + @Test + public void testBadSignature() throws Exception { + try (InputStream is = new FileInputStream(getFile("bla.jar"))) { + final byte[] sig = new byte[4]; + is.read(sig); + assertFalse(Pack200CompressorInputStream.matches(sig, 4)); + } + } + + @Test + public void testShortSignature() throws Exception { + try (InputStream is = new FileInputStream(getFile("bla.pack"))) { + final byte[] sig = new byte[2]; + is.read(sig); + assertFalse(Pack200CompressorInputStream.matches(sig, 2)); + } + } + + @Test + public void testInputStreamMethods() throws Exception { + final Map<String, String> m = new HashMap<>(); + m.put("foo", "bar"); + try (InputStream is = new Pack200CompressorInputStream(new FileInputStream(getFile("bla.jar")), + m)) { + // packed file is a jar, which is a zip so it starts with + // a local file header + assertTrue(is.markSupported()); + is.mark(5); + assertEquals(0x50, is.read()); + final byte[] rest = new byte[3]; + assertEquals(3, is.read(rest)); + assertEquals(0x4b, rest[0]); + assertEquals(3, rest[1]); + assertEquals(4, rest[2]); + assertEquals(1, is.skip(1)); + is.reset(); + assertEquals(0x50, is.read()); + assertTrue(is.available() > 0); + } + } + + @Test + public void testOutputStreamMethods() throws Exception { + final File output = new File(dir, "bla.pack"); + final Map<String, 
String> m = new HashMap<>(); + m.put("foo", "bar"); + try (OutputStream out = new FileOutputStream(output)) { + final OutputStream os = new Pack200CompressorOutputStream(out, m); + os.write(1); + os.write(new byte[] { 2, 3 }); + os.close(); + } + } + + @Test + public void singleByteReadFromMemoryConsistentlyReturnsMinusOneAtEof() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(Pack200Strategy.IN_MEMORY); + } + + @Test + public void singleByteReadFromTempFileConsistentlyReturnsMinusOneAtEof() throws Exception { + singleByteReadConsistentlyReturnsMinusOneAtEof(Pack200Strategy.TEMP_FILE); + } + + private void singleByteReadConsistentlyReturnsMinusOneAtEof(Pack200Strategy s) throws Exception { + final File input = getFile("bla.pack"); + try (final Pack200CompressorInputStream in = new Pack200CompressorInputStream(input, s)) { + IOUtils.toByteArray(in); + assertEquals(-1, in.read()); + assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadFromMemoryConsistentlyReturnsMinusOneAtEof() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(Pack200Strategy.IN_MEMORY); + } + + @Test + public void multiByteReadFromTempFileConsistentlyReturnsMinusOneAtEof() throws Exception { + multiByteReadConsistentlyReturnsMinusOneAtEof(Pack200Strategy.TEMP_FILE); + } + + private void multiByteReadConsistentlyReturnsMinusOneAtEof(Pack200Strategy s) throws Exception { + final File input = getFile("bla.pack"); + byte[] buf = new byte[2]; + try (final Pack200CompressorInputStream in = new Pack200CompressorInputStream(input, s)) { + IOUtils.toByteArray(in); + assertEquals(-1, in.read(buf)); + assertEquals(-1, in.read(buf)); + in.close(); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/TestCompressorStreamProvider.java b/src/test/java/org/apache/commons/compress/compressors/TestCompressorStreamProvider.java new file mode 100644 index 000000000..62d72e94c --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/TestCompressorStreamProvider.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.compress.compressors; + +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Set; + +import org.apache.commons.compress.utils.Sets; + +public class TestCompressorStreamProvider implements CompressorStreamProvider { + + public static final class InvocationConfirmationException extends CompressorException { + + private static final long serialVersionUID = 1L; + + public InvocationConfirmationException(final String message) { + super(message); + } + } + + @Override + public CompressorInputStream createCompressorInputStream(final String name, final InputStream in, + final boolean decompressConcatenated) throws CompressorException { + throw new InvocationConfirmationException(name); + } + + @Override + public CompressorOutputStream createCompressorOutputStream(final String name, final OutputStream out) + throws CompressorException { + throw new InvocationConfirmationException(name); + } + + @Override + public Set<String> getInputStreamCompressorNames() { + return Sets.newHashSet("TestInput1"); + } + + @Override + public Set<String> getOutputStreamCompressorNames() { + return Sets.newHashSet("TestOutput1"); + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/XZTestCase.java b/src/test/java/org/apache/commons/compress/compressors/XZTestCase.java new file mode 100644 index 000000000..cd605a98b --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/XZTestCase.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.management.ManagementFactory; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class XZTestCase extends AbstractTestCase { + + @Test + public void testXZCreation() throws Exception { + final long max = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax(); + System.out.println("XZTestCase: HeapMax="+max+" bytes "+(double)max/(1024*1024)+" MB"); + final File input = getFile("test1.xml"); + final File output = new File(dir, "test1.xml.xz"); + try (OutputStream out = new FileOutputStream(output)) { + try (CompressorOutputStream cos = new CompressorStreamFactory() + .createCompressorOutputStream("xz", out)) { + IOUtils.copy(new FileInputStream(input), cos); + } + } + } + + @Test + public void testXZUnarchive() throws Exception { + final File input = getFile("bla.tar.xz"); + final File output = new File(dir, "bla.tar"); + try (InputStream is = new FileInputStream(input)) { + final CompressorInputStream in = new CompressorStreamFactory() + .createCompressorInputStream("xz", is); + FileOutputStream out = null; + try { + out = new FileOutputStream(output); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + } + + @Test + public void testConcatenatedStreamsReadFirstOnly() throws Exception { + final File input = getFile("multiple.xz"); + try (InputStream is = new FileInputStream(input)) { + try (CompressorInputStream in = new CompressorStreamFactory() + .createCompressorInputStream("xz", is)) { + assertEquals('a', in.read()); + assertEquals(-1, in.read()); + } + } + } + + @Test + public void testConcatenatedStreamsReadFully() throws Exception { + final File input = getFile("multiple.xz"); + try (InputStream is = new FileInputStream(input)) { + try (CompressorInputStream in = new XZCompressorInputStream(is, true)) { + assertEquals('a', in.read()); + assertEquals('b', in.read()); + assertEquals(0, in.available()); + assertEquals(-1, in.read()); + } + } + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/ZTestCase.java b/src/test/java/org/apache/commons/compress/compressors/ZTestCase.java new file mode 100644 index 000000000..b80a4a088 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/ZTestCase.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors; + +import static org.junit.Assert.*; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.z.ZCompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class ZTestCase extends AbstractTestCase { + + @Test + public void testZUnarchive() throws Exception { + testUnarchive(new StreamWrapper<CompressorInputStream>() { + @Override + public CompressorInputStream wrap(final InputStream is) throws IOException { + return new ZCompressorInputStream(is); + } + }); + } + + @Test + public void testZUnarchiveViaFactory() throws Exception { + testUnarchive(new StreamWrapper<CompressorInputStream>() { + @Override + public CompressorInputStream wrap(final InputStream is) throws Exception { + return new CompressorStreamFactory() + .createCompressorInputStream(CompressorStreamFactory.Z, is); + } + }); + } + + @Test + public void testZUnarchiveViaAutoDetection() throws Exception { + testUnarchive(new StreamWrapper<CompressorInputStream>() { + @Override + public CompressorInputStream wrap(final InputStream is) throws Exception { + return new CompressorStreamFactory() + .createCompressorInputStream(new BufferedInputStream(is)); + } + }); + } + + @Test + public void testMatches() throws Exception { + assertFalse(ZCompressorInputStream.matches(new byte[] { 1, 2, 3, 4 }, 4)); + assertFalse(ZCompressorInputStream.matches(new byte[] { 0x1f, 2, 3, 4 }, 4)); + assertFalse(ZCompressorInputStream.matches(new byte[] { 1, (byte)0x9d, 3, 4 }, + 4)); + assertFalse(ZCompressorInputStream.matches(new byte[] { 0x1f, (byte) 0x9d, 3, 4 }, + 3)); + assertTrue(ZCompressorInputStream.matches(new byte[] { 0x1f, (byte) 0x9d, 3, 4 }, + 4)); + } + + private void testUnarchive(final StreamWrapper<CompressorInputStream> wrapper) throws Exception { + final File input = getFile("bla.tar.Z"); + final File output = new File(dir, "bla.tar"); + try (InputStream is = new FileInputStream(input)) { + final InputStream in = wrapper.wrap(is); + FileOutputStream out = null; + try { + out = new FileOutputStream(output); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/brotli/BrotliCompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/brotli/BrotliCompressorInputStreamTest.java new file mode 100644 index 000000000..721ab33e0 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/brotli/BrotliCompressorInputStreamTest.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.compress.compressors.brotli; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class BrotliCompressorInputStreamTest extends AbstractTestCase { + + /** + * Test bridge works fine + * @throws {@link IOException} + */ + @Test + public void testBrotliDecode() throws IOException { + final File input = getFile("brotli.testdata.compressed"); + final File expected = getFile("brotli.testdata.uncompressed"); + try (InputStream inputStream = new FileInputStream(input); + InputStream expectedStream = new FileInputStream(expected); + BrotliCompressorInputStream brotliInputStream = new BrotliCompressorInputStream(inputStream)) { + final byte[] b = new byte[20]; + IOUtils.readFully(expectedStream, b); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int readByte = -1; + while((readByte = brotliInputStream.read()) != -1) { + bos.write(readByte); + } + Assert.assertArrayEquals(b, bos.toByteArray()); + } + } + + @Test + public void testCachingIsEnabledByDefaultAndBrotliIsPresent() { + assertEquals(BrotliUtils.CachedAvailability.CACHED_AVAILABLE, BrotliUtils.getCachedBrotliAvailability()); + assertTrue(BrotliUtils.isBrotliCompressionAvailable()); + } + + @Test + public void testCanTurnOffCaching() { + try { + BrotliUtils.setCacheBrotliAvailablity(false); + assertEquals(BrotliUtils.CachedAvailability.DONT_CACHE, BrotliUtils.getCachedBrotliAvailability()); + assertTrue(BrotliUtils.isBrotliCompressionAvailable()); + } finally { + BrotliUtils.setCacheBrotliAvailablity(true); + } + } + + @Test + public void testTurningOnCachingReEvaluatesAvailability() { + try { + BrotliUtils.setCacheBrotliAvailablity(false); + assertEquals(BrotliUtils.CachedAvailability.DONT_CACHE, BrotliUtils.getCachedBrotliAvailability()); + BrotliUtils.setCacheBrotliAvailablity(true); + assertEquals(BrotliUtils.CachedAvailability.CACHED_AVAILABLE, BrotliUtils.getCachedBrotliAvailability()); + } finally { + BrotliUtils.setCacheBrotliAvailablity(true); + } + } + + + @Test + public void availableShouldReturnZero() throws IOException { + final File input = getFile("brotli.testdata.compressed"); + try (InputStream is = new FileInputStream(input)) { + final BrotliCompressorInputStream in = + new BrotliCompressorInputStream(is); + Assert.assertTrue(in.available() == 0); + in.close(); + } + } + + @Test + public void shouldBeAbleToSkipAByte() throws IOException { + final File input = getFile("brotli.testdata.compressed"); + try (InputStream is = new FileInputStream(input)) { + final BrotliCompressorInputStream in = + new BrotliCompressorInputStream(is); + Assert.assertEquals(1, in.skip(1)); + in.close(); + } + } + + @Test + public void singleByteReadWorksAsExpected() throws IOException { + final File input = getFile("brotli.testdata.compressed"); + try (InputStream is = new FileInputStream(input)) { + final BrotliCompressorInputStream in = + new 
BrotliCompressorInputStream(is); + // starts with filename "XXX" + Assert.assertEquals('X', in.read()); + in.close(); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("brotli.testdata.compressed"); + try (InputStream is = new FileInputStream(input)) { + final BrotliCompressorInputStream in = + new BrotliCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read()); + Assert.assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("brotli.testdata.compressed"); + byte[] buf = new byte[2]; + try (InputStream is = new FileInputStream(input)) { + final BrotliCompressorInputStream in = + new BrotliCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read(buf)); + Assert.assertEquals(-1, in.read(buf)); + in.close(); + } + } + + @Test + public void testBrotliUnarchive() throws Exception { + final File input = getFile("bla.tar.br"); + final File output = new File(dir, "bla.tar"); + try (InputStream is = new FileInputStream(input)) { + final CompressorInputStream in = new CompressorStreamFactory() + .createCompressorInputStream("br", is); + FileOutputStream out = null; + try { + out = new FileOutputStream(output); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStreamTest.java new file mode 100644 index 000000000..7130d928a --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStreamTest.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.bzip2; + +import static org.apache.commons.compress.AbstractTestCase.getFile; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class BZip2CompressorInputStreamTest { + + @Test(expected = IOException.class) + public void shouldThrowAnIOExceptionWhenAppliedToAZipFile() throws Exception { + try (FileInputStream in = new FileInputStream(getFile("bla.zip"))) { + BZip2CompressorInputStream bis = new BZip2CompressorInputStream(in); + bis.close(); + } + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-309" + */ + @Test + public void readOfLength0ShouldReturn0() throws Exception { + // Create a big random piece of data + final byte[] rawData = new byte[1048576]; + for (int i=0; i < rawData.length; ++i) { + rawData[i] = (byte) Math.floor(Math.random()*256); + } + + // Compress it + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final BZip2CompressorOutputStream bzipOut = new BZip2CompressorOutputStream(baos); + bzipOut.write(rawData); + bzipOut.flush(); + bzipOut.close(); + baos.flush(); + baos.close(); + + // Try to read it back in + final ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + final BZip2CompressorInputStream bzipIn = new BZip2CompressorInputStream(bais); + final byte[] buffer = new byte[1024]; + Assert.assertEquals(1024, bzipIn.read(buffer, 0, 1024)); + Assert.assertEquals(0, bzipIn.read(buffer, 1024, 0)); + Assert.assertEquals(1024, bzipIn.read(buffer, 0, 1024)); + bzipIn.close(); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.txt.bz2"); + try (InputStream is = new FileInputStream(input)) { + final BZip2CompressorInputStream in = + new BZip2CompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read()); + Assert.assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.txt.bz2"); + byte[] buf = new byte[2]; + try (InputStream is = new FileInputStream(input)) { + final BZip2CompressorInputStream in = + new BZip2CompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read(buf)); + Assert.assertEquals(-1, in.read(buf)); + in.close(); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java b/src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java new file mode 100644 index 000000000..37b27efde --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.bzip2; + +import org.junit.Test; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class BlockSortTest { + + private static final byte[] FIXTURE = { 0, 1, (byte) 252, (byte) 253, (byte) 255, + (byte) 254, 3, 2, (byte) 128 }; + + /* + Burrows-Wheeler transform of fixture the manual way: + + * build the matrix + + 0, 1, 252, 253, 255, 254, 3, 2, 128 + 1, 252, 253, 255, 254, 3, 2, 128, 0 + 252, 253, 255, 254, 3, 2, 128, 0, 1 + 253, 255, 254, 3, 2, 128, 0, 1, 252 + 255, 254, 3, 2, 128, 0, 1, 252, 253 + 254, 3, 2, 128, 0, 1, 252, 253, 255 + 3, 2, 128, 0, 1, 252, 253, 255, 254 + 2, 128, 0, 1, 252, 253, 255, 254, 3 + 128, 0, 1, 252, 253, 255, 254, 3, 2 + + * sort it + + 0, 1, 252, 253, 255, 254, 3, 2, 128 + 1, 252, 253, 255, 254, 3, 2, 128, 0 + 2, 128, 0, 1, 252, 253, 255, 254, 3 + 3, 2, 128, 0, 1, 252, 253, 255, 254 + 128, 0, 1, 252, 253, 255, 254, 3, 2 + 252, 253, 255, 254, 3, 2, 128, 0, 1 + 253, 255, 254, 3, 2, 128, 0, 1, 252 + 254, 3, 2, 128, 0, 1, 252, 253, 255 + 255, 254, 3, 2, 128, 0, 1, 252, 253 + + * grab last column + + 128, 0, 3, 254, 2, 1, 252, 255, 253 + + and the original line has been 0 + */ + + private static final byte[] FIXTURE_BWT = { (byte) 128, 0, 3, (byte) 254, 2, 1, + (byte) 252, (byte) 255, (byte) 253 }; + + private static final int[] FIXTURE_SORTED = { + 0, 1, 7, 6, 8, 2, 3, 5, 4 + }; + + private static final byte[] FIXTURE2 = { + 'C', 'o', 'm', 'm', 'o', 'n', 's', ' ', 'C', 'o', 'm', 'p', 'r', 'e', 's', 's', + }; + + private static final byte[] FIXTURE2_BWT = { + 's', 's', ' ', 'r', 'o', 'm', 'o', 'o', 'C', 'C', 'm', 'm', 'p', 'n', 's', 'e', + }; + + @Test + public void testSortFixture() { + final DS ds = setUpFixture(); + ds.s.blockSort(ds.data, FIXTURE.length - 1); + assertFixtureSorted(ds.data); + assertEquals(0, ds.data.origPtr); + } + + @Test + public void testSortFixtureMainSort() { + final DS ds = setUpFixture(); + ds.s.mainSort(ds.data, FIXTURE.length - 1); + assertFixtureSorted(ds.data); + } + + @Test + public void testSortFixtureFallbackSort() { + final DS ds = setUpFixture(); + ds.s.fallbackSort(ds.data, FIXTURE.length - 1); + assertFixtureSorted(ds.data); + } + + @Test + public void testSortFixture2() { + final DS ds = setUpFixture2(); + ds.s.blockSort(ds.data, FIXTURE2.length - 1); + assertFixture2Sorted(ds.data); + assertEquals(1, ds.data.origPtr); + } + + @Test + public void testSortFixture2MainSort() { + final DS ds = setUpFixture2(); + ds.s.mainSort(ds.data, FIXTURE2.length - 1); + assertFixture2Sorted(ds.data); + } + + @Test + public void testSortFixture2FallbackSort() { + final DS ds = setUpFixture2(); + ds.s.fallbackSort(ds.data, FIXTURE2.length - 1); + assertFixture2Sorted(ds.data); + } + + @Test + public void testFallbackSort() { + final BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1); + final BlockSort s = new BlockSort(data); + final int[] fmap = new int[FIXTURE.length]; + s.fallbackSort(fmap, FIXTURE, FIXTURE.length); + assertArrayEquals(FIXTURE_SORTED, fmap); + } + + private DS 
setUpFixture() { + return setUpFixture(FIXTURE); + } + + private void assertFixtureSorted(final BZip2CompressorOutputStream.Data data) { + assertFixtureSorted(data, FIXTURE, FIXTURE_BWT); + } + + private DS setUpFixture2() { + return setUpFixture(FIXTURE2); + } + + private void assertFixture2Sorted(final BZip2CompressorOutputStream.Data data) { + assertFixtureSorted(data, FIXTURE2, FIXTURE2_BWT); + } + + private DS setUpFixture(final byte[] fixture) { + final BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1); + System.arraycopy(fixture, 0, data.block, 1, fixture.length); + return new DS(data, new BlockSort(data)); + } + + private void assertFixtureSorted(final BZip2CompressorOutputStream.Data data, + final byte[] fixture, final byte[] fixtureBwt) { + assertEquals(fixture[fixture.length - 1], data.block[0]); + for (int i = 0; i < fixture.length; i++) { + assertEquals(fixtureBwt[i], data.block[data.fmap[i]]); + } + } + + private static class DS { + private final BZip2CompressorOutputStream.Data data; + private final BlockSort s; + DS(final BZip2CompressorOutputStream.Data data, final BlockSort s) { + this.data = data; + this.s = s; + } + } +}
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/compressors/bzip2/PythonTruncatedBzip2Test.java b/src/test/java/org/apache/commons/compress/compressors/bzip2/PythonTruncatedBzip2Test.java new file mode 100644 index 000000000..2bdd014bf --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/bzip2/PythonTruncatedBzip2Test.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.bzip2; + +import static org.junit.Assert.assertArrayEquals; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Testcase porting a test from Python's testsuite. + * @see "https://issues.apache.org/jira/browse/COMPRESS-253" + */ +public class PythonTruncatedBzip2Test { + + private static String TEXT = "root:x:0:0:root:/root:/bin/bash\nbin:x:1:1:bin:/bin:\ndaemon:x:2:2:daemon:/sbin:\nadm:x:3:4:adm:/var/adm:\nlp:x:4:7:lp:/var/spool/lpd:\nsync:x:5:0:sync:/sbin:/bin/sync\nshutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\nhalt:x:7:0:halt:/sbin:/sbin/halt\nmail:x:8:12:mail:/var/spool/mail:\nnews:x:9:13:news:/var/spool/news:\nuucp:x:10:14:uucp:/var/spool/uucp:\noperator:x:11:0:operator:/root:\ngames:x:12:100:games:/usr/games:\ngopher:x:13:30:gopher:/usr/lib/gopher-data:\nftp:x:14:50:FTP User:/var/ftp:/bin/bash\nnobody:x:65534:65534:Nobody:/home:\npostfix:x:100:101:postfix:/var/spool/postfix:\nniemeyer:x:500:500::/home/niemeyer:/bin/bash\npostgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\nmysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\nwww:x:103:104::/var/www:/bin/false\n"; + + private static byte[] DATA; + private static byte[] TRUNCATED_DATA; + private ReadableByteChannel bz2Channel; + + @BeforeClass + public static void initializeTestData() throws IOException { + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + final BZip2CompressorOutputStream bz2out = new BZip2CompressorOutputStream(out); + bz2out.write(TEXT.getBytes(), 0, TEXT.getBytes().length); + bz2out.close(); + DATA = out.toByteArray(); + + // Drop the eos_magic field (6 bytes) and CRC (4 bytes). 
+ TRUNCATED_DATA = copyOfRange(DATA, 0, DATA.length - 10); + } + + @Before + public void initializeChannel() throws IOException { + final InputStream source = new ByteArrayInputStream(TRUNCATED_DATA); + this.bz2Channel = makeBZ2C(source); + } + + @After + public void closeChannel() throws IOException { + bz2Channel.close(); + bz2Channel = null; + } + + @Test(expected = IOException.class) + public void testTruncatedData() throws IOException { + //with BZ2File(self.filename) as f: + // self.assertRaises(EOFError, f.read) + System.out.println("Attempt to read the whole thing in, should throw ..."); + final ByteBuffer buffer = ByteBuffer.allocate(8192); + bz2Channel.read(buffer); + } + + @Test + public void testPartialReadTruncatedData() throws IOException { + //with BZ2File(self.filename) as f: + // self.assertEqual(f.read(len(self.TEXT)), self.TEXT) + // self.assertRaises(EOFError, f.read, 1) + + final int length = TEXT.length(); + ByteBuffer buffer = ByteBuffer.allocate(length); + bz2Channel.read(buffer); + + assertArrayEquals(copyOfRange(TEXT.getBytes(), 0, length), + buffer.array()); + + // subsequent read should throw + buffer = ByteBuffer.allocate(1); + try { + bz2Channel.read(buffer); + Assert.fail("The read should have thrown."); + } catch (final IOException e) { + // pass + } + } + + private static ReadableByteChannel makeBZ2C(final InputStream source) throws IOException { + final BufferedInputStream bin = new BufferedInputStream(source); + final BZip2CompressorInputStream bZin = new BZip2CompressorInputStream(bin, true); + + return Channels.newChannel(bZin); + } + + // Helper method since Arrays#copyOfRange is Java 1.6+ + // Does not check parameters, so may fail if they are incompatible + private static byte[] copyOfRange(final byte[] original, final int from, final int to) { + final int length = to - from; + final byte buff[] = new byte[length]; + System.arraycopy(original, from, buff, 0, length); + return buff; + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorInputStreamTest.java new file mode 100644 index 000000000..97a010f6c --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorInputStreamTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.deflate; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class DeflateCompressorInputStreamTest { + + @Test + public void availableShouldReturnNonZero() throws IOException { + final File input = AbstractTestCase.getFile("bla.tar.deflatez"); + try (InputStream is = new FileInputStream(input)) { + final DeflateCompressorInputStream in = + new DeflateCompressorInputStream(is); + Assert.assertTrue(in.available() > 0); + in.close(); + } + } + + @Test + public void shouldBeAbleToSkipAByte() throws IOException { + final File input = AbstractTestCase.getFile("bla.tar.deflatez"); + try (InputStream is = new FileInputStream(input)) { + final DeflateCompressorInputStream in = + new DeflateCompressorInputStream(is); + Assert.assertEquals(1, in.skip(1)); + in.close(); + } + } + + @Test + public void singleByteReadWorksAsExpected() throws IOException { + final File input = AbstractTestCase.getFile("bla.tar.deflatez"); + try (InputStream is = new FileInputStream(input)) { + final DeflateCompressorInputStream in = + new DeflateCompressorInputStream(is); + // tar header starts with filename "test1.xml" + Assert.assertEquals('t', in.read()); + in.close(); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = AbstractTestCase.getFile("bla.tar.deflatez"); + try (InputStream is = new FileInputStream(input)) { + final DeflateCompressorInputStream in = + new DeflateCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read()); + Assert.assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = AbstractTestCase.getFile("bla.tar.deflatez"); + byte[] buf = new byte[2]; + try (InputStream is = new FileInputStream(input)) { + final DeflateCompressorInputStream in = + new DeflateCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read(buf)); + Assert.assertEquals(-1, in.read(buf)); + in.close(); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorOutputStreamTest.java new file mode 100644 index 000000000..ec1940646 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/deflate/DeflateCompressorOutputStreamTest.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.deflate; + +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +public class DeflateCompressorOutputStreamTest { + + @Test + public void canReadASingleByteFlushAndFinish() throws IOException { + try (final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + final DeflateCompressorOutputStream cos = new DeflateCompressorOutputStream(bos)) { + cos.write(99); + cos.flush(); + cos.finish(); + Assert.assertTrue(bos.toByteArray().length > 0); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/deflate/DeflateParametersTest.java b/src/test/java/org/apache/commons/compress/compressors/deflate/DeflateParametersTest.java new file mode 100644 index 000000000..0a43798f2 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/deflate/DeflateParametersTest.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.deflate; + +import org.junit.Assert; +import org.junit.Test; + +public class DeflateParametersTest { + + @Test + public void shouldBeAbleToSetCompressionLevel() { + final DeflateParameters p = new DeflateParameters(); + p.setCompressionLevel(5); + Assert.assertEquals(5, p.getCompressionLevel()); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldNotBeAbleToSetCompressionLevelToANegativeValue() { + final DeflateParameters p = new DeflateParameters(); + p.setCompressionLevel(-2); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldNotBeAbleToSetCompressionLevelToADoubleDigitValue() { + final DeflateParameters p = new DeflateParameters(); + p.setCompressionLevel(12); + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/deflate64/Deflate64BugsTest.java b/src/test/java/org/apache/commons/compress/compressors/deflate64/Deflate64BugsTest.java new file mode 100644 index 000000000..d21f05252 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/deflate64/Deflate64BugsTest.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.compressors.deflate64; + +import static org.apache.commons.compress.AbstractTestCase.getFile; + +import java.io.InputStream; +import java.util.Enumeration; + +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.junit.Test; + +public class Deflate64BugsTest { + + @Test + public void readBeyondMemoryException() throws Exception { + try (ZipFile zfile = new ZipFile(getFile("COMPRESS-380/COMPRESS-380-readbeyondmemory.zip"))) { + Enumeration<ZipArchiveEntry> entries = zfile.getEntries(); + while (entries.hasMoreElements()) { + ZipArchiveEntry e = entries.nextElement(); + byte [] buf = new byte [1024 * 8]; + try (InputStream is = zfile.getInputStream(e)) { + while (true) { + int read = is.read(buf); + if (read == -1) { + break; + } + } + } + } + } + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/deflate64/Deflate64CompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/deflate64/Deflate64CompressorInputStreamTest.java new file mode 100644 index 000000000..531a14fea --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/deflate64/Deflate64CompressorInputStreamTest.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.compressors.deflate64; + +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.runners.MockitoJUnitRunner; + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.InputStreamReader; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.times; + +@RunWith(MockitoJUnitRunner.class) +public class Deflate64CompressorInputStreamTest { + private final HuffmanDecoder nullDecoder = null; + + @Mock + private HuffmanDecoder decoder; + + @Test + public void readWhenClosed() throws Exception { + Deflate64CompressorInputStream input = new Deflate64CompressorInputStream(nullDecoder); + assertEquals(-1, input.read()); + assertEquals(-1, input.read(new byte[1])); + assertEquals(-1, input.read(new byte[1], 0, 1)); + } + + @Test + public void properSizeWhenClosed() throws Exception { + Deflate64CompressorInputStream input = new Deflate64CompressorInputStream(nullDecoder); + assertEquals(0, input.available()); + } + + @Test + public void delegatesAvailable() throws Exception { + Mockito.when(decoder.available()).thenReturn(1024); + + Deflate64CompressorInputStream input = new Deflate64CompressorInputStream(decoder); + assertEquals(1024, input.available()); + } + + @Test + public void closeCallsDecoder() throws Exception { + + Deflate64CompressorInputStream input = new Deflate64CompressorInputStream(decoder); + input.close(); + + Mockito.verify(decoder, times(1)).close(); + } + + @Test + public void closeIsDelegatedJustOnce() throws Exception { + + Deflate64CompressorInputStream input = new Deflate64CompressorInputStream(decoder); + + input.close(); + input.close(); + + Mockito.verify(decoder, times(1)).close(); + } + + @Test + public void uncompressedBlock() throws Exception { + byte[] data = { + 1, 11, 0, -12, -1, + 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd' + }; + + try (Deflate64CompressorInputStream input = new Deflate64CompressorInputStream(new ByteArrayInputStream(data)); + BufferedReader br = new BufferedReader(new InputStreamReader(input))) { + assertEquals("Hello World", br.readLine()); + assertEquals(null, br.readLine()); + } + } + + @Test + public void uncompressedBlockViaFactory() throws Exception { + byte[] data = { + 1, 11, 0, -12, -1, + 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd' + }; + + try (InputStream input = new CompressorStreamFactory() + .createCompressorInputStream(CompressorStreamFactory.DEFLATE64, new ByteArrayInputStream(data)); + BufferedReader br = new BufferedReader(new InputStreamReader(input))) { + assertEquals("Hello World", br.readLine()); + assertEquals(null, br.readLine()); + } + } + + @Test + public void uncompressedBlockAvailable() throws Exception { + byte[] data = { + 1, 11, 0, -12, -1, + 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd' + }; + + try (Deflate64CompressorInputStream input = new Deflate64CompressorInputStream(new ByteArrayInputStream(data))) { + assertEquals('H', input.read()); + assertEquals(10, input.available()); + } + } + + @Test + public void streamIgnoresExtraBytesAfterDeflatedInput() throws Exception + { + byte[] data = { + 1, 11, 0, -12, -1, + 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', 'X' + }; + + try (Deflate64CompressorInputStream input = new 
Deflate64CompressorInputStream(new ByteArrayInputStream(data)); + BufferedReader br = new BufferedReader(new InputStreamReader(input))) { + assertEquals("Hello World", br.readLine()); + assertEquals(null, br.readLine()); + } + } + + @Test(expected = java.io.EOFException.class) + public void throwsEOFExceptionOnTruncatedStreams() throws Exception + { + byte[] data = { + 1, 11, 0, -12, -1, + 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', + }; + + try (Deflate64CompressorInputStream input = new Deflate64CompressorInputStream(new ByteArrayInputStream(data)); + BufferedReader br = new BufferedReader(new InputStreamReader(input))) { + assertEquals("Hello World", br.readLine()); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + try (final Deflate64CompressorInputStream in = + new Deflate64CompressorInputStream(nullDecoder)) { + IOUtils.toByteArray(in); + assertEquals(-1, in.read()); + assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { + byte[] buf = new byte[2]; + try (final Deflate64CompressorInputStream in = + new Deflate64CompressorInputStream(nullDecoder)) { + IOUtils.toByteArray(in); + assertEquals(-1, in.read(buf)); + assertEquals(-1, in.read(buf)); + in.close(); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/deflate64/HuffmanDecoderTest.java b/src/test/java/org/apache/commons/compress/compressors/deflate64/HuffmanDecoderTest.java new file mode 100644 index 000000000..57249f0c4 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/deflate64/HuffmanDecoderTest.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.compressors.deflate64; + +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.util.Arrays; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +public class HuffmanDecoderTest { + @Test + public void decodeUncompressedBlock() throws Exception { + byte[] data = { + 0b1, // end of block + no compression mode + 11, 0, -12, -1, // len & ~len + 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd' + }; + + HuffmanDecoder decoder = new HuffmanDecoder(new ByteArrayInputStream(data)); + byte[] result = new byte[100]; + int len = decoder.decode(result); + + assertEquals(11, len); + assertEquals("Hello World", new String(result, 0, len)); + } + + @Test + public void decodeUncompressedBlockWithInvalidLenNLenValue() throws Exception { + byte[] data = { + 0b1, // end of block + no compression mode + 11, 0, -12, -2, // len & ~len + 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd' + }; + + HuffmanDecoder decoder = new HuffmanDecoder(new ByteArrayInputStream(data)); + byte[] result = new byte[100]; + try { + int len = decoder.decode(result); + fail("Should have failed but returned " + len + " entries: " + Arrays.toString(Arrays.copyOf(result, len))); + } catch (IllegalStateException e) { + assertEquals("Illegal LEN / NLEN values", e.getMessage()); + } + } + + @Test + public void decodeSimpleFixedHuffmanBlock() throws Exception { + byte[] data = { + //|--- binary filling ---|76543210 + 0b11111111111111111111111111110011, // final block + fixed huffman + H + 0b00000000000000000000000001001000, // H + e + 0b11111111111111111111111111001101, // e + l + 0b11111111111111111111111111001001, // l + l + 0b11111111111111111111111111001001, // l + o + 0b00000000000000000000000001010111, // o + ' ' + 0b00000000000000000000000000001000, // ' ' + W + 0b11111111111111111111111111001111, // W + o + 0b00000000000000000000000000101111, // o + r + 0b11111111111111111111111111001010, // r + l + 0b00000000000000000000000001001001, // l + d + 0b00000000000000000000000000000001, // d + end of block + 0b11111111111111111111111111111100 // end of block (00) + garbage + }; + + HuffmanDecoder decoder = new HuffmanDecoder(new ByteArrayInputStream(data)); + byte[] result = new byte[100]; + int len = decoder.decode(result); + + assertEquals(11, len); + assertEquals("Hello World", new String(result, 0, len)); + } + + @Test + public void decodeSimpleFixedHuffmanBlockToSmallBuffer() throws Exception { + byte[] data = { + //|--- binary filling ---|76543210 + 0b11111111111111111111111111110011, // final block + fixed huffman + H + 0b00000000000000000000000001001000, // H + e + 0b11111111111111111111111111001101, // e + l + 0b11111111111111111111111111001001, // l + l + 0b11111111111111111111111111001001, // l + o + 0b00000000000000000000000001010111, // o + ' ' + 0b00000000000000000000000000001000, // ' ' + W + 0b11111111111111111111111111001111, // W + o + 0b00000000000000000000000000101111, // o + r + 0b11111111111111111111111111001010, // r + l + 0b00000000000000000000000001001001, // l + d + 0b00000000000000000000000000000001, // d + end of block + 0b11111111111111111111111111111100 // end of block (00) + garbage + }; + + HuffmanDecoder decoder = new HuffmanDecoder(new ByteArrayInputStream(data)); + byte[] result = new byte[10]; + int len; + len = decoder.decode(result); + assertEquals(10, len); + assertEquals("Hello Worl", new String(result, 0, len)); + len = decoder.decode(result); + assertEquals(1, len); + assertEquals("d", 
new String(result, 0, len)); + } + + + @Test + public void decodeFixedHuffmanBlockWithMemoryLookup() throws Exception { + byte[] data = { + //|--- binary filling ---|76543210 + 0b11111111111111111111111111110011, // final block + fixed huffman + H + 0b00000000000000000000000001001000, // H + e + 0b11111111111111111111111111001101, // e + l + 0b11111111111111111111111111001001, // l + l + 0b11111111111111111111111111001001, // l + o + 0b00000000000000000000000001010111, // o + ' ' + 0b00000000000000000000000000001000, // ' ' + W + 0b11111111111111111111111111001111, // W + o + 0b00000000000000000000000000101111, // o + r + 0b11111111111111111111111111001010, // r + l + 0b00000000000000000000000001001001, // l + d + 0b11111111111111111111111111100001, // d + '\n' + 0b00000000000000000000000000100010, // '\n' + <len> + 0b11111111111111111111111110000110, // <len> + offset <001> + dist6 + 0b00000000000000000000000000001101, // dist6 + offset <11> + end of block (000000) + 0b11111111111111111111111111111000 // end of block (0000) + garbage + }; + + HuffmanDecoder decoder = new HuffmanDecoder(new ByteArrayInputStream(data)); + byte[] result = new byte[100]; + int len = decoder.decode(result); + + assertEquals(48, len); + assertEquals("Hello World\nHello World\nHello World\nHello World\n", new String(result, 0, len)); + } + + @Test + public void decodeFixedHuffmanBlockWithMemoryLookupInSmallBuffer() throws Exception { + byte[] data = { + //|--- binary filling ---|76543210 + 0b11111111111111111111111111110011, // final block + fixed huffman + H + 0b00000000000000000000000001001000, // H + e + 0b11111111111111111111111111001101, // e + l + 0b11111111111111111111111111001001, // l + l + 0b11111111111111111111111111001001, // l + o + 0b00000000000000000000000001010111, // o + ' ' + 0b00000000000000000000000000001000, // ' ' + W + 0b11111111111111111111111111001111, // W + o + 0b00000000000000000000000000101111, // o + r + 0b11111111111111111111111111001010, // r + l + 0b00000000000000000000000001001001, // l + d + 0b11111111111111111111111111100001, // d + '\n' + 0b00000000000000000000000000100010, // '\n' + <len> + 0b11111111111111111111111110000110, // <len> + offset <001> + dist6 + 0b00000000000000000000000000001101, // dist6 + offset <11> + end of block (000000) + 0b11111111111111111111111111111000 // end of block (0000) + garbage + }; + + HuffmanDecoder decoder = new HuffmanDecoder(new ByteArrayInputStream(data)); + byte[] result = new byte[30]; + int len; + + len = decoder.decode(result); + assertEquals(30, len); + assertEquals("Hello World\nHello World\nHello ", new String(result, 0, len)); + + len = decoder.decode(result); + assertEquals(18, len); + assertEquals("World\nHello World\n", new String(result, 0, len)); + } + + @Test + public void decodeFixedHuffmanBlockWithMemoryLookupInExactBuffer() throws Exception { + byte[] data = { + //|--- binary filling ---|76543210 + 0b11111111111111111111111111110011, // final block + fixed huffman + H + 0b00000000000000000000000001001000, // H + e + 0b11111111111111111111111111001101, // e + l + 0b11111111111111111111111111001001, // l + l + 0b11111111111111111111111111001001, // l + o + 0b00000000000000000000000001010111, // o + ' ' + 0b00000000000000000000000000001000, // ' ' + W + 0b11111111111111111111111111001111, // W + o + 0b00000000000000000000000000101111, // o + r + 0b11111111111111111111111111001010, // r + l + 0b00000000000000000000000001001001, // l + d + 0b11111111111111111111111111100001, // d + '\n' + 0b00000000000000000000000000100010, 
// '\n' + <len> + 0b11111111111111111111111110000110, // <len> + offset <001> + dist6 + 0b00000000000000000000000000001101, // dist6 + offset <11> + end of block (000000) + 0b11111111111111111111111111111000 // end of block (0000) + garbage + }; + + HuffmanDecoder decoder = new HuffmanDecoder(new ByteArrayInputStream(data)); + byte[] result = new byte[48]; + int len; + + len = decoder.decode(result); + assertEquals(48, len); + assertEquals("Hello World\nHello World\nHello World\nHello World\n", new String(result, 0, len)); + + len = decoder.decode(result); + assertEquals(0, len); + + len = decoder.decode(result); + assertEquals(-1, len); + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStreamTest.java new file mode 100644 index 000000000..13331dd7a --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStreamTest.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class BlockLZ4CompressorInputStreamTest extends AbstractTestCase { + + @Test + public void readBlaLz4() throws IOException { + try (InputStream a = new BlockLZ4CompressorInputStream(new FileInputStream(getFile("bla.tar.block_lz4"))); + FileInputStream e = new FileInputStream(getFile("bla.tar"))) { + byte[] expected = IOUtils.toByteArray(e); + byte[] actual = IOUtils.toByteArray(a); + Assert.assertArrayEquals(expected, actual); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tar.block_lz4"); + try (InputStream is = new FileInputStream(input)) { + final BlockLZ4CompressorInputStream in = + new BlockLZ4CompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read()); + Assert.assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tar.block_lz4"); + byte[] buf = new byte[2]; + try (InputStream is = new FileInputStream(input)) { + final BlockLZ4CompressorInputStream in = + new BlockLZ4CompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read(buf)); + Assert.assertEquals(-1, in.read(buf)); + in.close(); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStreamTest.java new file mode 100644 index 000000000..fca3f888f --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStreamTest.java @@ -0,0 +1,374 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Arrays; + +import org.apache.commons.compress.compressors.lz77support.LZ77Compressor; +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + +public class BlockLZ4CompressorOutputStreamTest { + + @Test + public void pairSeesBackReferenceWhenSet() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + Assert.assertFalse(p.hasBackReference()); + p.setBackReference(new LZ77Compressor.BackReference(1, 4)); + Assert.assertTrue(p.hasBackReference()); + } + + @Test + public void canWriteBackReferenceFollowedByLongLiteral() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 4)); + // a length of 11 would be enough according to the spec, but + // the algorithm we use for rewriting the last block requires + // 16 bytes + Assert.assertTrue(p.canBeWritten(16)); + } + + @Test + @Ignore("would pass if the algorithm used for rewriting the final pairs was smarter") + public void canWriteBackReferenceFollowedByShortLiteralIfOffsetIsBigEnough() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(10, 4)); + Assert.assertTrue(p.canBeWritten(5)); + } + + @Test + @Ignore("would pass if the algorithm used for rewriting the final pairs was smarter") + public void canWriteBackReferenceFollowedByShortLiteralIfLengthIsBigEnough() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 10)); + Assert.assertTrue(p.canBeWritten(5)); + } + + @Test + public void cantWriteBackReferenceFollowedByLiteralThatIsTooShort() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(10, 14)); + Assert.assertFalse(p.canBeWritten(4)); + } + + @Test + public void cantWriteBackReferenceIfAccumulatedOffsetIsTooShort() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 4)); + Assert.assertFalse(p.canBeWritten(5)); + } + + @Test + public void pairAccumulatesLengths() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 4)); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4)); + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 2, 5)); + Assert.assertEquals(13, p.length()); + } + + @Test + public void canWritePairWithoutLiterals() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 4)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { 0, 1, 0 }, bos.toByteArray()); + } + + @Test + public void writesCorrectSizeFor19ByteLengthBackReference() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 19)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { 15, 1, 0, 0 }, bos.toByteArray()); + } + + @Test + public 
void writesCorrectSizeFor273ByteLengthBackReference() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 273)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { 15, 1, 0, (byte) 254 }, bos.toByteArray()); + } + + @Test + public void writesCorrectSizeFor274ByteLengthBackReference() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 274)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { 15, 1, 0, (byte) 255, 0 }, bos.toByteArray()); + } + + @Test + public void canWritePairWithoutBackReference() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { 4<<4, 2, 3, 4, 5 }, bos.toByteArray()); + } + + @Test + public void writesCorrectSizeFor15ByteLengthLiteral() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 9)); + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 6)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { (byte) (15<<4), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6 }, + bos.toByteArray()); + } + + @Test + public void writesCorrectSizeFor269ByteLengthLiteral() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + for (int i = 0; i < 26; i++) { + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 10)); + } + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 9)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { (byte) (15<<4), (byte) 254, 1 }, + Arrays.copyOfRange(bos.toByteArray(), 0, 3)); + } + + @Test + public void writesCorrectSizeFor270ByteLengthLiteral() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + for (int i = 0; i < 27; i++) { + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 10)); + } + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { (byte) (15<<4), (byte) 255, 0, 1 }, + Arrays.copyOfRange(bos.toByteArray(), 0, 4)); + } + + @Test + public void writesCompletePair() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4)); + b[2] = 19; + p.setBackReference(new LZ77Compressor.BackReference(1, 5)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { (4<<4) + 1, 2, 3, 4, 5, 1, 0 }, + bos.toByteArray()); + } + + @Test + public void rewritingOfFinalBlockWithoutTrailingLZ77Literals() throws IOException { + for (int i = 1; i < 13; i++) { 
+ // according to the spec these are all too short to be compressed + // LZ77Compressor will create a single byte literal + // followed by a back-reference starting with i = 5, + // though. (4 is the minimum length for a back-reference + // in LZ4) + byte[] compressed = compress(i); + byte[] expected = prepareExpected(i + 1); + expected[0] = (byte) (i<<4); + Assert.assertArrayEquals("input length is " + i, expected, compressed); + } + + for (int i = 13; i < 17; i++) { + // LZ77Compressor will still create a single byte literal + // followed by a back-reference + // according to the spec the back-reference could be split + // as we can cut out a five byte literal and the offset + // would be big enough, but our algorithm insists on a + // twelve byte literal trailer and the back-reference + // would fall below the minimal size + byte[] compressed = compress(i); + byte[] expected = prepareExpected(i < 15 ? i + 1 : i + 2); + if (i < 15) { + expected[0] = (byte) (i<<4); + } else { + expected[0] = (byte) (15<<4); + expected[1] = (byte) (i - 15); + } + Assert.assertArrayEquals("input length is " + i, expected, compressed); + } + + for (int i = 17; i < 20; i++) { + // LZ77Compressor will still create a single byte literal + // followed by a back-reference + // this time even our algorithm is willing to break up the + // back-reference + byte[] compressed = compress(i); + byte[] expected = prepareExpected(17); + expected[0] = (byte) ((1<<4) | i - 17); + // two-byte offset + expected[2] = 1; + expected[3] = 0; + expected[4] = (byte) (12<<4); + Assert.assertArrayEquals("input length is " + i, expected, compressed); + } + } + + @Test + public void rewritingOfFinalBlockWithTrailingLZ77Literals() throws IOException { + for (int i = 1; i < 5; i++) { + // LZ77Compressor will create a single byte literal + // followed by a back-reference of length 15 followed by a + // literal of length i + // we can split the back-reference and merge it with the literal + byte[] compressed = compress(16, i); + byte[] expected = prepareExpected(17); + expected[0] = (byte) ((1<<4) | i - 1); + // two-byte offset + expected[2] = 1; + expected[3] = 0; + expected[4] = (byte) (12<<4); + for (int j = 0; j < i; j++) { + expected[expected.length - 1 - j] = 1; + } + Assert.assertArrayEquals("trailer length is " + i, expected, compressed); + } + for (int i = 5; i < 12; i++) { + // LZ77Compressor will create a single byte literal + // followed by a back-reference of length 15 followed by + // another single byte literal and another back-reference + // of length i-1 + // according to the spec we could completely satisfy the + // requirements by just rewriting the last Pair, but our + // algorithm will chip off a few bytes from the first Pair + byte[] compressed = compress(16, i); + byte[] expected = prepareExpected(17); + expected[0] = (byte) ((1<<4) | i - 1); + // two-byte offset + expected[2] = 1; + expected[3] = 0; + expected[4] = (byte) (12<<4); + for (int j = 0; j < i; j++) { + expected[expected.length - 1 - j] = 1; + } + Assert.assertArrayEquals("trailer length is " + i, expected, compressed); + } + for (int i = 12; i < 15; i++) { + // LZ77Compressor will create a single byte literal + // followed by a back-reference of length 15 followed by + // another single byte literal and another back-reference + // of length i-1 + // this shouldn't affect the first pair at all as + // rewriting the second one is sufficient + byte[] compressed = compress(16, i); + byte[] expected = prepareExpected(i + 5); + expected[0] = (byte) ((1<<4) | 11); 
+ // two-byte offset + expected[2] = 1; + expected[3] = 0; + expected[4] = (byte) (i<<4); + for (int j = 0; j < i; j++) { + expected[expected.length - 1 - j] = 1; + } + Assert.assertArrayEquals("trailer length is " + i, expected, compressed); + } + } + + @Test + public void rewritingOfFourPairs() throws IOException { + // LZ77Compressor creates three times a literal block followed + // by a back-reference (once 5 bytes long and twice four bytes + // long and a final literal block of length 1 + // in the result the three last pairs are merged into a single + // literal and one byte is chopped off of the first pair's + // back-reference + byte[] compressed = compress(6, 5, 5, 1); + byte[] expected = prepareExpected(17); + expected[0] = (byte) (1<<4); + // two-byte offset + expected[2] = 1; + expected[3] = 0; + expected[4] = (byte) (12<<4); + for (int i = 6; i < 11; i++) { + expected[i] = 1; + } + for (int i = 11; i < 16; i++) { + expected[i] = 2; + } + expected[16] = 3; + Assert.assertArrayEquals(expected, compressed); + } + + @Test + public void rewritingWithFinalBackreferenceAndOffsetBiggerThan1() throws IOException { + // this caused trouble when expandFromList() fell into the "offsetRemaining is negative" self-copy case as the + // calculation of copyOffset was wrong + byte[] toCompress = prepareExpected(25); + for (int i = 0; i < toCompress.length; i += 4) { + toCompress[i] = 1; + } + // LZ77Compressor creates a four byte literal and a back-reference with offset 4 and length 21 + // we'll need to split the back-reference and chop off the last 12 bytes + byte[] compressed = compress(toCompress); + byte[] expected = prepareExpected(1 + 4 + 2 + 1 + 12); + expected[0] = (byte) ((4<<4) | 5); + expected[1] = 1; + expected[5] = 4; + expected[6] = 0; + expected[7] = (byte) (12<<4); + for (int i = 11; i < expected.length; i += 4) { + expected[i] = 1; + } + Assert.assertArrayEquals(expected, compressed); + } + + private byte[] compress(int length) throws IOException { + return compress(length, 0); + } + + private byte[] compress(int lengthBeforeTrailer, int... lengthOfTrailers) throws IOException { + byte[] b = prepareExpected(lengthBeforeTrailer); + return compress(b, lengthOfTrailers); + } + + private byte[] compress(byte[] input, int... lengthOfTrailers) throws IOException { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BlockLZ4CompressorOutputStream lo = new BlockLZ4CompressorOutputStream(baos)) { + lo.write(input); + for (int i = 0; i < lengthOfTrailers.length; i++) { + int lengthOfTrailer = lengthOfTrailers[i]; + for (int j = 0; j < lengthOfTrailer; j++) { + lo.write(i + 1); + } + } + lo.close(); + return baos.toByteArray(); + } + } + + private byte[] prepareExpected(int length) { + byte[] b = new byte[length]; + Arrays.fill(b, (byte) -1); + return b; + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java new file mode 100644 index 000000000..da4941d8f --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
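The expected byte arrays in the BlockLZ4CompressorOutputStream tests above follow the LZ4 block format's sequence layout: a token byte whose high nibble is the literal length and whose low nibble is the match length minus 4, each nibble value of 15 being extended by additional length bytes (255 means "keep adding", any smaller value terminates the length), followed by the literals and a little-endian two-byte match offset. A minimal sketch of that encoding (illustrative only, not code from this patch):

import java.io.ByteArrayOutputStream;

final class Lz4SequenceSketch {
    // writes one literals-plus-match sequence in LZ4 block format
    static void writeSequence(ByteArrayOutputStream out, byte[] literals, int matchLen, int offset) {
        int litLen = literals.length;
        out.write((Math.min(litLen, 15) << 4) | Math.min(matchLen - 4, 15));
        for (int rest = litLen - 15; rest >= 0; rest -= 255) {
            out.write(Math.min(rest, 255));      // e.g. 269 literals -> 254; 270 -> 255, 0
        }
        out.write(literals, 0, litLen);
        out.write(offset & 0xFF);                // little-endian two-byte offset
        out.write((offset >> 8) & 0xFF);
        for (int rest = matchLen - 4 - 15; rest >= 0; rest -= 255) {
            out.write(Math.min(rest, 255));      // e.g. length 19 -> 0; 273 -> 254; 274 -> 255, 0
        }
    }
}

For instance, writeSequence(out, new byte[0], 4, 1) yields { 0, 1, 0 } and a match length of 19 at offset 1 yields { 15, 1, 0, 0 }, the same byte sequences the tests above expect.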
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.lz77support.Parameters; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runners.Parameterized; +import org.junit.runner.RunWith; + +@RunWith(Parameterized.class) +public final class BlockLZ4CompressorRoundtripTest extends AbstractTestCase { + + @org.junit.runners.Parameterized.Parameters(name = "using {0}") + public static Collection<Object[]> factory() { + return Arrays.asList(new Object[][] { + new Object[] { "default", BlockLZ4CompressorOutputStream.createParameterBuilder().build() }, + new Object[] { "tuned for speed", + BlockLZ4CompressorOutputStream.createParameterBuilder().tunedForSpeed().build() }, + new Object[] { "tuned for compression ratio", + BlockLZ4CompressorOutputStream.createParameterBuilder().tunedForCompressionRatio().build() } + }); + } + + private final String config; + private final Parameters params; + + public BlockLZ4CompressorRoundtripTest(String config, Parameters params) { + this.config = config; + this.params = params; + } + + private void roundTripTest(String testFile) throws IOException { + File input = getFile(testFile); + long start = System.currentTimeMillis(); + final File outputSz = new File(dir, input.getName() + ".block.lz4"); + try (FileInputStream is = new FileInputStream(input); + FileOutputStream os = new FileOutputStream(outputSz); + BlockLZ4CompressorOutputStream los = new BlockLZ4CompressorOutputStream(os, params)) { + IOUtils.copy(is, los); + } + System.err.println("Configuration: " + config); + System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() + + ", compressed bytes: " + outputSz.length() + " after " + (System.currentTimeMillis() - start) + "ms"); + start = System.currentTimeMillis(); + try (FileInputStream is = new FileInputStream(input); + BlockLZ4CompressorInputStream sis = new BlockLZ4CompressorInputStream(new FileInputStream(outputSz))) { + byte[] expected = IOUtils.toByteArray(is); + byte[] actual = IOUtils.toByteArray(sis); + Assert.assertArrayEquals(expected, actual); + } + System.err.println(outputSz.getName() + " read after " + (System.currentTimeMillis() - start) + "ms"); + } + + // should yield decent compression + @Test + public void blaTarRoundtrip() throws IOException { + roundTripTest("bla.tar"); + } + + // yields no compression at all + @Test + public void gzippedLoremIpsumRoundtrip() throws IOException { + roundTripTest("lorem-ipsum.txt.gz"); + } + + // yields no compression at all + @Test + public void biggerFileRoundtrip() throws IOException { + roundTripTest("COMPRESS-256.7z"); + } + +} diff --git 
a/src/test/java/org/apache/commons/compress/compressors/lz4/FactoryTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/FactoryTest.java new file mode 100644 index 000000000..3db62497a --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/FactoryTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class FactoryTest extends AbstractTestCase { + + @Test + public void frameRoundtripViaFactory() throws Exception { + roundtripViaFactory(CompressorStreamFactory.getLZ4Framed()); + } + + @Test + public void blockRoundtripViaFactory() throws Exception { + roundtripViaFactory(CompressorStreamFactory.getLZ4Block()); + } + + private void roundtripViaFactory(String format) throws Exception { + File input = getFile("bla.tar"); + long start = System.currentTimeMillis(); + final File outputSz = new File(dir, input.getName() + "." + format + ".lz4"); + try (FileInputStream is = new FileInputStream(input); + FileOutputStream os = new FileOutputStream(outputSz); + OutputStream los = new CompressorStreamFactory().createCompressorOutputStream(format, os)) { + IOUtils.copy(is, los); + } + System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() + + ", compressed bytes: " + outputSz.length() + " after " + (System.currentTimeMillis() - start) + "ms"); + start = System.currentTimeMillis(); + try (FileInputStream is = new FileInputStream(input); + InputStream sis = new CompressorStreamFactory() + .createCompressorInputStream(format, new FileInputStream(outputSz))) { + byte[] expected = IOUtils.toByteArray(is); + byte[] actual = IOUtils.toByteArray(sis); + Assert.assertArrayEquals(expected, actual); + } + System.err.println(outputSz.getName() + " read after " + (System.currentTimeMillis() - start) + "ms"); + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorInputStreamTest.java new file mode 100644 index 000000000..2fd564b97 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorInputStreamTest.java @@ -0,0 +1,645 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz4; + +import static org.hamcrest.CoreMatchers.*; +import static org.junit.Assert.*; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.io.IOException; +import java.util.Arrays; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class FramedLZ4CompressorInputStreamTest + extends AbstractTestCase { + + @Test + public void testMatches() throws IOException { + assertFalse(FramedLZ4CompressorInputStream.matches(new byte[10], 4)); + final byte[] b = new byte[12]; + final File input = getFile("bla.tar.lz4"); + try (FileInputStream in = new FileInputStream(input)) { + IOUtils.readFully(in, b); + } + assertFalse(FramedLZ4CompressorInputStream.matches(b, 3)); + assertTrue(FramedLZ4CompressorInputStream.matches(b, 4)); + assertTrue(FramedLZ4CompressorInputStream.matches(b, 5)); + } + + @Test + public void readBlaLz4() throws IOException { + try (InputStream a = new FramedLZ4CompressorInputStream(new FileInputStream(getFile("bla.tar.lz4"))); + FileInputStream e = new FileInputStream(getFile("bla.tar"))) { + byte[] expected = IOUtils.toByteArray(e); + byte[] actual = IOUtils.toByteArray(a); + assertArrayEquals(expected, actual); + } + } + + @Test + public void readBlaLz4ViaFactory() throws Exception { + try (InputStream a = new CompressorStreamFactory() + .createCompressorInputStream(CompressorStreamFactory.getLZ4Framed(), + new FileInputStream(getFile("bla.tar.lz4"))); + FileInputStream e = new FileInputStream(getFile("bla.tar"))) { + byte[] expected = IOUtils.toByteArray(e); + byte[] actual = IOUtils.toByteArray(a); + assertArrayEquals(expected, actual); + } + } + + @Test + public void readBlaLz4ViaFactoryAutoDetection() throws Exception { + try (InputStream a = new CompressorStreamFactory() + .createCompressorInputStream(new BufferedInputStream(new FileInputStream(getFile("bla.tar.lz4")))); + FileInputStream e = new FileInputStream(getFile("bla.tar"))) { + byte[] expected = IOUtils.toByteArray(e); + byte[] actual = IOUtils.toByteArray(a); + assertArrayEquals(expected, actual); + } + } + + @Test + public void readBlaLz4WithDecompressConcatenated() throws IOException { + try (InputStream a = new FramedLZ4CompressorInputStream(new FileInputStream(getFile("bla.tar.lz4")), true); + FileInputStream e = new FileInputStream(getFile("bla.tar"))) { + byte[] expected = IOUtils.toByteArray(e); + byte[] actual = IOUtils.toByteArray(a); + assertArrayEquals(expected, actual); + } + } + + @Test + public void readDoubledBlaLz4WithDecompressConcatenatedTrue() throws Exception { + 
readDoubledBlaLz4(new StreamWrapper() { + @Override + public InputStream wrap(InputStream in) throws Exception { + return new FramedLZ4CompressorInputStream(in, true); + } + }, true); + } + + @Test + public void readDoubledBlaLz4WithDecompressConcatenatedFalse() throws Exception { + readDoubledBlaLz4(new StreamWrapper() { + @Override + public InputStream wrap(InputStream in) throws Exception { + return new FramedLZ4CompressorInputStream(in, false); + } + }, false); + } + + @Test + public void readDoubledBlaLz4WithoutExplicitDecompressConcatenated() throws Exception { + readDoubledBlaLz4(new StreamWrapper() { + @Override + public InputStream wrap(InputStream in) throws Exception { + return new FramedLZ4CompressorInputStream(in); + } + }, false); + } + + @Test + public void readBlaLz4ViaFactoryWithDecompressConcatenated() throws Exception { + try (InputStream a = new CompressorStreamFactory() + .createCompressorInputStream(CompressorStreamFactory.getLZ4Framed(), + new FileInputStream(getFile("bla.tar.lz4")), + true); + FileInputStream e = new FileInputStream(getFile("bla.tar"))) { + byte[] expected = IOUtils.toByteArray(e); + byte[] actual = IOUtils.toByteArray(a); + assertArrayEquals(expected, actual); + } + } + + @Test + public void readDoubledBlaLz4ViaFactoryWithDecompressConcatenatedTrue() throws Exception { + readDoubledBlaLz4(new StreamWrapper() { + @Override + public InputStream wrap(InputStream in) throws Exception { + return new CompressorStreamFactory() + .createCompressorInputStream(CompressorStreamFactory.getLZ4Framed(), in, true); + } + }, true); + } + + @Test + public void readDoubledBlaLz4ViaFactoryWithDecompressConcatenatedFalse() throws Exception { + readDoubledBlaLz4(new StreamWrapper() { + @Override + public InputStream wrap(InputStream in) throws Exception { + return new CompressorStreamFactory() + .createCompressorInputStream(CompressorStreamFactory.getLZ4Framed(), in, false); + } + }, false); + } + + @Test + public void readDoubledBlaLz4ViaFactoryWithoutExplicitDecompressConcatenated() throws Exception { + readDoubledBlaLz4(new StreamWrapper() { + @Override + public InputStream wrap(InputStream in) throws Exception { + return new CompressorStreamFactory() + .createCompressorInputStream(CompressorStreamFactory.getLZ4Framed(), in); + } + }, false); + } + + @Test + public void readBlaDumpLz4() throws IOException { + try (InputStream a = new FramedLZ4CompressorInputStream(new FileInputStream(getFile("bla.dump.lz4"))); + FileInputStream e = new FileInputStream(getFile("bla.dump"))) { + byte[] expected = IOUtils.toByteArray(e); + byte[] actual = IOUtils.toByteArray(a); + assertArrayEquals(expected, actual); + } + } + + @Test(expected = IOException.class) + public void rejectsNonLZ4Stream() throws IOException { + try (InputStream a = new FramedLZ4CompressorInputStream(new FileInputStream(getFile("bla.tar")))) { + fail("expected exception"); + } + } + + @Test + public void rejectsFileWithoutFrameDescriptor() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18 // signature + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input))) { + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("frame flags")); + } + } + + @Test + public void rejectsFileWithoutBlockSizeByte() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x64, // flag - Version 01, block independent, no block checksum, no content size, with content checksum + 
}; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input))) { + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("BD byte")); + } + } + + @Test + public void rejectsFileWithWrongVersion() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x24, // flag - Version 00, block independent, no block checksum, no content size, with content checksum + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input))) { + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("version")); + } + } + + @Test + public void rejectsFileWithInsufficientContentSize() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x6C, // flag - Version 01, block independent, no block checksum, with content size, with content checksum + 0x70, // block size 4MB + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input))) { + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("content size")); + } + } + + @Test + public void rejectsFileWithoutHeaderChecksum() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x64, // flag - Version 01, block independent, no block checksum, no content size, with content checksum + 0x70, // block size 4MB + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input))) { + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("header checksum")); + } + } + + @Test + public void rejectsFileWithBadHeaderChecksum() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x64, // flag - Version 01, block independent, no block checksum, no content size, with content checksum + 0x70, // block size 4MB + 0, + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input))) { + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("header checksum mismatch")); + } + } + + @Test + public void readsUncompressedBlocks() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content checksum + 0x70, // block size 4MB + 115, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + }; + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input))) { + byte[] actual = IOUtils.toByteArray(a); + assertArrayEquals(new byte[] { + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!' 
+ }, actual); + } + } + + @Test + public void readsUncompressedBlocksUsingSingleByteRead() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content checksum + 0x70, // block size 4MB + 115, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + }; + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input))) { + int h = a.read(); + assertEquals('H', h); + } + } + + @Test + public void rejectsBlocksWithoutChecksum() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x70, // flag - Version 01, block independent, with block checksum, no content size, no content checksum + 0x70, // block size 4MB + 114, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input))) { + IOUtils.toByteArray(a); + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("block checksum")); + } + } + + @Test + public void rejectsStreamsWithoutContentChecksum() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x64, // flag - Version 01, block independent, no block checksum, no content size, with content checksum + 0x70, // block size 4MB + (byte) 185, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input))) { + IOUtils.toByteArray(a); + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("content checksum")); + } + } + + @Test + public void rejectsStreamsWithBadContentChecksum() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x64, // flag - Version 01, block independent, no block checksum, no content size, with content checksum + 0x70, // block size 4MB + (byte) 185, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + 1, 2, 3, 4, + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input))) { + IOUtils.toByteArray(a); + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("content checksum mismatch")); + } + } + + @Test + public void skipsOverSkippableFrames() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content checksum + 0x70, // block size 4MB + 115, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + 0x5f, 0x2a, 0x4d, 0x18, // skippable frame signature + 2, 0, 0, 0, // skippable frame has length 2 + 1, 2, // content of skippable frame + 4, 0x22, 0x4d, 0x18, 
// signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content checksum + 0x70, // block size 4MB + 115, // checksum + 1, 0, 0, (byte) 0x80, // 1 bytes length and uncompressed bit set + '!', // content + 0, 0, 0, 0, // empty block marker + }; + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input), true)) { + byte[] actual = IOUtils.toByteArray(a); + assertArrayEquals(new byte[] { + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', '!' + }, actual); + } + } + + @Test + public void skipsOverTrailingSkippableFrames() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content checksum + 0x70, // block size 4MB + 115, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + 0x51, 0x2a, 0x4d, 0x18, // skippable frame signature + 2, 0, 0, 0, // skippable frame has length 2 + 1, 2, // content of skippable frame + }; + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input), true)) { + byte[] actual = IOUtils.toByteArray(a); + assertArrayEquals(new byte[] { + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!' + }, actual); + } + } + + @Test + public void rejectsSkippableFrameFollowedByJunk() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content checksum + 0x70, // block size 4MB + 115, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + 0x50, 0x2a, 0x4d, 0x18, // skippable frame signature + 2, 0, 0, 0, // skippable frame has length 2 + 1, 2, // content of skippable frame + 1, 0x22, 0x4d, 0x18, // bad signature + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input), true)) { + IOUtils.toByteArray(a); + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("garbage")); + } + } + + @Test + public void rejectsSkippableFrameFollowedByTooFewBytes() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content checksum + 0x70, // block size 4MB + 115, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + 0x52, 0x2a, 0x4d, 0x18, // skippable frame signature + 2, 0, 0, 0, // skippable frame has length 2 + 1, 2, // content of skippable frame + 4, // too short for signature + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input), true)) { + IOUtils.toByteArray(a); + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("garbage")); + } + } + + @Test + public void rejectsSkippableFrameWithPrematureEnd() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content 
checksum + 0x70, // block size 4MB + 115, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + 0x50, 0x2a, 0x4d, 0x18, // skippable frame signature + 2, 0, 0, 0, // skippable frame has length 2 + 1, // content of skippable frame (should be two bytes) + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input), true)) { + IOUtils.toByteArray(a); + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("Premature end of stream while skipping frame")); + } + } + + @Test + public void rejectsSkippableFrameWithPrematureEndInLengthBytes() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content checksum + 0x70, // block size 4MB + 115, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + 0x55, 0x2a, 0x4d, 0x18, // skippable frame signature + 2, 0, 0, // should be four byte length + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input), true)) { + IOUtils.toByteArray(a); + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("premature end of data")); + } + } + + @Test + public void rejectsSkippableFrameWithBadSignatureTrailer() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content checksum + 0x70, // block size 4MB + 115, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + 0x51, 0x2a, 0x4d, 0x17, // broken skippable frame signature + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input), true)) { + IOUtils.toByteArray(a); + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("garbage")); + } + } + + @Test + public void rejectsSkippableFrameWithBadSignaturePrefix() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content checksum + 0x70, // block size 4MB + 115, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + 0x60, 0x2a, 0x4d, 0x18, // broken skippable frame signature + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input), true)) { + IOUtils.toByteArray(a); + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("garbage")); + } + } + + @Test + public void rejectsTrailingBytesAfterValidFrame() throws IOException { + byte[] input = new byte[] { + 4, 0x22, 0x4d, 0x18, // signature + 0x60, // flag - Version 01, block independent, no block checksum, no content size, no content checksum + 0x70, // block size 4MB + 115, // checksum + 13, 0, 0, (byte) 0x80, // 13 bytes 
length and uncompressed bit set + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', // content + 0, 0, 0, 0, // empty block marker + 0x56, 0x2a, 0x4d, // too short for any signature + }; + try { + try (InputStream a = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(input), true)) { + IOUtils.toByteArray(a); + fail("expected exception"); + } + } catch (IOException ex) { + assertThat(ex.getMessage(), containsString("garbage")); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tar.lz4"); + try (InputStream is = new FileInputStream(input)) { + final FramedLZ4CompressorInputStream in = + new FramedLZ4CompressorInputStream(is); + IOUtils.toByteArray(in); + assertEquals(-1, in.read()); + assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tar.lz4"); + byte[] buf = new byte[2]; + try (InputStream is = new FileInputStream(input)) { + final FramedLZ4CompressorInputStream in = + new FramedLZ4CompressorInputStream(is); + IOUtils.toByteArray(in); + assertEquals(-1, in.read(buf)); + assertEquals(-1, in.read(buf)); + in.close(); + } + } + + interface StreamWrapper { + InputStream wrap(InputStream in) throws Exception; + } + + private void readDoubledBlaLz4(StreamWrapper wrapper, boolean expectDuplicateOutput) throws Exception { + byte[] singleInput; + try (InputStream i = new FileInputStream(getFile("bla.tar.lz4"))) { + singleInput = IOUtils.toByteArray(i); + } + byte[] input = duplicate(singleInput); + try (InputStream a = wrapper.wrap(new ByteArrayInputStream(input)); + FileInputStream e = new FileInputStream(getFile("bla.tar"))) { + byte[] expected = IOUtils.toByteArray(e); + byte[] actual = IOUtils.toByteArray(a); + assertArrayEquals(expectDuplicateOutput ? duplicate(expected) : expected, actual); + } + } + + private static byte[] duplicate(byte[] from) { + byte[] to = Arrays.copyOf(from, 2 * from.length); + System.arraycopy(from, 0, to, from.length, from.length); + return to; + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java new file mode 100644 index 000000000..29e444391 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
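The hand-built frames in the FramedLZ4CompressorInputStream tests above all start with the frame magic number (bytes 04 22 4D 18) followed by the FLG byte described in the comments. Per the LZ4 frame format, the FLG bits are: version in bits 7-6, block independence in bit 5, block checksum in bit 4, content size present in bit 3, content checksum in bit 2. A small helper (a sketch, not part of the patch) makes flag values such as 0x60, 0x64 and 0x70 readable:

final class Lz4FrameFlags {
    static String describe(int flg) {
        return String.format(
            "version=%d blockIndependence=%b blockChecksum=%b contentSize=%b contentChecksum=%b",
            (flg >> 6) & 0x3,
            ((flg >> 5) & 1) != 0,
            ((flg >> 4) & 1) != 0,
            ((flg >> 3) & 1) != 0,
            ((flg >> 2) & 1) != 0);
    }

    public static void main(String[] args) {
        System.out.println(describe(0x64)); // version=1, block independent, content checksum - as in the tests
        System.out.println(describe(0x70)); // version=1, block independent, block checksum
    }
}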
+ */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public final class FramedLZ4CompressorRoundtripTest extends AbstractTestCase { + + @Parameters(name = "using {0}") + public static Collection<Object[]> factory() { + return Arrays.asList(new Object[][] { + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.K64) }, + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.K256) }, + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M1) }, + new Object[] { FramedLZ4CompressorOutputStream.Parameters.DEFAULT }, + // default without content checksum + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4, + false, false, false) }, + // default with block checksum + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4, + true, true, false) }, + // small blocksize (so we get enough blocks) and enabled block dependency, otherwise defaults + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.K64, + true, false, true) }, + // default, tuned for speed + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4, + true, false, false, BlockLZ4CompressorOutputStream.createParameterBuilder() + .tunedForSpeed().build()) }, + }); + } + + private final FramedLZ4CompressorOutputStream.Parameters params; + + public FramedLZ4CompressorRoundtripTest(FramedLZ4CompressorOutputStream.Parameters params) { + this.params = params; + } + + private void roundTripTest(String testFile) throws IOException { + File input = getFile(testFile); + long start = System.currentTimeMillis(); + final File outputSz = new File(dir, input.getName() + ".framed.lz4"); + byte[] expected; + try (FileInputStream is = new FileInputStream(input)) { + expected = IOUtils.toByteArray(is); + } + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + try (FramedLZ4CompressorOutputStream los = new FramedLZ4CompressorOutputStream(bos, + params)) { + IOUtils.copy(new ByteArrayInputStream(expected), los); + } + System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() + + ", compressed bytes: " + outputSz.length() + " after " + (System.currentTimeMillis() - start) + "ms"); + start = System.currentTimeMillis(); + try (FramedLZ4CompressorInputStream sis = new FramedLZ4CompressorInputStream( + new ByteArrayInputStream(bos.toByteArray()))) { + byte[] actual = IOUtils.toByteArray(sis); + Assert.assertArrayEquals(expected, actual); + } + + System.err.println(outputSz.getName() + " read after " + (System.currentTimeMillis() - start) + "ms"); + } + + // should yield decent compression + @Test + public void blaTarRoundtrip() throws IOException { + roundTripTest("bla.tar"); + } + + // yields no compression at all + @Test + public void 
gzippedLoremIpsumRoundtrip() throws IOException { + roundTripTest("lorem-ipsum.txt.gz"); + } + + @Test + public void biggerFileRoundtrip() throws IOException { + roundTripTest("COMPRESS-256.7z"); + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/XXHash32Test.java b/src/test/java/org/apache/commons/compress/compressors/lz4/XXHash32Test.java new file mode 100644 index 000000000..0c7e462c1 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/XXHash32Test.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.utils.IOUtils; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import org.junit.runner.RunWith; + +@RunWith(Parameterized.class) +public class XXHash32Test { + + private final File file; + private final String expectedChecksum; + + public XXHash32Test(String fileName, String c) throws IOException { + file = AbstractTestCase.getFile(fileName); + expectedChecksum = c; + } + + @Parameters + public static Collection<Object[]> factory() { + return Arrays.asList(new Object[][] { + // reference checksums created with xxh32sum + { "bla.tar", "fbb5c8d1" }, + { "bla.tar.xz", "4106a208" }, + { "8.posix.tar.gz", "9fce116a" }, + }); + } + + @Test + public void verifyChecksum() throws IOException { + XXHash32 h = new XXHash32(); + try (FileInputStream s = new FileInputStream(file)) { + byte[] b = IOUtils.toByteArray(s); + h.update(b, 0, b.length); + } + Assert.assertEquals("checksum for " + file.getName(), expectedChecksum, Long.toHexString(h.getValue())); + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStreamTest.java new file mode 100644 index 000000000..1b8f7f9b9 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStreamTest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
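The XXHash32 test above checks the streaming hash against reference values produced with the xxh32sum command line tool. A short usage sketch, relying only on the update and getValue methods the test itself exercises:

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.compress.compressors.lz4.XXHash32;

final class XXHash32Example {
    public static void main(String[] args) throws IOException {
        XXHash32 hash = new XXHash32();
        byte[] buffer = new byte[8192];
        try (InputStream in = new FileInputStream("bla.tar")) {
            int n;
            while ((n = in.read(buffer)) != -1) {
                hash.update(buffer, 0, n);   // same incremental API the test uses
            }
        }
        // expected "fbb5c8d1" for the bla.tar test resource
        System.out.println(Long.toHexString(hash.getValue()));
    }
}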
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz77support; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.junit.Test; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class AbstractLZ77CompressorInputStreamTest { + + private static class TestStream extends AbstractLZ77CompressorInputStream { + private boolean literal; + TestStream(InputStream in) throws IOException { + super(in, 1024); + } + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (literal) { + return readLiteral(b, off, len); + } + return readBackReference(b, off, len); + } + void literal(int len) { + startLiteral(len); + literal = true; + } + } + + @Test(expected = IllegalStateException.class) + public void cantPrefillAfterDataHasBeenRead() throws IOException { + byte[] data = new byte[] { 1, 2, 3, 4 }; + try (TestStream s = new TestStream(new ByteArrayInputStream(data))) { + s.literal(3); + assertEquals(1, s.read()); + s.prefill(new byte[] { 1, 2, 3 }); + } + } + + @Test + public void prefillCanBeUsedForBackReferences() throws IOException { + byte[] data = new byte[] { 1, 2, 3, 4 }; + try (TestStream s = new TestStream(new ByteArrayInputStream(new byte[0]))) { + s.prefill(data); + s.startBackReference(2, 4); + byte[] r = new byte[4]; + assertEquals(4, s.read(r)); + assertArrayEquals(new byte[] { 3, 4, 3, 4 }, r); + } + } + + @Test + public void ifPrefillExceedsWindowSizeTheLastBytesAreUsed() throws IOException { + byte[] data = new byte[2048]; + data[2046] = 3; + data[2047] = 4; + try (TestStream s = new TestStream(new ByteArrayInputStream(new byte[0]))) { + s.prefill(data); + s.startBackReference(2, 4); + byte[] r = new byte[4]; + assertEquals(4, s.read(r)); + assertArrayEquals(new byte[] { 3, 4, 3, 4 }, r); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java b/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java new file mode 100644 index 000000000..db4e3abc8 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java @@ -0,0 +1,352 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
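The prefill tests for AbstractLZ77CompressorInputStream above rely on the defining LZ77 property that a back-reference may overlap the bytes it is currently producing: with a window ending in {..., 3, 4}, startBackReference(2, 4) yields {3, 4, 3, 4}. A tiny sketch of that expansion (illustrative only; the helper below is hypothetical and not API from this patch):

final class BackReferenceSketch {
    // copies 'length' bytes from 'offset' positions back, byte by byte, so the
    // copy may read bytes it has just written (the overlapping self-copy case)
    static void expand(byte[] window, int writePos, int offset, int length) {
        for (int i = 0; i < length; i++) {
            window[writePos + i] = window[writePos + i - offset];
        }
    }

    public static void main(String[] args) {
        byte[] window = new byte[] { 1, 2, 3, 4, 0, 0, 0, 0 };
        expand(window, 4, 2, 4);
        // window is now { 1, 2, 3, 4, 3, 4, 3, 4 }, matching the prefill tests
        System.out.println(java.util.Arrays.toString(window));
    }
}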
+ */ +package org.apache.commons.compress.compressors.lz77support; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.junit.Test; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class LZ77CompressorTest { + + private static final byte[] BLA, SAM, ONE_TO_TEN; + + static { + try { + /* + * Example from "An Explanation of the Deflate Algorithm" by "Antaeus Feldspar". + * @see "http://zlib.net/feldspar.html" + */ + BLA = "Blah blah blah blah blah!".getBytes("ASCII"); + + /* + * Example from Wikipedia article about LZSS. + * Note the example uses indices instead of offsets. + * @see "https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Storer%E2%80%93Szymanski" + */ + SAM = ("I am Sam\n" + + "\n" + + "Sam I am\n" + + "\n" + + "That Sam-I-am!\n" + + "That Sam-I-am!\n" + + "I do not like\n" + + "that Sam-I-am!\n" + + "\n" + + "Do you like green eggs and ham?\n" + + "\n" + + "I do not like them, Sam-I-am.\n" + + "I do not like green eggs and ham.").getBytes("ASCII"); + } catch (IOException ex) { + throw new RuntimeException("ASCII not supported"); + } + ONE_TO_TEN = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + } + + private List<LZ77Compressor.Block> compress(Parameters params, byte[]... chunks) throws IOException { + final List<LZ77Compressor.Block> blocks = new ArrayList<>(); + LZ77Compressor c = new LZ77Compressor(params, new LZ77Compressor.Callback() { + @Override + public void accept(LZ77Compressor.Block block) { + //System.err.println(block); + if (block instanceof LZ77Compressor.LiteralBlock) { + // replace with a real copy of data so tests + // can see the results as they've been when + // the callback has been called + LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block; + int len = b.getLength(); + block = new LZ77Compressor.LiteralBlock( + Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len), + 0, len); + } + blocks.add(block); + } + }); + for (byte[] chunk : chunks) { + c.compress(chunk); + } + c.finish(); + return blocks; + } + + @Test + public void nonCompressableWithLengthSmallerThanLiteralMax() throws IOException { + List<LZ77Compressor.Block> blocks = compress(newParameters(128), ONE_TO_TEN); + assertSize(2, blocks); + assertLiteralBlock(ONE_TO_TEN, blocks.get(0)); + } + + @Test + public void nonCompressableWithLengthGreaterThanLiteralMaxButLessThanTwiceWindowSize() throws IOException { + List<LZ77Compressor.Block> blocks = compress(newParameters(8), ONE_TO_TEN); + assertSize(3, blocks); + assertLiteralBlock(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 }, blocks.get(0)); + assertLiteralBlock(new byte[] { 9, 10 }, blocks.get(1)); + } + + @Test + public void nonCompressableWithLengthThatForcesWindowSlide() throws IOException { + List<LZ77Compressor.Block> blocks = compress(newParameters(4), ONE_TO_TEN); + assertSize(4, blocks); + assertLiteralBlock(new byte[] { 1, 2, 3, 4, }, blocks.get(0)); + assertLiteralBlock(new byte[] { 5, 6, 7, 8 }, blocks.get(1)); + assertLiteralBlock(new byte[] { 9, 10 }, blocks.get(2)); + } + + @Test + public void nonCompressableSentAsSingleBytes() throws IOException { + List<LZ77Compressor.Block> blocks = compress(newParameters(8), stagger(ONE_TO_TEN)); + assertSize(3, blocks); + assertLiteralBlock(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 }, blocks.get(0)); + assertLiteralBlock(new byte[] { 9, 10 }, blocks.get(1)); + } + + @Test + public void blaExampleWithFullArrayAvailableForCompression() + throws 
IOException { + List<LZ77Compressor.Block> blocks = compress(newParameters(128), BLA); + assertSize(4, blocks); + assertLiteralBlock("Blah b", blocks.get(0)); + assertBackReference(5, 18, blocks.get(1)); + assertLiteralBlock("!", blocks.get(2)); + } + + @Test + public void blaExampleWithShorterBackReferenceLength() throws IOException { + List<LZ77Compressor.Block> blocks = compress(newParameters(128, 3, 5, 0, 0), BLA); + assertSize(7, blocks); + assertLiteralBlock("Blah b", blocks.get(0)); + assertBackReference(5, 5, blocks.get(1)); + assertBackReference(5, 5, blocks.get(2)); + assertBackReference(5, 5, blocks.get(3)); + assertBackReference(5, 3, blocks.get(4)); + assertLiteralBlock("!", blocks.get(5)); + } + + @Test + public void blaExampleSmallerWindowSize() throws IOException { + List<LZ77Compressor.Block> blocks = compress(newParameters(8), BLA); + assertSize(6, blocks); + assertLiteralBlock("Blah b", blocks.get(0)); + assertBackReference(5, 7, blocks.get(1)); + assertBackReference(5, 3, blocks.get(2)); + assertBackReference(5, 7, blocks.get(3)); + assertLiteralBlock("h!", blocks.get(4)); + } + + @Test + public void blaExampleWithSingleByteWrites() throws IOException { + List<LZ77Compressor.Block> blocks = compress(newParameters(128), stagger(BLA)); + assertEquals(9, blocks.size()); + assertLiteralBlock("Blah b", blocks.get(0)); + assertBackReference(5, 3, blocks.get(1)); + assertBackReference(5, 3, blocks.get(2)); + assertBackReference(5, 3, blocks.get(3)); + assertBackReference(5, 3, blocks.get(4)); + assertBackReference(5, 3, blocks.get(5)); + assertBackReference(5, 3, blocks.get(6)); + assertLiteralBlock("!", blocks.get(7)); + } + + @Test + public void samIAmExampleWithFullArrayAvailableForCompression() throws IOException { + List<LZ77Compressor.Block> blocks = compress(newParameters(1024), SAM); + assertEquals(21, blocks.size()); + assertLiteralBlock("I am Sam\n\n", blocks.get(0)); + assertBackReference(5, 3, blocks.get(1)); + assertLiteralBlock(" ", blocks.get(2)); + assertBackReference(14, 4, blocks.get(3)); + assertLiteralBlock("\n\nThat", blocks.get(4)); + assertBackReference(20, 4, blocks.get(5)); + assertLiteralBlock("-I-am!", blocks.get(6)); + assertBackReference(15, 16, blocks.get(7)); + assertLiteralBlock("I do not like\nt", blocks.get(8)); + assertBackReference(29, 14, blocks.get(9)); + assertLiteralBlock("\nDo you", blocks.get(10)); + assertBackReference(28, 5, blocks.get(11)); + assertLiteralBlock(" green eggs and ham?\n", blocks.get(12)); + assertBackReference(63, 14, blocks.get(13)); + assertLiteralBlock(" them,", blocks.get(14)); + assertBackReference(64, 9, blocks.get(15)); + assertLiteralBlock(".", blocks.get(16)); + assertBackReference(30, 15, blocks.get(17)); + assertBackReference(65, 18, blocks.get(18)); + assertLiteralBlock(".", blocks.get(19)); + } + + @Test + public void blaExampleWithPrefill() throws IOException { + final List<LZ77Compressor.Block> blocks = new ArrayList<>(); + LZ77Compressor c = new LZ77Compressor(newParameters(128), new LZ77Compressor.Callback() { + @Override + public void accept(LZ77Compressor.Block block) { + //System.err.println(block); + if (block instanceof LZ77Compressor.LiteralBlock) { + // replace with a real copy of data so tests + // can see the results as they've been when + // the callback has been called + LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block; + int len = b.getLength(); + block = new LZ77Compressor.LiteralBlock( + Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len), + 0, len); + 
} + blocks.add(block); + } + }); + c.prefill(Arrays.copyOfRange(BLA, 0, 6)); + c.compress(Arrays.copyOfRange(BLA, 6, BLA.length)); + c.finish(); + assertSize(3, blocks); + assertBackReference(5, 18, blocks.get(0)); + assertLiteralBlock("!", blocks.get(1)); + } + + @Test + public void blaExampleWithShortPrefill() throws IOException { + final List<LZ77Compressor.Block> blocks = new ArrayList<>(); + LZ77Compressor c = new LZ77Compressor(newParameters(128), new LZ77Compressor.Callback() { + @Override + public void accept(LZ77Compressor.Block block) { + //System.err.println(block); + if (block instanceof LZ77Compressor.LiteralBlock) { + // replace with a real copy of data so tests + // can see the results as they've been when + // the callback has been called + LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block; + int len = b.getLength(); + block = new LZ77Compressor.LiteralBlock( + Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len), + 0, len); + } + blocks.add(block); + } + }); + c.prefill(Arrays.copyOfRange(BLA, 0, 2)); + c.compress(Arrays.copyOfRange(BLA, 2, BLA.length)); + c.finish(); + assertSize(4, blocks); + assertLiteralBlock("ah b", blocks.get(0)); + assertBackReference(5, 18, blocks.get(1)); + assertLiteralBlock("!", blocks.get(2)); + } + + @Test + public void blaExampleWithPrefillBiggerThanWindowSize() throws IOException { + final List<LZ77Compressor.Block> blocks = new ArrayList<>(); + LZ77Compressor c = new LZ77Compressor(newParameters(4), new LZ77Compressor.Callback() { + @Override + public void accept(LZ77Compressor.Block block) { + //System.err.println(block); + if (block instanceof LZ77Compressor.LiteralBlock) { + // replace with a real copy of data so tests + // can see the results as they've been when + // the callback has been called + LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block; + int len = b.getLength(); + block = new LZ77Compressor.LiteralBlock( + Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len), + 0, len); + } + blocks.add(block); + } + }); + c.prefill(Arrays.copyOfRange(BLA, 0, 6)); + c.compress(Arrays.copyOfRange(BLA, 6, BLA.length)); + c.finish(); + assertSize(6, blocks); + assertLiteralBlock("lah ", blocks.get(0)); + assertLiteralBlock("blah", blocks.get(1)); + assertLiteralBlock(" bla", blocks.get(2)); + assertLiteralBlock("h bl", blocks.get(3)); + assertLiteralBlock("ah!", blocks.get(4)); + } + + @Test(expected = IllegalStateException.class) + public void cantPrefillTwice() { + LZ77Compressor c = new LZ77Compressor(newParameters(128), new LZ77Compressor.Callback() { + @Override + public void accept(LZ77Compressor.Block block) { + } + }); + c.prefill(Arrays.copyOfRange(BLA, 0, 2)); + c.prefill(Arrays.copyOfRange(BLA, 2, 4)); + } + + @Test(expected = IllegalStateException.class) + public void cantPrefillAfterCompress() throws IOException { + LZ77Compressor c = new LZ77Compressor(newParameters(128), new LZ77Compressor.Callback() { + @Override + public void accept(LZ77Compressor.Block block) { + } + }); + c.compress(Arrays.copyOfRange(BLA, 0, 2)); + c.prefill(Arrays.copyOfRange(BLA, 2, 4)); + } + + private static final void assertSize(int expectedSize, List<LZ77Compressor.Block> blocks) { + assertEquals(expectedSize, blocks.size()); + assertEquals(LZ77Compressor.Block.BlockType.EOD, blocks.get(expectedSize - 1).getType()); + } + + private static final void assertLiteralBlock(String expectedContent, LZ77Compressor.Block block) + throws IOException { + 
assertLiteralBlock(expectedContent.getBytes("ASCII"), block); + } + + private static final void assertLiteralBlock(byte[] expectedContent, LZ77Compressor.Block block) { + assertEquals(LZ77Compressor.LiteralBlock.class, block.getClass()); + assertArrayEquals(expectedContent, ((LZ77Compressor.LiteralBlock) block).getData()); + } + + private static final void assertBackReference(int expectedOffset, int expectedLength, LZ77Compressor.Block block) { + assertEquals(LZ77Compressor.BackReference.class, block.getClass()); + LZ77Compressor.BackReference b = (LZ77Compressor.BackReference) block; + assertEquals(expectedOffset, b.getOffset()); + assertEquals(expectedLength, b.getLength()); + } + + private static final byte[][] stagger(byte[] data) { + byte[][] r = new byte[data.length][1]; + for (int i = 0; i < data.length; i++) { + r[i][0] = data[i]; + } + return r; + } + + private static Parameters newParameters(int windowSize) { + return Parameters.builder(windowSize).build(); + } + + private static Parameters newParameters(int windowSize, int minBackReferenceLength, int maxBackReferenceLength, + int maxOffset, int maxLiteralLength) { + return Parameters.builder(windowSize) + .withMinBackReferenceLength(minBackReferenceLength) + .withMaxBackReferenceLength(maxBackReferenceLength) + .withMaxOffset(maxOffset) + .withMaxLiteralLength(maxLiteralLength) + .tunedForCompressionRatio() + .build(); + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/lz77support/ParametersTest.java b/src/test/java/org/apache/commons/compress/compressors/lz77support/ParametersTest.java new file mode 100644 index 000000000..44d8ac409 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz77support/ParametersTest.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.lz77support; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class ParametersTest { + + @Test + public void defaultConstructor() { + Parameters p = newParameters(128); + assertEquals(128, p.getWindowSize()); + assertEquals(3, p.getMinBackReferenceLength()); + assertEquals(127, p.getMaxBackReferenceLength()); + assertEquals(127, p.getMaxOffset()); + assertEquals(128, p.getMaxLiteralLength()); + } + + @Test + public void minBackReferenceLengthIsAtLeastThree() { + Parameters p = newParameters(128, 2, 3, 4, 5); + assertEquals(3, p.getMinBackReferenceLength()); + } + + @Test + public void maxBackReferenceLengthIsMinBackReferenceLengthWhenSmallerThanMinBackReferenceLength() { + Parameters p = newParameters(128, 2, 2, 4, 5); + assertEquals(3, p.getMaxBackReferenceLength()); + } + + @Test + public void maxBackReferenceLengthIsMinBackReferenceLengthWhenSmallerThanMinBackReferenceLengthReversedInvocationOrder() { + Parameters p = Parameters.builder(128) + .withMaxBackReferenceLength(2) + .withMinBackReferenceLength(2) + .withMaxOffset(4) + .withMaxLiteralLength(5) + .build(); + assertEquals(3, p.getMaxBackReferenceLength()); + } + + @Test + public void maxBackReferenceLengthIsMinBackReferenceLengthIfBothAreEqual() { + Parameters p = newParameters(128, 2, 3, 4, 5); + assertEquals(3, p.getMaxBackReferenceLength()); + } + + @Test + public void maxOffsetIsWindowSizeMinus1IfSetTo0() { + Parameters p = newParameters(128, 2, 3, 0, 5); + assertEquals(127, p.getMaxOffset()); + } + + @Test + public void maxOffsetIsWindowSizeMinus1IfSetToANegativeValue() { + Parameters p = newParameters(128, 2, 3, -1, 5); + assertEquals(127, p.getMaxOffset()); + } + + @Test + public void maxOffsetIsWindowSizeMinus1IfBiggerThanWindowSize() { + Parameters p = newParameters(128, 2, 3, 129, 5); + assertEquals(127, p.getMaxOffset()); + } + + @Test + public void maxLiteralLengthIsWindowSizeIfSetTo0() { + Parameters p = newParameters(128, 2, 3, 4, 0); + assertEquals(128, p.getMaxLiteralLength()); + } + + @Test + public void maxLiteralLengthIsWindowSizeIfSetToANegativeValue() { + Parameters p = newParameters(128, 2, 3, 0, -1); + assertEquals(128, p.getMaxLiteralLength()); + } + + @Test + public void maxLiteralLengthIsWindowSizeIfSetToAValueTooBigToHoldInSlidingWindow() { + Parameters p = newParameters(128, 2, 3, 0, 259); + assertEquals(128, p.getMaxLiteralLength()); + } + + @Test + public void allParametersUsuallyTakeTheirSpecifiedValues() { + Parameters p = newParameters(256, 4, 5, 6, 7); + assertEquals(256, p.getWindowSize()); + assertEquals(4, p.getMinBackReferenceLength()); + assertEquals(5, p.getMaxBackReferenceLength()); + assertEquals(6, p.getMaxOffset()); + assertEquals(7, p.getMaxLiteralLength()); + } + + @Test(expected = IllegalArgumentException.class) + public void windowSizeMustNotBeSmallerThanMinBackReferenceLength() { + newParameters(128, 200, 300, 400, 500); + } + + @Test(expected = IllegalArgumentException.class) + public void windowSizeMustBeAPowerOfTwo() { + newParameters(100, 200, 300, 400, 500); + } + + private static Parameters newParameters(int windowSize) { + return Parameters.builder(windowSize).build(); + } + + private static Parameters newParameters(int windowSize, int minBackReferenceLength, int maxBackReferenceLength, + int maxOffset, int maxLiteralLength) { + return Parameters.builder(windowSize) + .withMinBackReferenceLength(minBackReferenceLength) + .withMaxBackReferenceLength(maxBackReferenceLength) + 
.withMaxOffset(maxOffset) + .withMaxLiteralLength(maxLiteralLength) + .build(); + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/lzma/LZMAUtilsTestCase.java b/src/test/java/org/apache/commons/compress/compressors/lzma/LZMAUtilsTestCase.java new file mode 100644 index 000000000..17b38067d --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lzma/LZMAUtilsTestCase.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lzma; + +import static org.junit.Assert.*; + +import org.junit.Test; + +public class LZMAUtilsTestCase { + + @Test + public void testIsCompressedFilename() { + assertFalse(LZMAUtils.isCompressedFilename("")); + assertFalse(LZMAUtils.isCompressedFilename(".lzma")); + + assertTrue(LZMAUtils.isCompressedFilename("x.lzma")); + assertTrue(LZMAUtils.isCompressedFilename("x-lzma")); + + assertFalse(LZMAUtils.isCompressedFilename("xxgz")); + assertFalse(LZMAUtils.isCompressedFilename("lzmaz")); + assertFalse(LZMAUtils.isCompressedFilename("xaz")); + + assertFalse(LZMAUtils.isCompressedFilename("x.lzma ")); + assertFalse(LZMAUtils.isCompressedFilename("x.lzma\n")); + assertFalse(LZMAUtils.isCompressedFilename("x.lzma.y")); + } + + @Test + public void testGetUncompressedFilename() { + assertEquals("", LZMAUtils.getUncompressedFilename("")); + assertEquals(".lzma", LZMAUtils.getUncompressedFilename(".lzma")); + + assertEquals("x", LZMAUtils.getUncompressedFilename("x.lzma")); + assertEquals("x", LZMAUtils.getUncompressedFilename("x-lzma")); + + assertEquals("x.lzma ", LZMAUtils.getUncompressedFilename("x.lzma ")); + assertEquals("x.lzma\n", LZMAUtils.getUncompressedFilename("x.lzma\n")); + assertEquals("x.lzma.y", LZMAUtils.getUncompressedFilename("x.lzma.y")); + } + + @Test + public void testGetCompressedFilename() { + assertEquals(".lzma", LZMAUtils.getCompressedFilename("")); + assertEquals("x.lzma", LZMAUtils.getCompressedFilename("x")); + + assertEquals("x.wmf .lzma", LZMAUtils.getCompressedFilename("x.wmf ")); + assertEquals("x.wmf\n.lzma", LZMAUtils.getCompressedFilename("x.wmf\n")); + assertEquals("x.wmf.y.lzma", LZMAUtils.getCompressedFilename("x.wmf.y")); + } + + @Test + public void testMatches() { + final byte[] data = { + (byte) 0x5D, 0, 0, + }; + assertFalse(LZMAUtils.matches(data, 2)); + assertTrue(LZMAUtils.matches(data, 3)); + assertTrue(LZMAUtils.matches(data, 4)); + data[2] = '0'; + assertFalse(LZMAUtils.matches(data, 3)); + } + + @Test + public void testCachingIsEnabledByDefaultAndLZMAIsPresent() { + assertEquals(LZMAUtils.CachedAvailability.CACHED_AVAILABLE, LZMAUtils.getCachedLZMAAvailability()); + assertTrue(LZMAUtils.isLZMACompressionAvailable()); + } + + @Test + public void testCanTurnOffCaching() { 
+ try { + LZMAUtils.setCacheLZMAAvailablity(false); + assertEquals(LZMAUtils.CachedAvailability.DONT_CACHE, LZMAUtils.getCachedLZMAAvailability()); + assertTrue(LZMAUtils.isLZMACompressionAvailable()); + } finally { + LZMAUtils.setCacheLZMAAvailablity(true); + } + } + + @Test + public void testTurningOnCachingReEvaluatesAvailability() { + try { + LZMAUtils.setCacheLZMAAvailablity(false); + assertEquals(LZMAUtils.CachedAvailability.DONT_CACHE, LZMAUtils.getCachedLZMAAvailability()); + LZMAUtils.setCacheLZMAAvailablity(true); + assertEquals(LZMAUtils.CachedAvailability.CACHED_AVAILABLE, LZMAUtils.getCachedLZMAAvailability()); + } finally { + LZMAUtils.setCacheLZMAAvailablity(true); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/pack200/Pack200UtilsTest.java b/src/test/java/org/apache/commons/compress/compressors/pack200/Pack200UtilsTest.java new file mode 100644 index 000000000..3874f750b --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/pack200/Pack200UtilsTest.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.pack200; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.util.HashMap; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.ArchiveStreamFactory; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class Pack200UtilsTest extends AbstractTestCase { + + @Test + public void testNormalize() throws Throwable { + final File input = getFile("bla.jar"); + final File[] output = createTempDirAndFile(); + try { + Pack200Utils.normalize(input, output[1], + new HashMap<String, String>()); + try (FileInputStream is = new FileInputStream(output[1])) { + final ArchiveInputStream in = new ArchiveStreamFactory() + .createArchiveInputStream("jar", is); + + ArchiveEntry entry = in.getNextEntry(); + while (entry != null) { + final File archiveEntry = new File(dir, entry.getName()); + archiveEntry.getParentFile().mkdirs(); + if (entry.isDirectory()) { + archiveEntry.mkdir(); + entry = in.getNextEntry(); + continue; + } + final OutputStream out = new FileOutputStream(archiveEntry); + IOUtils.copy(in, out); + out.close(); + entry = in.getNextEntry(); + } + + in.close(); + } + } finally { + output[1].delete(); + output[0].delete(); + } + } + + @Test + public void testNormalizeInPlace() throws Throwable { + final File input = getFile("bla.jar"); + final File[] output = createTempDirAndFile(); + try { + FileInputStream is = new FileInputStream(input); + OutputStream os = null; + try { + os = new FileOutputStream(output[1]); + IOUtils.copy(is, os); + } finally { + is.close(); + if (os != null) { + os.close(); + } + } + + Pack200Utils.normalize(output[1]); + is = new FileInputStream(output[1]); + try { + final ArchiveInputStream in = new ArchiveStreamFactory() + .createArchiveInputStream("jar", is); + + ArchiveEntry entry = in.getNextEntry(); + while (entry != null) { + final File archiveEntry = new File(dir, entry.getName()); + archiveEntry.getParentFile().mkdirs(); + if (entry.isDirectory()) { + archiveEntry.mkdir(); + entry = in.getNextEntry(); + continue; + } + final OutputStream out = new FileOutputStream(archiveEntry); + IOUtils.copy(in, out); + out.close(); + entry = in.getNextEntry(); + } + + in.close(); + } finally { + is.close(); + } + } finally { + output[1].delete(); + output[0].delete(); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java new file mode 100644 index 000000000..879456e32 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.snappy; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Test; + +public final class FramedSnappyCompressorInputStreamTest + extends AbstractTestCase { + + @Test + public void testMatches() throws IOException { + assertFalse(FramedSnappyCompressorInputStream.matches(new byte[10], 10)); + final byte[] b = new byte[12]; + final File input = getFile("bla.tar.sz"); + try (FileInputStream in = new FileInputStream(input)) { + IOUtils.readFully(in, b); + } + assertFalse(FramedSnappyCompressorInputStream.matches(b, 9)); + assertTrue(FramedSnappyCompressorInputStream.matches(b, 10)); + assertTrue(FramedSnappyCompressorInputStream.matches(b, 12)); + } + + /** + * Something big enough to make buffers slide. + */ + @Test + public void testLoremIpsum() throws Exception { + final File outputSz = new File(dir, "lorem-ipsum.1"); + final File outputGz = new File(dir, "lorem-ipsum.2"); + try (FileInputStream isSz = new FileInputStream(getFile("lorem-ipsum.txt.sz"))) { + InputStream in = new FramedSnappyCompressorInputStream(isSz); + FileOutputStream out = null; + try { + out = new FileOutputStream(outputSz); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + try (FileInputStream isGz = new FileInputStream(getFile("lorem-ipsum.txt.gz"))) { + in = new GzipCompressorInputStream(isGz); + try { + out = new FileOutputStream(outputGz); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + } + + try (FileInputStream sz = new FileInputStream(outputSz)) { + try (FileInputStream gz = new FileInputStream(outputGz)) { + assertArrayEquals(IOUtils.toByteArray(sz), + IOUtils.toByteArray(gz)); + } + } + } + + @Test + public void testRemainingChunkTypes() throws Exception { + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + try (FileInputStream isSz = new FileInputStream(getFile("mixed.txt.sz"))) { + final FramedSnappyCompressorInputStream in = new FramedSnappyCompressorInputStream(isSz); + IOUtils.copy(in, out); + out.close(); + } + + assertArrayEquals(new byte[] { '1', '2', '3', '4', + '5', '6', '7', '8', '9', + '5', '6', '7', '8', '9', + '5', '6', '7', '8', '9', + '5', '6', '7', '8', '9', + '5', '6', '7', '8', '9', 10, + '1', '2', '3', '4', + '1', '2', '3', '4', + }, out.toByteArray()); + } + + @Test + public void testAvailable() throws Exception { + try (FileInputStream isSz = new FileInputStream(getFile("mixed.txt.sz"))) { + final FramedSnappyCompressorInputStream in = new FramedSnappyCompressorInputStream(isSz); + assertEquals(0, in.available()); // no chunk read so far + 
assertEquals('1', in.read()); + assertEquals(3, in.available()); // remainder of first uncompressed block + assertEquals(3, in.read(new byte[5], 0, 3)); + assertEquals('5', in.read()); + assertEquals(0, in.available()); // end of chunk, must read next one + assertEquals(4, in.read(new byte[5], 0, 4)); + assertEquals('5', in.read()); + in.close(); + } + } + + @Test + public void testUnskippableChunk() { + final byte[] input = new byte[] { + (byte) 0xff, 6, 0, 0, 's', 'N', 'a', 'P', 'p', 'Y', + 2, 2, 0, 0, 1, 1 + }; + try { + final FramedSnappyCompressorInputStream in = + new FramedSnappyCompressorInputStream(new ByteArrayInputStream(input)); + in.read(); + fail("expected an exception"); + in.close(); + } catch (final IOException ex) { + assertTrue(ex.getMessage().contains("unskippable chunk")); + } + } + + @Test + public void testChecksumUnmasking() { + testChecksumUnmasking(0xc757l); + testChecksumUnmasking(0xffffc757l); + } + + @Test + public void readIWAFile() throws Exception { + try (ZipFile zip = new ZipFile(getFile("testNumbersNew.numbers"))) { + try (InputStream is = zip.getInputStream(zip.getEntry("Index/Document.iwa"))) { + final FramedSnappyCompressorInputStream in = + new FramedSnappyCompressorInputStream(is, FramedSnappyDialect.IWORK_ARCHIVE); + FileOutputStream out = null; + try { + out = new FileOutputStream(new File(dir, "snappyIWATest.raw")); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + } + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-358" + */ + @Test + public void readIWAFileWithBiggerOffset() throws Exception { + File o = new File(dir, "COMPRESS-358.raw"); + try (InputStream is = new FileInputStream(getFile("COMPRESS-358.iwa")); + FramedSnappyCompressorInputStream in = + new FramedSnappyCompressorInputStream(is, 1<<16, FramedSnappyDialect.IWORK_ARCHIVE); + FileOutputStream out = new FileOutputStream(o)) { + IOUtils.copy(in, out); + } + try (FileInputStream a = new FileInputStream(o); + FileInputStream e = new FileInputStream(getFile("COMPRESS-358.uncompressed"))) { + byte[] expected = IOUtils.toByteArray(e); + byte[] actual = IOUtils.toByteArray(a); + assertArrayEquals(expected, actual); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tar.sz"); + try (InputStream is = new FileInputStream(input)) { + final FramedSnappyCompressorInputStream in = + new FramedSnappyCompressorInputStream(is); + IOUtils.toByteArray(in); + assertEquals(-1, in.read()); + assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tar.sz"); + byte[] buf = new byte[2]; + try (InputStream is = new FileInputStream(input)) { + final FramedSnappyCompressorInputStream in = + new FramedSnappyCompressorInputStream(is); + IOUtils.toByteArray(in); + assertEquals(-1, in.read(buf)); + assertEquals(-1, in.read(buf)); + in.close(); + } + } + + private void testChecksumUnmasking(final long x) { + assertEquals(Long.toHexString(x), + Long.toHexString(FramedSnappyCompressorInputStream + .unmask(mask(x)))); + } + + private long mask(final long x) { + return (((x >>> 15) | (x << 17)) + + FramedSnappyCompressorInputStream.MASK_OFFSET) + & 0xffffFFFFL; + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java 
b/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java new file mode 100644 index 000000000..350ef4ee7 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.snappy; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Random; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.lz77support.Parameters; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public final class SnappyRoundtripTest extends AbstractTestCase { + + private void roundTripTest(String testFile) throws IOException { + roundTripTest(getFile(testFile), + SnappyCompressorOutputStream.createParameterBuilder(SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE) + .build()); + } + + private void roundTripTest(final File input, Parameters params) throws IOException { + long start = System.currentTimeMillis(); + final File outputSz = new File(dir, input.getName() + ".raw.sz"); + try (FileInputStream is = new FileInputStream(input); + FileOutputStream os = new FileOutputStream(outputSz); + SnappyCompressorOutputStream sos = new SnappyCompressorOutputStream(os, input.length(), params)) { + IOUtils.copy(is, sos); + } + System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() + + ", compressed bytes: " + outputSz.length() + " after " + (System.currentTimeMillis() - start) + "ms"); + start = System.currentTimeMillis(); + try (FileInputStream is = new FileInputStream(input); + SnappyCompressorInputStream sis = new SnappyCompressorInputStream(new FileInputStream(outputSz), + params.getWindowSize())) { + byte[] expected = IOUtils.toByteArray(is); + byte[] actual = IOUtils.toByteArray(sis); + Assert.assertArrayEquals(expected, actual); + } + System.err.println(outputSz.getName() + " read after " + (System.currentTimeMillis() - start) + "ms"); + } + private void roundTripTest(final byte[] input, Parameters params) throws IOException { + long start = System.currentTimeMillis(); + ByteArrayOutputStream os = new ByteArrayOutputStream(); + try ( + SnappyCompressorOutputStream sos = new SnappyCompressorOutputStream(os, input.length, params)) { + sos.write(input); + } + System.err.println("byte array" + " written, uncompressed bytes: " + input.length + + ", compressed bytes: " + os.size() + " after " + (System.currentTimeMillis() - start) + "ms"); + start = System.currentTimeMillis(); + try ( + 
SnappyCompressorInputStream sis = new SnappyCompressorInputStream(new ByteArrayInputStream(os.toByteArray()), + params.getWindowSize())) { + byte[] expected = input; + byte[] actual = IOUtils.toByteArray(sis); + Assert.assertArrayEquals(expected, actual); + } + System.err.println("byte array" + " read after " + (System.currentTimeMillis() - start) + "ms"); + } + + // should yield decent compression + @Test + public void blaTarRoundtrip() throws IOException { + System.err.println("Configuration: default"); + roundTripTest("bla.tar"); + } + + @Test + public void blaTarRoundtripTunedForSpeed() throws IOException { + System.err.println("Configuration: tuned for speed"); + roundTripTest(getFile("bla.tar"), + SnappyCompressorOutputStream.createParameterBuilder(SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE) + .tunedForSpeed() + .build()); + } + + @Test + public void blaTarRoundtripTunedForCompressionRatio() throws IOException { + System.err.println("Configuration: tuned for compression ratio"); + roundTripTest(getFile("bla.tar"), + SnappyCompressorOutputStream.createParameterBuilder(SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE) + .tunedForCompressionRatio() + .build()); + } + + // yields no compression at all + @Test + public void gzippedLoremIpsumRoundtrip() throws IOException { + roundTripTest("lorem-ipsum.txt.gz"); + } + + // yields no compression at all + @Test + public void biggerFileRoundtrip() throws IOException { + roundTripTest("COMPRESS-256.7z"); + } + + @Test + public void tryReallyBigOffset() throws IOException { + // "normal" Snappy files will never reach offsets beyond + // 16bits (i.e. those using four bytes to encode the length) + // as the block size is only 32k. This means we never execute + // the code for four-byte length copies in either stream class + // using real-world Snappy files. + // This is an artificial stream using a bigger block size that + // may not even be expandable by other Snappy implementations. + // Start with the four byte sequence 0000, after that add > 64k + // of random noise that doesn't contain any 0000 at all, then + // add 0000. + File f = new File(dir, "reallyBigOffsetTest"); + ByteArrayOutputStream fs = new ByteArrayOutputStream((1<<16) + 1024); + fs.write(0); + fs.write(0); + fs.write(0); + fs.write(0); + int cnt = 1 << 16 + 5; + Random r = new Random(); + for (int i = 0 ; i < cnt; i++) { + fs.write(r.nextInt(255) + 1); + } + fs.write(0); + fs.write(0); + fs.write(0); + fs.write(0); + + roundTripTest(fs.toByteArray(), newParameters(1 << 17, 4, 64, 1 << 17 - 1, 1 << 17 - 1)); + } + + @Test + public void tryReallyLongLiterals() throws IOException { + // "normal" Snappy files will never reach literal blocks with + // length beyond 16bits (i.e. those using three or four bytes + // to encode the length) as the block size is only 32k. This + // means we never execute the code for the three/four byte + // length literals in either stream class using real-world + // Snappy files. + // What we'd need would be a sequence of bytes with no four + // byte subsequence repeated that is longer than 64k; we try + // our best with random, but will probably only hit the three byte + // methods in a few lucky cases. + // The four byte methods would require even more luck and a + // buffer (and a file written to disk) that was 2^5 bigger + // than the buffer used here. 
+ File f = new File(dir, "reallyBigLiteralTest"); + try (FileOutputStream fs = new FileOutputStream(f)) { + int cnt = 1 << 19; + Random r = new Random(); + for (int i = 0 ; i < cnt; i++) { + fs.write(r.nextInt(256)); + } + } + roundTripTest(f, newParameters(1 << 18, 4, 64, 1 << 16 - 1, 1 << 18 - 1)); + } + + private static Parameters newParameters(int windowSize, int minBackReferenceLength, int maxBackReferenceLength, + int maxOffset, int maxLiteralLength) { + return Parameters.builder(windowSize) + .withMinBackReferenceLength(minBackReferenceLength) + .withMaxBackReferenceLength(maxBackReferenceLength) + .withMaxOffset(maxOffset) + .withMaxLiteralLength(maxLiteralLength) + .build(); + } +} diff --git a/src/test/java/org/apache/commons/compress/compressors/xz/XZCompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/xz/XZCompressorInputStreamTest.java new file mode 100644 index 000000000..a43dca3de --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/xz/XZCompressorInputStreamTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.xz; + +import static org.apache.commons.compress.AbstractTestCase.getFile; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class XZCompressorInputStreamTest { + @Test + public void redundantTestOfAlmostDeprecatedMatchesMethod() { + final byte[] data = { + (byte) 0xFD, '7', 'z', 'X', 'Z', '\0' + }; + Assert.assertFalse(XZCompressorInputStream.matches(data, 5)); + Assert.assertTrue(XZCompressorInputStream.matches(data, 6)); + Assert.assertTrue(XZCompressorInputStream.matches(data, 7)); + data[5] = '0'; + Assert.assertFalse(XZCompressorInputStream.matches(data, 6)); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofNoDecompressConcatenated() throws IOException { + singleByteReadConsistentlyReturnsMinusOneAtEof(false); + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEofDecompressConcatenated() throws IOException { + singleByteReadConsistentlyReturnsMinusOneAtEof(true); + } + + private void singleByteReadConsistentlyReturnsMinusOneAtEof(boolean decompressConcatenated) throws IOException { + final File input = getFile("bla.tar.xz"); + try (InputStream is = new FileInputStream(input)) { + final XZCompressorInputStream in = + new XZCompressorInputStream(is, decompressConcatenated); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read()); + Assert.assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofNoDecompressConcatenated() throws IOException { + multiByteReadConsistentlyReturnsMinusOneAtEof(false); + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEofDecompressConcatenated() throws IOException { + multiByteReadConsistentlyReturnsMinusOneAtEof(true); + } + + private void multiByteReadConsistentlyReturnsMinusOneAtEof(boolean decompressConcatenated) throws IOException { + final File input = getFile("bla.tar.xz"); + byte[] buf = new byte[2]; + try (InputStream is = new FileInputStream(input)) { + final XZCompressorInputStream in = + new XZCompressorInputStream(is, decompressConcatenated); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read(buf)); + Assert.assertEquals(-1, in.read(buf)); + in.close(); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/xz/XZCompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/xz/XZCompressorOutputStreamTest.java new file mode 100644 index 000000000..49e66f871 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/xz/XZCompressorOutputStreamTest.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.xz; + +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import static org.junit.Assert.assertEquals; + + +/** + * Unit tests for class {@link XZCompressorOutputStream}. + * + * @date 16.06.2017 + * @see XZCompressorOutputStream + **/ +public class XZCompressorOutputStreamTest { + + + @Test + public void testWrite() throws IOException { + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(4590); + try (XZCompressorOutputStream xZCompressorOutputStream = new XZCompressorOutputStream(byteArrayOutputStream)) { + xZCompressorOutputStream.write(4590); + } + + try (XZCompressorInputStream xZCompressorInputStream = + new XZCompressorInputStream(new ByteArrayInputStream(byteArrayOutputStream.toByteArray()))) { + assertEquals(4590 % 256, xZCompressorInputStream.read()); + assertEquals(-1, xZCompressorInputStream.read()); + } + } + + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/xz/XZUtilsTestCase.java b/src/test/java/org/apache/commons/compress/compressors/xz/XZUtilsTestCase.java new file mode 100644 index 000000000..e706961dc --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/xz/XZUtilsTestCase.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.xz; + +import static org.junit.Assert.*; + +import org.junit.Test; + +public class XZUtilsTestCase { + + @Test + public void testIsCompressedFilename() { + assertFalse(XZUtils.isCompressedFilename("")); + assertFalse(XZUtils.isCompressedFilename(".xz")); + + assertTrue(XZUtils.isCompressedFilename("x.txz")); + assertTrue(XZUtils.isCompressedFilename("x.xz")); + assertTrue(XZUtils.isCompressedFilename("x-xz")); + + assertFalse(XZUtils.isCompressedFilename("xxgz")); + assertFalse(XZUtils.isCompressedFilename("xzz")); + assertFalse(XZUtils.isCompressedFilename("xaz")); + + assertFalse(XZUtils.isCompressedFilename("x.txz ")); + assertFalse(XZUtils.isCompressedFilename("x.txz\n")); + assertFalse(XZUtils.isCompressedFilename("x.txz.y")); + } + + @Test + public void testGetUncompressedFilename() { + assertEquals("", XZUtils.getUncompressedFilename("")); + assertEquals(".xz", XZUtils.getUncompressedFilename(".xz")); + + assertEquals("x.tar", XZUtils.getUncompressedFilename("x.txz")); + assertEquals("x", XZUtils.getUncompressedFilename("x.xz")); + assertEquals("x", XZUtils.getUncompressedFilename("x-xz")); + + assertEquals("x.txz ", XZUtils.getUncompressedFilename("x.txz ")); + assertEquals("x.txz\n", XZUtils.getUncompressedFilename("x.txz\n")); + assertEquals("x.txz.y", XZUtils.getUncompressedFilename("x.txz.y")); + } + + @Test + public void testGetCompressedFilename() { + assertEquals(".xz", XZUtils.getCompressedFilename("")); + assertEquals("x.xz", XZUtils.getCompressedFilename("x")); + + assertEquals("x.txz", XZUtils.getCompressedFilename("x.tar")); + + assertEquals("x.wmf .xz", XZUtils.getCompressedFilename("x.wmf ")); + assertEquals("x.wmf\n.xz", XZUtils.getCompressedFilename("x.wmf\n")); + assertEquals("x.wmf.y.xz", XZUtils.getCompressedFilename("x.wmf.y")); + } + + @Test + public void testMatches() { + final byte[] data = { + (byte) 0xFD, '7', 'z', 'X', 'Z', '\0' + }; + assertFalse(XZUtils.matches(data, 5)); + assertTrue(XZUtils.matches(data, 6)); + assertTrue(XZUtils.matches(data, 7)); + data[5] = '0'; + assertFalse(XZUtils.matches(data, 6)); + } + + @Test + public void testCachingIsEnabledByDefaultAndXZIsPresent() { + assertEquals(XZUtils.CachedAvailability.CACHED_AVAILABLE, XZUtils.getCachedXZAvailability()); + assertTrue(XZUtils.isXZCompressionAvailable()); + } + + @Test + public void testCanTurnOffCaching() { + try { + XZUtils.setCacheXZAvailablity(false); + assertEquals(XZUtils.CachedAvailability.DONT_CACHE, XZUtils.getCachedXZAvailability()); + assertTrue(XZUtils.isXZCompressionAvailable()); + } finally { + XZUtils.setCacheXZAvailablity(true); + } + } + + @Test + public void testTurningOnCachingReEvaluatesAvailability() { + try { + XZUtils.setCacheXZAvailablity(false); + assertEquals(XZUtils.CachedAvailability.DONT_CACHE, XZUtils.getCachedXZAvailability()); + XZUtils.setCacheXZAvailablity(true); + assertEquals(XZUtils.CachedAvailability.CACHED_AVAILABLE, XZUtils.getCachedXZAvailability()); + } finally { + XZUtils.setCacheXZAvailablity(true); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/z/ZCompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/z/ZCompressorInputStreamTest.java new file mode 100644 index 000000000..9e7786b72 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/z/ZCompressorInputStreamTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.z; + +import org.junit.Assert; +import org.junit.Test; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.SequenceInputStream; +import java.util.Enumeration; +import org.apache.commons.compress.utils.IOUtils; + +import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.mockito.Mockito.mock; +import static org.powermock.api.mockito.PowerMockito.doReturn; + + +/** + * Unit tests for class {@link ZCompressorInputStream}. + * + * @date 16.06.2017 + * @see ZCompressorInputStream + **/ +public class ZCompressorInputStreamTest { + + + @Test(expected = IOException.class) + public void testFailsToCreateZCompressorInputStreamAndThrowsIOException() throws IOException { + boolean java9 = false; + try { + Class.forName("java.lang.module.ModuleDescriptor"); + java9 = true; + } catch (Exception ex) { + // not Java9 + } + org.junit.Assume.assumeFalse("can't use PowerMock with Java9", java9); + + Enumeration<SequenceInputStream> enumeration = (Enumeration<SequenceInputStream>) mock(Enumeration.class); + SequenceInputStream sequenceInputStream = new SequenceInputStream(enumeration); + ZCompressorInputStream zCompressorInputStream = null; + + doReturn(false).when(enumeration).hasMoreElements(); + + zCompressorInputStream = new ZCompressorInputStream(sequenceInputStream); + + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tar.Z"); + try (InputStream is = new FileInputStream(input)) { + final ZCompressorInputStream in = + new ZCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read()); + Assert.assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("bla.tar.Z"); + byte[] buf = new byte[2]; + try (InputStream is = new FileInputStream(input)) { + final ZCompressorInputStream in = + new ZCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read(buf)); + Assert.assertEquals(-1, in.read(buf)); + in.close(); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java new file mode 100644 index 000000000..1d5f06659 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.compress.compressors.zstandard; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class ZstdCompressorInputStreamTest extends AbstractTestCase { + + /** + * Test bridge works fine. + * + * @throws IOException + */ + @Test + public void testZstdDecode() throws IOException { + final File input = getFile("zstandard.testdata.zst"); + final File expected = getFile("zstandard.testdata"); + try (InputStream inputStream = new FileInputStream(input); + InputStream expectedStream = new FileInputStream(expected); + ZstdCompressorInputStream zstdInputStream = new ZstdCompressorInputStream(inputStream)) { + final byte[] b = new byte[97]; + IOUtils.readFully(expectedStream, b); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int readByte = -1; + while((readByte = zstdInputStream.read()) != -1) { + bos.write(readByte); + } + Assert.assertArrayEquals(b, bos.toByteArray()); + } + } + + @Test + public void testCachingIsEnabledByDefaultAndZstdUtilsPresent() { + assertEquals(ZstdUtils.CachedAvailability.CACHED_AVAILABLE, ZstdUtils.getCachedZstdAvailability()); + assertTrue(ZstdUtils.isZstdCompressionAvailable()); + } + + @Test + public void testCanTurnOffCaching() { + try { + ZstdUtils.setCacheZstdAvailablity(false); + assertEquals(ZstdUtils.CachedAvailability.DONT_CACHE, ZstdUtils.getCachedZstdAvailability()); + assertTrue(ZstdUtils.isZstdCompressionAvailable()); + } finally { + ZstdUtils.setCacheZstdAvailablity(true); + } + } + + @Test + public void testTurningOnCachingReEvaluatesAvailability() { + try { + ZstdUtils.setCacheZstdAvailablity(false); + assertEquals(ZstdUtils.CachedAvailability.DONT_CACHE, ZstdUtils.getCachedZstdAvailability()); + ZstdUtils.setCacheZstdAvailablity(true); + assertEquals(ZstdUtils.CachedAvailability.CACHED_AVAILABLE, ZstdUtils.getCachedZstdAvailability()); + } finally { + ZstdUtils.setCacheZstdAvailablity(true); + } + } + + @Test + public void shouldBeAbleToSkipAByte() throws IOException { + final File input = getFile("zstandard.testdata.zst"); + try (InputStream is = new FileInputStream(input)) { + final ZstdCompressorInputStream in = + new ZstdCompressorInputStream(is); + Assert.assertEquals(1, in.skip(1)); + in.close(); + } + } 
+ + @Test + public void singleByteReadWorksAsExpected() throws IOException { + + final File input = getFile("zstandard.testdata.zst"); + + final File original = getFile("zstandard.testdata"); + final long originalFileLength = original.length(); + + byte[] originalFileContent = new byte[((int) originalFileLength)]; + + try (InputStream ois = new FileInputStream(original)) { + ois.read(originalFileContent); + } + + try (InputStream is = new FileInputStream(input)) { + final ZstdCompressorInputStream in = + new ZstdCompressorInputStream(is); + + Assert.assertEquals(originalFileContent[0], in.read()); + in.close(); + } + } + + @Test + public void singleByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("zstandard.testdata.zst"); + try (InputStream is = new FileInputStream(input)) { + final ZstdCompressorInputStream in = + new ZstdCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read()); + Assert.assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void multiByteReadConsistentlyReturnsMinusOneAtEof() throws IOException { + final File input = getFile("zstandard.testdata.zst"); + byte[] buf = new byte[2]; + try (InputStream is = new FileInputStream(input)) { + final ZstdCompressorInputStream in = + new ZstdCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read(buf)); + Assert.assertEquals(-1, in.read(buf)); + in.close(); + } + } + + @Test + public void testZstandardUnarchive() throws Exception { + final File input = getFile("bla.tar.zst"); + final File output = new File(dir, "bla.tar"); + try (InputStream is = new FileInputStream(input)) { + final CompressorInputStream in = new CompressorStreamFactory() + .createCompressorInputStream("zstd", is); + FileOutputStream out = null; + try { + out = new FileOutputStream(output); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdRoundtripTest.java new file mode 100644 index 000000000..6e9dfef32 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdRoundtripTest.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.compress.compressors.zstandard; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorOutputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class ZstdRoundtripTest extends AbstractTestCase { + + private interface OutputStreamCreator { + ZstdCompressorOutputStream wrap(FileOutputStream os) throws IOException; + } + + @Test + public void directRoundtrip() throws Exception { + roundtrip(new OutputStreamCreator() { + @Override + public ZstdCompressorOutputStream wrap(FileOutputStream os) throws IOException { + return new ZstdCompressorOutputStream(os); + } + }); + } + + private void roundtrip(OutputStreamCreator oc) throws IOException { + File input = getFile("bla.tar"); + long start = System.currentTimeMillis(); + final File output = new File(dir, input.getName() + ".zstd"); + try (FileInputStream is = new FileInputStream(input); + FileOutputStream os = new FileOutputStream(output); + ZstdCompressorOutputStream zos = oc.wrap(os)) { + IOUtils.copy(is, zos); + } + System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() + + ", compressed bytes: " + output.length() + " after " + (System.currentTimeMillis() - start) + "ms"); + start = System.currentTimeMillis(); + try (FileInputStream is = new FileInputStream(input); + ZstdCompressorInputStream zis = new ZstdCompressorInputStream(new FileInputStream(output))) { + byte[] expected = IOUtils.toByteArray(is); + byte[] actual = IOUtils.toByteArray(zis); + Assert.assertArrayEquals(expected, actual); + } + System.err.println(output.getName() + " read after " + (System.currentTimeMillis() - start) + "ms"); + } + + @Test + public void factoryRoundtrip() throws Exception { + File input = getFile("bla.tar"); + long start = System.currentTimeMillis(); + final File output = new File(dir, input.getName() + ".zstd"); + try (FileInputStream is = new FileInputStream(input); + FileOutputStream os = new FileOutputStream(output); + CompressorOutputStream zos = new CompressorStreamFactory().createCompressorOutputStream("zstd", os)) { + IOUtils.copy(is, zos); + } + start = System.currentTimeMillis(); + try (FileInputStream is = new FileInputStream(input); + CompressorInputStream zis = new CompressorStreamFactory() + .createCompressorInputStream("zstd", new FileInputStream(output))) { + byte[] expected = IOUtils.toByteArray(is); + byte[] actual = IOUtils.toByteArray(zis); + Assert.assertArrayEquals(expected, actual); + } + } + + @Test + public void roundtripWithCustomLevel() throws Exception { + roundtrip(new OutputStreamCreator() { + @Override + public ZstdCompressorOutputStream wrap(FileOutputStream os) throws IOException { + return new ZstdCompressorOutputStream(os, 1); + } + }); + } + + @Test + public void roundtripWithCloseFrameOnFlush() throws Exception { + roundtrip(new OutputStreamCreator() { + @Override + public ZstdCompressorOutputStream wrap(FileOutputStream os) throws IOException { + return new ZstdCompressorOutputStream(os, 3, true); + } + }); + } + + @Test + public void roundtripWithChecksum() throws Exception { + roundtrip(new OutputStreamCreator() { + @Override + public ZstdCompressorOutputStream 
wrap(FileOutputStream os) throws IOException { + return new ZstdCompressorOutputStream(os, 3, false, true); + } + }); + } + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdUtilsTest.java b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdUtilsTest.java new file mode 100644 index 000000000..5bba0ad58 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdUtilsTest.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.compress.compressors.zstandard; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +public class ZstdUtilsTest { + + @Test + public void testMatchesZstandardFrame() { + final byte[] data = { + (byte) 0x28, (byte) 0xB5, (byte) 0x2F, (byte) 0xFD, + }; + assertFalse(ZstdUtils.matches(data, 3)); + assertTrue(ZstdUtils.matches(data, 4)); + assertTrue(ZstdUtils.matches(data, 5)); + data[3] = '0'; + assertFalse(ZstdUtils.matches(data, 4)); + } + + @Test + public void testMatchesSkippableFrame() { + final byte[] data = { + 0, (byte) 0x2A, (byte) 0x4D, (byte) 0x18, + }; + assertFalse(ZstdUtils.matches(data, 4)); + for (byte b = (byte) 0x50; b < 0x60; b++) { + data[0] = b; + assertTrue(ZstdUtils.matches(data, 4)); + } + assertFalse(ZstdUtils.matches(data, 3)); + assertTrue(ZstdUtils.matches(data, 5)); + } +} diff --git a/src/test/java/org/apache/commons/compress/utils/BitInputStreamTest.java b/src/test/java/org/apache/commons/compress/utils/BitInputStreamTest.java new file mode 100644 index 000000000..92de71b4f --- /dev/null +++ b/src/test/java/org/apache/commons/compress/utils/BitInputStreamTest.java @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.utils; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.ByteOrder; + +import org.junit.Test; + +public class BitInputStreamTest { + + @Test(expected = IllegalArgumentException.class) + public void shouldNotAllowReadingOfANegativeAmountOfBits() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + bis.readBits(-1); + } + } + + @Test(expected = IllegalArgumentException.class) + public void shouldNotAllowReadingOfMoreThan63BitsAtATime() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + bis.readBits(64); + } + } + + @Test + public void testReading24BitsInLittleEndian() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + assertEquals(0x000140f8, bis.readBits(24)); + } + } + + @Test + public void testReading24BitsInBigEndian() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.BIG_ENDIAN)) { + assertEquals(0x00f84001, bis.readBits(24)); + } + } + + @Test + public void testReading17BitsInLittleEndian() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + assertEquals(0x000140f8, bis.readBits(17)); + } + } + + @Test + public void testReading17BitsInBigEndian() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.BIG_ENDIAN)) { + // 1-11110000-10000000 + assertEquals(0x0001f080, bis.readBits(17)); + } + } + + @Test + public void testReading30BitsInLittleEndian() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + assertEquals(0x2f0140f8, bis.readBits(30)); + } + } + + @Test + public void testReading30BitsInBigEndian() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.BIG_ENDIAN)) { + // 111110-00010000-00000000-01001011 + assertEquals(0x3e10004b, bis.readBits(30)); + } + } + + @Test + public void testReading31BitsInLittleEndian() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + assertEquals(0x2f0140f8, bis.readBits(31)); + } + } + + @Test + public void testReading31BitsInBigEndian() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.BIG_ENDIAN)) { + // 1111100-00100000-00000000-10010111 + assertEquals(0x7c200097, bis.readBits(31)); + } + } + + @Test + public void testClearBitCache() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + assertEquals(0x08, bis.readBits(4)); + bis.clearBitCache(); + assertEquals(0, bis.readBits(1)); + } + } + + @Test + public void testEOF() throws IOException { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + assertEquals(0x2f0140f8, bis.readBits(30)); + assertEquals(-1, bis.readBits(3)); + } + } + + /** + * @see "https://issues.apache.org/jira/browse/COMPRESS-363" + */ + @Test + public void littleEndianWithOverflow() throws Exception { + ByteArrayInputStream in = new ByteArrayInputStream(new byte[] { + 87, // 01010111 + 45, // 00101101 + 66, // 01000010 + 15, // 00001111 + 90, // 01011010 + 29, // 00011101 + 88, // 01011000 + 61, // 00111101 + 33, // 00100001 + 74 // 
01001010 + }); + try (final BitInputStream bin = new BitInputStream(in, ByteOrder.LITTLE_ENDIAN)) { + assertEquals(23, // 10111 + bin.readBits(5)); + assertEquals(714595605644185962l, // 0001-00111101-01011000-00011101-01011010-00001111-01000010-00101101-010 + bin.readBits(63)); + assertEquals(1186, // 01001010-0010 + bin.readBits(12)); + assertEquals(-1 , bin.readBits(1)); + } + } + + @Test + public void bigEndianWithOverflow() throws Exception { + ByteArrayInputStream in = new ByteArrayInputStream(new byte[] { + 87, // 01010111 + 45, // 00101101 + 66, // 01000010 + 15, // 00001111 + 90, // 01011010 + 29, // 00011101 + 88, // 01011000 + 61, // 00111101 + 33, // 00100001 + 74 // 01001010 + }); + try (final BitInputStream bin = new BitInputStream(in, ByteOrder.BIG_ENDIAN)) { + assertEquals(10, // 01010 + bin.readBits(5)); + assertEquals(8274274654740644818l, //111-00101101-01000010-00001111-01011010-00011101-01011000-00111101-0010 + bin.readBits(63)); + assertEquals(330, // 0001-01001010 + bin.readBits(12)); + assertEquals(-1 , bin.readBits(1)); + } + } + + @Test + public void alignWithByteBoundaryWhenAtBoundary() throws Exception { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + assertEquals(0xF8, bis.readBits(8)); + bis.alignWithByteBoundary(); + assertEquals(0, bis.readBits(4)); + } + } + + @Test + public void alignWithByteBoundaryWhenNotAtBoundary() throws Exception { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + assertEquals(0x08, bis.readBits(4)); + assertEquals(4, bis.bitsCached()); + bis.alignWithByteBoundary(); + assertEquals(0, bis.bitsCached()); + assertEquals(0, bis.readBits(4)); + } + } + + @Test + public void availableWithoutCache() throws Exception { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + assertEquals(32, bis.bitsAvailable()); + } + } + + @Test + public void availableWithCache() throws Exception { + try (final BitInputStream bis = new BitInputStream(getStream(), ByteOrder.LITTLE_ENDIAN)) { + assertEquals(0x08, bis.readBits(4)); + assertEquals(28, bis.bitsAvailable()); + } + } + + private ByteArrayInputStream getStream() { + return new ByteArrayInputStream(new byte[] { + (byte) 0xF8, // 11111000 + 0x40, // 01000000 + 0x01, // 00000001 + 0x2F }); // 00101111 + } + +} diff --git a/src/test/java/org/apache/commons/compress/utils/ByteUtilsTest.java b/src/test/java/org/apache/commons/compress/utils/ByteUtilsTest.java new file mode 100644 index 000000000..1671ec014 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/utils/ByteUtilsTest.java @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress.utils; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Arrays; + +import org.junit.Test; + +import static org.apache.commons.compress.utils.ByteUtils.*; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class ByteUtilsTest { + + @Test + public void fromLittleEndianFromArrayOneArg() { + byte[] b = new byte[] { 2, 3, 4 }; + assertEquals(2 + 3 * 256 + 4 * 256 * 256, fromLittleEndian(b)); + } + + @Test + public void fromLittleEndianFromArrayOneArgUnsignedInt32() { + byte[] b = new byte[] { 2, 3, 4, (byte) 128 }; + assertEquals(2 + 3 * 256 + 4 * 256 * 256 + 128l * 256 * 256 * 256, fromLittleEndian(b)); + } + + @Test(expected = IllegalArgumentException.class) + public void fromLittleEndianFromArrayOneArgThrowsForLengthTooBig() { + fromLittleEndian(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + } + + @Test + public void fromLittleEndianFromArray() { + byte[] b = new byte[] { 1, 2, 3, 4, 5 }; + assertEquals(2 + 3 * 256 + 4 * 256 * 256, fromLittleEndian(b, 1, 3)); + } + + @Test + public void fromLittleEndianFromArrayUnsignedInt32() { + byte[] b = new byte[] { 1, 2, 3, 4, (byte) 128 }; + assertEquals(2 + 3 * 256 + 4 * 256 * 256 + 128l * 256 * 256 * 256, fromLittleEndian(b, 1, 4)); + } + + @Test(expected = IllegalArgumentException.class) + public void fromLittleEndianFromArrayThrowsForLengthTooBig() { + fromLittleEndian(new byte[0], 0, 9); + } + + @Test + public void fromLittleEndianFromStream() throws IOException { + ByteArrayInputStream bin = new ByteArrayInputStream(new byte[] { 2, 3, 4, 5 }); + assertEquals(2 + 3 * 256 + 4 * 256 * 256, fromLittleEndian(bin, 3)); + } + + @Test + public void fromLittleEndianFromStreamUnsignedInt32() throws IOException { + ByteArrayInputStream bin = new ByteArrayInputStream(new byte[] { 2, 3, 4, (byte) 128 }); + assertEquals(2 + 3 * 256 + 4 * 256 * 256 + 128l * 256 * 256 * 256, fromLittleEndian(bin, 4)); + } + + @Test(expected = IllegalArgumentException.class) + public void fromLittleEndianFromStreamThrowsForLengthTooBig() throws IOException { + fromLittleEndian(new ByteArrayInputStream(new byte[0]), 9); + } + + @Test(expected = IOException.class) + public void fromLittleEndianFromStreamThrowsForPrematureEnd() throws IOException { + ByteArrayInputStream bin = new ByteArrayInputStream(new byte[] { 2, 3 }); + fromLittleEndian(bin, 3); + } + + @Test + public void fromLittleEndianFromSupplier() throws IOException { + ByteArrayInputStream bin = new ByteArrayInputStream(new byte[] { 2, 3, 4, 5 }); + assertEquals(2 + 3 * 256 + 4 * 256 * 256, fromLittleEndian(new InputStreamByteSupplier(bin), 3)); + } + + @Test + public void fromLittleEndianFromSupplierUnsignedInt32() throws IOException { + ByteArrayInputStream bin = new ByteArrayInputStream(new byte[] { 2, 3, 4, (byte) 128 }); + assertEquals(2 + 3 * 256 + 4 * 256 * 256 + 128l * 256 * 256 * 256, + fromLittleEndian(new InputStreamByteSupplier(bin), 4)); + } + + @Test(expected = IllegalArgumentException.class) + public void fromLittleEndianFromSupplierThrowsForLengthTooBig() throws IOException { + fromLittleEndian(new InputStreamByteSupplier(new ByteArrayInputStream(new byte[0])), 9); + } + + @Test(expected = IOException.class) + public void fromLittleEndianFromSupplierThrowsForPrematureEnd() throws IOException { + 
ByteArrayInputStream bin = new ByteArrayInputStream(new byte[] { 2, 3 }); + fromLittleEndian(new InputStreamByteSupplier(bin), 3); + } + + @Test + public void fromLittleEndianFromDataInput() throws IOException { + DataInput din = new DataInputStream(new ByteArrayInputStream(new byte[] { 2, 3, 4, 5 })); + assertEquals(2 + 3 * 256 + 4 * 256 * 256, fromLittleEndian(din, 3)); + } + + @Test + public void fromLittleEndianFromDataInputUnsignedInt32() throws IOException { + DataInput din = new DataInputStream(new ByteArrayInputStream(new byte[] { 2, 3, 4, (byte) 128 })); + assertEquals(2 + 3 * 256 + 4 * 256 * 256 + 128l * 256 * 256 * 256, fromLittleEndian(din, 4)); + } + + @Test(expected = IllegalArgumentException.class) + public void fromLittleEndianFromDataInputThrowsForLengthTooBig() throws IOException { + DataInput din = new DataInputStream(new ByteArrayInputStream(new byte[0])); + fromLittleEndian(din, 9); + } + + @Test(expected = java.io.EOFException.class) + public void fromLittleEndianFromDataInputThrowsForPrematureEnd() throws IOException { + DataInput din = new DataInputStream(new ByteArrayInputStream(new byte[] { 2, 3 })); + fromLittleEndian(din, 3); + } + + @Test + public void toLittleEndianToStream() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + toLittleEndian(bos, 2 + 3 * 256 + 4 * 256 * 256, 3); + bos.close(); + assertArrayEquals(new byte[] { 2, 3, 4 }, bos.toByteArray()); + } + + @Test + public void toLittleEndianToStreamUnsignedInt32() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + toLittleEndian(bos, 2 + 3 * 256 + 4 * 256 * 256 + 128l * 256 * 256 * 256, 4); + bos.close(); + assertArrayEquals(new byte[] { 2, 3, 4, (byte) 128 }, bos.toByteArray()); + } + + @Test + public void toLittleEndianToConsumer() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + toLittleEndian(new OutputStreamByteConsumer(bos), 2 + 3 * 256 + 4 * 256 * 256, 3); + bos.close(); + assertArrayEquals(new byte[] { 2, 3, 4 }, bos.toByteArray()); + } + + @Test + public void toLittleEndianToConsumerUnsignedInt32() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + toLittleEndian(new OutputStreamByteConsumer(bos), 2 + 3 * 256 + 4 * 256 * 256 + 128l * 256 * 256 * 256, 4); + bos.close(); + assertArrayEquals(new byte[] { 2, 3, 4, (byte) 128 }, bos.toByteArray()); + } + + @Test + public void toLittleEndianToDataOutput() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutput dos = new DataOutputStream(bos); + toLittleEndian(dos, 2 + 3 * 256 + 4 * 256 * 256, 3); + bos.close(); + assertArrayEquals(new byte[] { 2, 3, 4 }, bos.toByteArray()); + } + + @Test + public void toLittleEndianToDataOutputUnsignedInt32() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutput dos = new DataOutputStream(bos); + toLittleEndian(dos, 2 + 3 * 256 + 4 * 256 * 256 + 128l * 256 * 256 * 256, 4); + bos.close(); + assertArrayEquals(new byte[] { 2, 3, 4, (byte) 128 }, bos.toByteArray()); + } + + + @Test + public void toLittleEndianToByteArray() throws IOException { + byte[] b = new byte[4]; + toLittleEndian(b, 2 + 3 * 256 + 4 * 256 * 256, 1, 3); + assertArrayEquals(new byte[] { 2, 3, 4 }, Arrays.copyOfRange(b, 1, 4)); + } + + @Test + public void toLittleEndianToByteArrayUnsignedInt32() throws IOException { + byte[] b = new byte[4]; + toLittleEndian(b, 2 + 3 * 256 + 4 * 256 * 256 + 128l * 256 * 256 * 256, 0, 4); + assertArrayEquals(new byte[] { 
2, 3, 4, (byte) 128 }, b); + } +} diff --git a/src/test/java/org/apache/commons/compress/utils/CharsetsTest.java b/src/test/java/org/apache/commons/compress/utils/CharsetsTest.java new file mode 100644 index 000000000..3f39e1c78 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/utils/CharsetsTest.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.compress.utils; + +import java.nio.charset.Charset; + +import org.junit.Assert; + +import org.junit.Test; + +/** + * Sanity checks for {@link Charsets}. + * + */ +public class CharsetsTest { + + @Test + public void testToCharset() { + Assert.assertEquals(Charset.defaultCharset(), Charsets.toCharset((String) null)); + Assert.assertEquals(Charset.defaultCharset(), Charsets.toCharset((Charset) null)); + Assert.assertEquals(Charset.defaultCharset(), Charsets.toCharset(Charset.defaultCharset())); + Assert.assertEquals(Charset.forName("UTF-8"), Charsets.toCharset(Charset.forName("UTF-8"))); + } + +} diff --git a/src/test/java/org/apache/commons/compress/utils/ChecksumCalculatingInputStreamTest.java b/src/test/java/org/apache/commons/compress/utils/ChecksumCalculatingInputStreamTest.java new file mode 100644 index 000000000..a9e52464c --- /dev/null +++ b/src/test/java/org/apache/commons/compress/utils/ChecksumCalculatingInputStreamTest.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.compress.utils; + +import org.junit.Test; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.zip.Adler32; +import java.util.zip.CRC32; + +import static org.junit.Assert.*; + +/** + * Unit tests for class {@link ChecksumCalculatingInputStream org.apache.commons.compress.utils.ChecksumCalculatingInputStream}. 
+ * + * @date 13.06.2017 + * @see ChecksumCalculatingInputStream + **/ +public class ChecksumCalculatingInputStreamTest { + + + + @Test + public void testSkipReturningZero() throws IOException { + + Adler32 adler32 = new Adler32(); + byte[] byteArray = new byte[0]; + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArray); + ChecksumCalculatingInputStream checksumCalculatingInputStream = new ChecksumCalculatingInputStream(adler32, byteArrayInputStream); + long skipResult = checksumCalculatingInputStream.skip(60L); + + assertEquals(0L, skipResult); + + assertEquals(1L, checksumCalculatingInputStream.getValue()); + + + } + + + @Test + public void testSkipReturningPositive() throws IOException { + + Adler32 adler32 = new Adler32(); + byte[] byteArray = new byte[6]; + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArray); + ChecksumCalculatingInputStream checksumCalculatingInputStream = new ChecksumCalculatingInputStream(adler32, byteArrayInputStream); + long skipResult = checksumCalculatingInputStream.skip((byte)0); + + assertEquals(1L, skipResult); + + assertEquals(65537L, checksumCalculatingInputStream.getValue()); + + } + + + @Test + public void testReadTakingNoArguments() throws IOException { + + Adler32 adler32 = new Adler32(); + byte[] byteArray = new byte[6]; + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArray); + ChecksumCalculatingInputStream checksumCalculatingInputStream = new ChecksumCalculatingInputStream(adler32, byteArrayInputStream); + BufferedInputStream bufferedInputStream = new BufferedInputStream(checksumCalculatingInputStream); + int inputStreamReadResult = bufferedInputStream.read(byteArray, 0, 1); + int checkSumCalculationReadResult = checksumCalculatingInputStream.read(); + + assertFalse(checkSumCalculationReadResult == inputStreamReadResult); + assertEquals((-1), checkSumCalculationReadResult); + + assertEquals(0, byteArrayInputStream.available()); + + assertEquals(393217L, checksumCalculatingInputStream.getValue()); + + } + + + @Test + public void testReadTakingByteArray() throws IOException { + + Adler32 adler32 = new Adler32(); + byte[] byteArray = new byte[6]; + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArray); + ChecksumCalculatingInputStream checksumCalculatingInputStream = new ChecksumCalculatingInputStream(adler32, byteArrayInputStream); + int readResult = checksumCalculatingInputStream.read(byteArray); + + assertEquals(6, readResult); + + assertEquals(0, byteArrayInputStream.available()); + assertEquals(393217L, checksumCalculatingInputStream.getValue()); + + } + + + @Test(expected = NullPointerException.class) + public void testClassInstantiationWithParameterBeingNullThrowsNullPointerExceptionOne() { + + ChecksumCalculatingInputStream checksumCalculatingInputStream = new ChecksumCalculatingInputStream(null,null); + + + } + + + @Test(expected = NullPointerException.class) + public void testClassInstantiationWithParameterBeingNullThrowsNullPointerExceptionTwo() { + + ChecksumCalculatingInputStream checksumCalculatingInputStream = new ChecksumCalculatingInputStream(null,new ByteArrayInputStream(new byte[1])); + + + } + + + @Test(expected = NullPointerException.class) + public void testClassInstantiationWithParameterBeingNullThrowsNullPointerExceptionThree() { + + ChecksumCalculatingInputStream checksumCalculatingInputStream = new ChecksumCalculatingInputStream(new CRC32(),null); + + } + + +}
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStreamTest.java b/src/test/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStreamTest.java new file mode 100644 index 000000000..cc9b10074 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStreamTest.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.compress.utils; + +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.zip.Adler32; +import java.util.zip.CRC32; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +/** + * Unit tests for class {@link ChecksumVerifyingInputStream org.apache.commons.compress.utils.ChecksumVerifyingInputStream}. + * + * @date 13.06.2017 + * @see ChecksumVerifyingInputStream + **/ +public class ChecksumVerifyingInputStreamTest { + + + + @Test(expected = IOException.class) + public void testReadTakingByteArrayThrowsIOException() throws IOException { + + Adler32 adler32 = new Adler32(); + byte[] byteArray = new byte[3]; + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArray); + ChecksumVerifyingInputStream checksumVerifyingInputStream = new ChecksumVerifyingInputStream(adler32, byteArrayInputStream, (-1859L), (byte) (-68)); + + checksumVerifyingInputStream.read(byteArray); + + } + + + @Test(expected = IOException.class) + public void testReadTakingNoArgumentsThrowsIOException() throws IOException { + + CRC32 cRC32_ = new CRC32(); + byte[] byteArray = new byte[9]; + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArray); + ChecksumVerifyingInputStream checksumVerifyingInputStream = new ChecksumVerifyingInputStream(cRC32_, byteArrayInputStream, (byte)1, (byte)1); + + checksumVerifyingInputStream.read(); + + } + + + @Test + public void testSkip() throws IOException { + + CRC32 cRC32_ = new CRC32(); + byte[] byteArray = new byte[4]; + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArray); + ChecksumVerifyingInputStream checksumVerifyingInputStream = new ChecksumVerifyingInputStream(cRC32_, byteArrayInputStream, (byte)33, 2303L); + int intOne = checksumVerifyingInputStream.read(byteArray); + + long skipReturnValue = checksumVerifyingInputStream.skip((byte)1); + + assertEquals(558161692L, cRC32_.getValue()); + assertEquals(0, byteArrayInputStream.available()); + + assertArrayEquals(new byte[] {(byte)0, (byte)0, (byte)0, (byte)0}, byteArray); + assertEquals(0L, skipReturnValue); + + } + + + +}
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/utils/CountingStreamTest.java b/src/test/java/org/apache/commons/compress/utils/CountingStreamTest.java new file mode 100644 index 000000000..3be18701f --- /dev/null +++ b/src/test/java/org/apache/commons/compress/utils/CountingStreamTest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; + +import org.junit.Test; + +public class CountingStreamTest { + + @Test + public void output() throws Exception { + // I don't like "test all at once" tests either, but the class + // is so trivial + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + try (final CountingOutputStream o = new CountingOutputStream(bos)) { + o.write(1); + assertEquals(1, o.getBytesWritten()); + o.write(new byte[] { 2, 3 }); + assertEquals(3, o.getBytesWritten()); + o.write(new byte[] { 2, 3, 4, 5, }, 2, 1); + assertEquals(4, o.getBytesWritten()); + o.count(-1); + assertEquals(4, o.getBytesWritten()); + o.count(-2); + assertEquals(2, o.getBytesWritten()); + } + assertArrayEquals(new byte[] { 1, 2, 3, 4 }, bos.toByteArray()); + } + + @Test + public void input() throws Exception { + // I don't like "test all at once" tests either, but the class + // is so trivial + final ByteArrayInputStream bis = + new ByteArrayInputStream(new byte[] { 1, 2, 3, 4 }); + try (final CountingInputStream i = new CountingInputStream(bis)) { + assertEquals(1, i.read()); + assertEquals(1, i.getBytesRead()); + byte[] b = new byte[2]; + i.read(b); + assertEquals(3, i.getBytesRead()); + assertArrayEquals(new byte[] { 2, 3 }, b); + b = new byte[3]; + i.read(b, 1, 1); + assertArrayEquals(new byte[] { 0, 4, 0 }, b); + assertEquals(4, i.getBytesRead()); + i.count(-1); + assertEquals(4, i.getBytesRead()); + i.count(-2); + assertEquals(2, i.getBytesRead()); + } + } + +} diff --git a/src/test/java/org/apache/commons/compress/utils/FixedLengthBlockOutputStreamTest.java b/src/test/java/org/apache/commons/compress/utils/FixedLengthBlockOutputStreamTest.java new file mode 100644 index 000000000..cfda61b31 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/utils/FixedLengthBlockOutputStreamTest.java @@ -0,0 +1,383 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.WritableByteChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.concurrent.atomic.AtomicBoolean; +import org.hamcrest.core.IsInstanceOf; +import org.junit.Test; +import org.mockito.internal.matchers.GreaterOrEqual; + +public class FixedLengthBlockOutputStreamTest { + + @Test + public void testSmallWrite() throws IOException { + testWriteAndPad(10240, "hello world!\n", false); + testWriteAndPad(512, "hello world!\n", false); + testWriteAndPad(11, "hello world!\n", false); + testWriteAndPad(3, "hello world!\n", false); + } + + @Test + public void testSmallWriteToStream() throws IOException { + testWriteAndPadToStream(10240, "hello world!\n", false); + testWriteAndPadToStream(512, "hello world!\n", false); + testWriteAndPadToStream(11, "hello world!\n", false); + testWriteAndPadToStream(3, "hello world!\n", false); + } + + @Test + public void testWriteSingleBytes() throws IOException { + int blockSize = 4; + MockWritableByteChannel mock = new MockWritableByteChannel(blockSize, false); + ByteArrayOutputStream bos = mock.bos; + String text = "hello world avengers"; + byte msg[] = text.getBytes(); + int len = msg.length; + try (FixedLengthBlockOutputStream out = new FixedLengthBlockOutputStream(mock, blockSize)) { + for (int i = 0; i < len; i++) { + out.write(msg[i]); + } + } + byte[] output = bos.toByteArray(); + + validate(blockSize, msg, output); + } + + + @Test + public void testWriteBuf() throws IOException { + String hwa = "hello world avengers"; + testBuf(4, hwa); + testBuf(512, hwa); + testBuf(10240, hwa); + testBuf(11, hwa + hwa + hwa); + } + + @Test + public void testMultiWriteBuf() throws IOException { + int blockSize = 13; + MockWritableByteChannel mock = new MockWritableByteChannel(blockSize, false); + String testString = "hello world"; + byte msg[] = testString.getBytes(); + int reps = 17; + + try (FixedLengthBlockOutputStream out = new FixedLengthBlockOutputStream(mock, blockSize)) { + for (int i = 0; i < reps; i++) { + ByteBuffer buf = getByteBuffer(msg); + out.write(buf); + } + } + ByteArrayOutputStream bos = mock.bos; + double v = Math.ceil((reps * msg.length) / (double) blockSize) * blockSize; + assertEquals("wrong size", (long) v, bos.size()); + int strLen = msg.length * reps; + byte[] output = bos.toByteArray(); + String l = new String(output, 0, strLen); + 
StringBuilder buf = new StringBuilder(strLen); + for (int i = 0; i < reps; i++) { + buf.append(testString); + } + assertEquals(buf.toString(), l); + for (int i = strLen; i < output.length; i++) { + assertEquals(0, output[i]); + } + } + + @Test + public void testPartialWritingThrowsException() { + try { + testWriteAndPad(512, "hello world!\n", true); + fail("Exception for partial write not thrown"); + } catch (IOException e) { + String msg = e.getMessage(); + assertEquals("exception message", + "Failed to write 512 bytes atomically. Only wrote 511", msg); + } + + } + + @Test + public void testWriteFailsAfterFLClosedThrowsException() { + try { + FixedLengthBlockOutputStream out = getClosedFLBOS(); + out.write(1); + fail("expected Closed Channel Exception"); + } catch (IOException e) { + assertThat(e, IsInstanceOf.instanceOf(ClosedChannelException.class)); + // expected + } + try { + FixedLengthBlockOutputStream out = getClosedFLBOS(); + out.write(new byte[] {0,1,2,3}); + fail("expected Closed Channel Exception"); + } catch (IOException e) { + assertThat(e, IsInstanceOf.instanceOf(ClosedChannelException.class)); + // expected + } + + try { + FixedLengthBlockOutputStream out = getClosedFLBOS(); + out.write(ByteBuffer.wrap(new byte[] {0,1,2,3})); + fail("expected Closed Channel Exception"); + } catch (IOException e) { + assertThat(e, IsInstanceOf.instanceOf(ClosedChannelException.class)); + // expected + } + + } + + private FixedLengthBlockOutputStream getClosedFLBOS() throws IOException { + int blockSize = 512; + FixedLengthBlockOutputStream out = new FixedLengthBlockOutputStream( + new MockOutputStream(blockSize, false), blockSize); + out.write(1); + assertTrue(out.isOpen()); + out.close(); + assertFalse(out.isOpen()); + return out; + } + + @Test + public void testWriteFailsAfterDestClosedThrowsException() { + int blockSize = 2; + MockOutputStream mock = new MockOutputStream(blockSize, false); + FixedLengthBlockOutputStream out = + new FixedLengthBlockOutputStream(mock, blockSize); + try { + out.write(1); + assertTrue(out.isOpen()); + mock.close(); + out.write(1); + fail("expected IO Exception"); + } catch (IOException e) { + // expected + } + assertFalse(out.isOpen()); + } + + @Test + public void testWithFileOutputStream() throws IOException { + final Path tempFile = Files.createTempFile("xxx", "yyy"); + Runtime.getRuntime().addShutdownHook(new Thread() { + @Override + public void run() { + try { + Files.deleteIfExists(tempFile); + } catch (IOException e) { + } + } + }); + int blockSize = 512; + int reps = 1000; + OutputStream os = new FileOutputStream(tempFile.toFile()); + try (FixedLengthBlockOutputStream out = new FixedLengthBlockOutputStream( + os, blockSize)) { + DataOutputStream dos = new DataOutputStream(out); + for (int i = 0; i < reps; i++) { + dos.writeInt(i); + } + } + long expectedDataSize = reps * 4L; + long expectedFileSize = (long)Math.ceil(expectedDataSize/(double)blockSize)*blockSize; + assertEquals("file size",expectedFileSize, Files.size(tempFile)); + DataInputStream din = new DataInputStream(Files.newInputStream(tempFile)); + for(int i=0;i<reps;i++) { + assertEquals("file int",i,din.readInt()); + } + for(int i=0;i<expectedFileSize - expectedDataSize;i++) { + assertEquals(0,din.read()); + } + assertEquals(-1,din.read()); + } + + private void testBuf(int blockSize, String text) throws IOException { + MockWritableByteChannel mock = new MockWritableByteChannel(blockSize, false); + + ByteArrayOutputStream bos = mock.bos; + byte msg[] = text.getBytes(); + ByteBuffer buf = 
getByteBuffer(msg); + try (FixedLengthBlockOutputStream out = new FixedLengthBlockOutputStream(mock, blockSize)) { + out.write(buf); + } + double v = Math.ceil(msg.length / (double) blockSize) * blockSize; + assertEquals("wrong size", (long) v, bos.size()); + byte[] output = bos.toByteArray(); + String l = new String(output, 0, msg.length); + assertEquals(text, l); + for (int i = msg.length; i < bos.size(); i++) { + assertEquals(String.format("output[%d]", i), 0, output[i]); + + } + } + + private ByteBuffer getByteBuffer(byte[] msg) { + int len = msg.length; + ByteBuffer buf = ByteBuffer.allocate(len); + buf.put(msg); + buf.flip(); + return buf; + } + + + private void testWriteAndPad(int blockSize, String text, boolean doPartialWrite) + throws IOException { + MockWritableByteChannel mock = new MockWritableByteChannel(blockSize, doPartialWrite); + byte[] msg = text.getBytes(StandardCharsets.US_ASCII); + + ByteArrayOutputStream bos = mock.bos; + try (FixedLengthBlockOutputStream out = new FixedLengthBlockOutputStream(mock, blockSize)) { + + out.write(msg); + assertEquals("no partial write", (msg.length / blockSize) * blockSize, bos.size()); + } + validate(blockSize, msg, bos.toByteArray()); + } + + private void testWriteAndPadToStream(int blockSize, String text, boolean doPartialWrite) + throws IOException { + MockOutputStream mock = new MockOutputStream(blockSize, doPartialWrite); + byte[] msg = text.getBytes(StandardCharsets.US_ASCII); + + ByteArrayOutputStream bos = mock.bos; + try (FixedLengthBlockOutputStream out = new FixedLengthBlockOutputStream(mock, blockSize)) { + out.write(msg); + assertEquals("no partial write", (msg.length / blockSize) * blockSize, bos.size()); + } + validate(blockSize, msg, bos.toByteArray()); + + } + + + private void validate(int blockSize, byte[] expectedBytes, byte[] actualBytes) { + double v = Math.ceil(expectedBytes.length / (double) blockSize) * blockSize; + assertEquals("wrong size", (long) v, actualBytes.length); + assertContainsAtOffset("output", expectedBytes, 0, actualBytes); + for (int i = expectedBytes.length; i < actualBytes.length; i++) { + assertEquals(String.format("output[%d]", i), 0, actualBytes[i]); + + } + } + + private static void assertContainsAtOffset(String msg, byte[] expected, int offset, + byte[] actual) { + assertThat(actual.length, new GreaterOrEqual<>(offset + expected.length)); + for (int i = 0; i < expected.length; i++) { + assertEquals(String.format("%s ([%d])", msg, i), expected[i], actual[i + offset]); + } + } + + private static class MockOutputStream extends OutputStream { + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + private final int requiredWriteSize; + private final boolean doPartialWrite; + private AtomicBoolean closed = new AtomicBoolean(); + + private MockOutputStream(int requiredWriteSize, boolean doPartialWrite) { + this.requiredWriteSize = requiredWriteSize; + this.doPartialWrite = doPartialWrite; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + checkIsOpen(); + assertEquals("write size", requiredWriteSize, len); + if (doPartialWrite) { + len--; + } + bos.write(b, off, len); + } + + private void checkIsOpen() throws IOException { + if (closed.get()) { + IOException e = new IOException("Closed"); + throw e; + } + } + + @Override + public void write(int b) throws IOException { + checkIsOpen(); + assertEquals("write size", requiredWriteSize, 1); + bos.write(b); + } + + @Override + public void close() throws IOException { + if (closed.compareAndSet(false, true)) 
{ + bos.close(); + } + } + } + + private static class MockWritableByteChannel implements WritableByteChannel { + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + private final int requiredWriteSize; + private final boolean doPartialWrite; + + private MockWritableByteChannel(int requiredWriteSize, boolean doPartialWrite) { + this.requiredWriteSize = requiredWriteSize; + this.doPartialWrite = doPartialWrite; + } + + @Override + public int write(ByteBuffer src) throws IOException { + assertEquals("write size", requiredWriteSize, src.remaining()); + if (doPartialWrite) { + src.limit(src.limit() - 1); + } + int bytesOut = src.remaining(); + while (src.hasRemaining()) { + bos.write(src.get()); + } + return bytesOut; + } + + AtomicBoolean closed = new AtomicBoolean(); + + @Override + public boolean isOpen() { + return !closed.get(); + } + + @Override + public void close() throws IOException { + closed.compareAndSet(false, true); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/utils/IOUtilsTest.java b/src/test/java/org/apache/commons/compress/utils/IOUtilsTest.java new file mode 100644 index 000000000..fce779e3d --- /dev/null +++ b/src/test/java/org/apache/commons/compress/utils/IOUtilsTest.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.compress.utils; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.FilterInputStream; +import java.io.InputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ReadableByteChannel; + +import org.junit.Assert; +import org.junit.Test; + +public class IOUtilsTest { + + private interface StreamWrapper { + InputStream wrap(InputStream toWrap); + } + + @Test + public void skipUsingSkip() throws Exception { + skip(new StreamWrapper() { + @Override + public InputStream wrap(final InputStream toWrap) { + return toWrap; + } + }); + } + + @Test + public void skipUsingRead() throws Exception { + skip(new StreamWrapper() { + @Override + public InputStream wrap(final InputStream toWrap) { + return new FilterInputStream(toWrap) { + @Override + public long skip(final long s) { + return 0; + } + }; + } + }); + } + + @Test + public void skipUsingSkipAndRead() throws Exception { + skip(new StreamWrapper() { + @Override + public InputStream wrap(final InputStream toWrap) { + return new FilterInputStream(toWrap) { + boolean skipped; + @Override + public long skip(final long s) throws IOException { + if (!skipped) { + toWrap.skip(5); + skipped = true; + return 5; + } + return 0; + } + }; + } + }); + } + + @Test + public void readFullyOnChannelReadsFully() throws IOException { + ByteBuffer b = ByteBuffer.allocate(20); + final byte[] source = new byte[20]; + for (byte i = 0; i < 20; i++) { + source[i] = i; + } + readFully(source, b); + Assert.assertArrayEquals(source, b.array()); + } + + @Test(expected = EOFException.class) + public void readFullyOnChannelThrowsEof() throws IOException { + ByteBuffer b = ByteBuffer.allocate(21); + final byte[] source = new byte[20]; + for (byte i = 0; i < 20; i++) { + source[i] = i; + } + readFully(source, b); + } + + @Test(expected = IllegalArgumentException.class) + public void copyThrowsOnZeroBufferSize() throws IOException { + IOUtils.copy(new ByteArrayInputStream(new byte[0]), new ByteArrayOutputStream(), 0); + } + + private static void readFully(final byte[] source, ByteBuffer b) throws IOException { + IOUtils.readFully(new ReadableByteChannel() { + private int idx; + @Override + public int read(ByteBuffer buf) { + if (idx >= source.length) { + return -1; + } + buf.put(source[idx++]); + return 1; + } + @Override + public void close() { } + @Override + public boolean isOpen() { + return true; + } + }, b); + } + + private void skip(final StreamWrapper wrapper) throws Exception { + final ByteArrayInputStream in = new ByteArrayInputStream(new byte[] { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + }); + final InputStream sut = wrapper.wrap(in); + Assert.assertEquals(10, IOUtils.skip(sut, 10)); + Assert.assertEquals(11, sut.read()); + } +} diff --git a/src/test/java/org/apache/commons/compress/utils/SeekableInMemoryByteChannelTest.java b/src/test/java/org/apache/commons/compress/utils/SeekableInMemoryByteChannelTest.java new file mode 100644 index 000000000..32af30847 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/utils/SeekableInMemoryByteChannelTest.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import org.junit.Test; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; +import java.nio.charset.Charset; +import java.util.Arrays; + +import static org.apache.commons.compress.utils.CharsetNames.UTF_8; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class SeekableInMemoryByteChannelTest { + + private final byte[] testData = "Some data".getBytes(Charset.forName(UTF_8)); + + @Test + public void shouldReadContentsProperly() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(testData); + ByteBuffer readBuffer = ByteBuffer.allocate(testData.length); + //when + int readCount = c.read(readBuffer); + //then + assertEquals(testData.length, readCount); + assertArrayEquals(testData, readBuffer.array()); + assertEquals(testData.length, c.position()); + c.close(); + } + + @Test + public void shouldReadContentsWhenBiggerBufferSupplied() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(testData); + ByteBuffer readBuffer = ByteBuffer.allocate(testData.length + 1); + //when + int readCount = c.read(readBuffer); + //then + assertEquals(testData.length, readCount); + assertArrayEquals(testData, Arrays.copyOf(readBuffer.array(), testData.length)); + assertEquals(testData.length, c.position()); + c.close(); + } + + @Test + public void shouldReadDataFromSetPosition() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(testData); + ByteBuffer readBuffer = ByteBuffer.allocate(4); + //when + c.position(5L); + int readCount = c.read(readBuffer); + //then + assertEquals(4L, readCount); + assertEquals("data", new String(readBuffer.array(), Charset.forName(UTF_8))); + assertEquals(testData.length, c.position()); + c.close(); + } + + @Test + public void shouldSignalEOFWhenPositionAtTheEnd() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(testData); + ByteBuffer readBuffer = ByteBuffer.allocate(testData.length); + //when + c.position(testData.length + 1); + int readCount = c.read(readBuffer); + //then + assertEquals(0L, readBuffer.position()); + assertEquals(-1, readCount); + c.close(); + } + + @Test(expected = ClosedChannelException.class) + public void shouldThrowExceptionOnReadingClosedChannel() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(); + //when + c.close(); + c.read(ByteBuffer.allocate(1)); + } + + @Test + public void shouldWriteDataProperly() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(); + ByteBuffer inData = ByteBuffer.wrap(testData); + //when + int writeCount = c.write(inData); + //then + assertEquals(testData.length, writeCount); + assertArrayEquals(testData, 
Arrays.copyOf(c.array(), (int) c.size())); + assertEquals(testData.length, c.position()); + c.close(); + } + + @Test + public void shouldWriteDataProperlyAfterPositionSet() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(testData); + ByteBuffer inData = ByteBuffer.wrap(testData); + ByteBuffer expectedData = ByteBuffer.allocate(testData.length + 5).put(testData, 0, 5).put(testData); + //when + c.position(5L); + int writeCount = c.write(inData); + + //then + assertEquals(testData.length, writeCount); + assertArrayEquals(expectedData.array(), Arrays.copyOf(c.array(), (int) c.size())); + assertEquals(testData.length + 5, c.position()); + c.close(); + } + + + @Test(expected = ClosedChannelException.class) + public void shouldThrowExceptionOnWritingToClosedChannel() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(); + //when + c.close(); + c.write(ByteBuffer.allocate(1)); + } + + @Test + public void shouldTruncateContentsProperly() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(testData); + //when + c.truncate(4); + //then + byte[] bytes = Arrays.copyOf(c.array(), (int) c.size()); + assertEquals("Some", new String(bytes, Charset.forName(UTF_8))); + c.close(); + } + + @Test + public void shouldSetProperPositionOnTruncate() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(testData); + //when + c.position(testData.length); + c.truncate(4L); + //then + assertEquals(4L, c.position()); + assertEquals(4L, c.size()); + c.close(); + } + + @Test + public void shouldSetProperPosition() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(testData); + //when + long posAtFour = c.position(4L).position(); + long posAtTheEnd = c.position(testData.length).position(); + long posPastTheEnd = c.position(testData.length + 1L).position(); + //then + assertEquals(4L, posAtFour); + assertEquals(c.size(), posAtTheEnd); + assertEquals(posPastTheEnd, posPastTheEnd); + c.close(); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldThrowExceptionWhenSettingIncorrectPosition() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(); + //when + c.position(Integer.MAX_VALUE + 1L); + c.close(); + } + + @Test(expected = ClosedChannelException.class) + public void shouldThrowExceptionWhenSettingPositionOnClosedChannel() throws IOException { + //given + SeekableInMemoryByteChannel c = new SeekableInMemoryByteChannel(); + //when + c.close(); + c.position(1L); + } + +} diff --git a/src/test/java/org/apache/commons/compress/utils/ServiceLoaderIteratorTest.java b/src/test/java/org/apache/commons/compress/utils/ServiceLoaderIteratorTest.java new file mode 100644 index 000000000..c39f40187 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/utils/ServiceLoaderIteratorTest.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import org.junit.Test; + +import java.util.NoSuchElementException; + +import static org.junit.Assert.assertFalse; + +/** + * Unit tests for class {@link ServiceLoaderIterator org.apache.commons.compress.utils.ServiceLoaderIterator}. + * + * @date 13.06.2017 + * @see ServiceLoaderIterator + **/ +public class ServiceLoaderIteratorTest { + + + + @Test(expected = NoSuchElementException.class) + public void testNextThrowsNoSuchElementException() { + + Class<String> clasz = String.class; + ServiceLoaderIterator<String> serviceLoaderIterator = new ServiceLoaderIterator<String>(clasz); + + serviceLoaderIterator.next(); + + } + + + @Test + public void testHasNextReturnsFalse() { + + Class<Object> clasz = Object.class; + ServiceLoaderIterator<Object> serviceLoaderIterator = new ServiceLoaderIterator<Object>(clasz); + boolean result = serviceLoaderIterator.hasNext(); + + assertFalse(result); + + } + + + @Test(expected = UnsupportedOperationException.class) + public void testRemoveThrowsUnsupportedOperationException() { + + Class<Integer> clasz = Integer.class; + ServiceLoaderIterator<Integer> serviceLoaderIterator = new ServiceLoaderIterator<Integer>(clasz); + + serviceLoaderIterator.remove(); + + + } + + + +}
\ No newline at end of file diff --git a/src/test/java/org/apache/commons/compress/utils/SkipShieldingInputStreamTest.java b/src/test/java/org/apache/commons/compress/utils/SkipShieldingInputStreamTest.java new file mode 100644 index 000000000..5ae69cf38 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/utils/SkipShieldingInputStreamTest.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.IOException; +import java.io.InputStream; +import org.junit.Assert; +import org.junit.Test; + +public class SkipShieldingInputStreamTest { + + @Test + public void skipDelegatesToRead() throws IOException { + try (InputStream i = new SkipShieldingInputStream(new InputStream() { + @Override + public long skip(long n) { + Assert.fail("skip invoked"); + return -1; + } + @Override + public int read() { + return -1; + } + @Override + public int read(byte[] b, int off, int len) { + return len; + } + })) { + Assert.assertEquals(100, i.skip(100)); + } + } + + @Test + public void skipHasAnUpperBoundOnRead() throws IOException { + try (InputStream i = new SkipShieldingInputStream(new InputStream() { + @Override + public long skip(long n) { + Assert.fail("skip invoked"); + return -1; + } + @Override + public int read() { + return -1; + } + @Override + public int read(byte[] b, int off, int len) { + return len; + } + })) { + Assert.assertTrue(Integer.MAX_VALUE > i.skip(Long.MAX_VALUE)); + } + } + + @Test + public void skipSwallowsNegativeArguments() throws IOException { + try (InputStream i = new SkipShieldingInputStream(new InputStream() { + @Override + public long skip(long n) { + Assert.fail("skip invoked"); + return -1; + } + @Override + public int read() { + return -1; + } + @Override + public int read(byte[] b, int off, int len) { + Assert.fail("read invoked"); + return len; + } + })) { + Assert.assertEquals(0, i.skip(Long.MIN_VALUE)); + } + } + +} diff --git a/src/test/resources/7z-empty-mhc-off.7z b/src/test/resources/7z-empty-mhc-off.7z Binary files differnew file mode 100644 index 000000000..8e00c4353 --- /dev/null +++ b/src/test/resources/7z-empty-mhc-off.7z diff --git a/src/test/resources/7z-hello-mhc-off-copy.7z b/src/test/resources/7z-hello-mhc-off-copy.7z Binary files differnew file mode 100644 index 000000000..2cc91da8e --- /dev/null +++ b/src/test/resources/7z-hello-mhc-off-copy.7z diff --git a/src/test/resources/7z-hello-mhc-off-lzma2.7z b/src/test/resources/7z-hello-mhc-off-lzma2.7z Binary files differnew file mode 100644 index 000000000..0fbcd2574 --- /dev/null +++ b/src/test/resources/7z-hello-mhc-off-lzma2.7z diff --git a/src/test/resources/8.posix.tar.gz b/src/test/resources/8.posix.tar.gz Binary files differnew file mode 100644 index 
000000000..ee55202e8 --- /dev/null +++ b/src/test/resources/8.posix.tar.gz diff --git a/src/test/resources/8.star.tar.gz b/src/test/resources/8.star.tar.gz Binary files differnew file mode 100644 index 000000000..b63c78d4b --- /dev/null +++ b/src/test/resources/8.star.tar.gz diff --git a/src/test/resources/COMPRESS-114.tar b/src/test/resources/COMPRESS-114.tar Binary files differnew file mode 100644 index 000000000..9227f98a5 --- /dev/null +++ b/src/test/resources/COMPRESS-114.tar diff --git a/src/test/resources/COMPRESS-117.tar b/src/test/resources/COMPRESS-117.tar Binary files differnew file mode 100644 index 000000000..51abeb9d9 --- /dev/null +++ b/src/test/resources/COMPRESS-117.tar diff --git a/src/test/resources/COMPRESS-131.bz2 b/src/test/resources/COMPRESS-131.bz2 Binary files differnew file mode 100644 index 000000000..b8db61012 --- /dev/null +++ b/src/test/resources/COMPRESS-131.bz2 diff --git a/src/test/resources/COMPRESS-178.tar b/src/test/resources/COMPRESS-178.tar Binary files differnew file mode 100644 index 000000000..6a1321979 --- /dev/null +++ b/src/test/resources/COMPRESS-178.tar diff --git a/src/test/resources/COMPRESS-189.zip b/src/test/resources/COMPRESS-189.zip Binary files differnew file mode 100644 index 000000000..59954db65 --- /dev/null +++ b/src/test/resources/COMPRESS-189.zip diff --git a/src/test/resources/COMPRESS-197.tar b/src/test/resources/COMPRESS-197.tar Binary files differnew file mode 100644 index 000000000..2f42ee047 --- /dev/null +++ b/src/test/resources/COMPRESS-197.tar diff --git a/src/test/resources/COMPRESS-208.zip b/src/test/resources/COMPRESS-208.zip Binary files differnew file mode 100644 index 000000000..b21007475 --- /dev/null +++ b/src/test/resources/COMPRESS-208.zip diff --git a/src/test/resources/COMPRESS-210_unix_time_zip_test.zip b/src/test/resources/COMPRESS-210_unix_time_zip_test.zip Binary files differnew file mode 100644 index 000000000..7249ae141 --- /dev/null +++ b/src/test/resources/COMPRESS-210_unix_time_zip_test.zip diff --git a/src/test/resources/COMPRESS-211_uid_gid_zip_test.zip b/src/test/resources/COMPRESS-211_uid_gid_zip_test.zip Binary files differnew file mode 100644 index 000000000..b1d405c75 --- /dev/null +++ b/src/test/resources/COMPRESS-211_uid_gid_zip_test.zip diff --git a/src/test/resources/COMPRESS-214_unix_symlinks.zip b/src/test/resources/COMPRESS-214_unix_symlinks.zip Binary files differnew file mode 100644 index 000000000..e221a8437 --- /dev/null +++ b/src/test/resources/COMPRESS-214_unix_symlinks.zip diff --git a/src/test/resources/COMPRESS-219.zip b/src/test/resources/COMPRESS-219.zip Binary files differnew file mode 100644 index 000000000..de7b5c858 --- /dev/null +++ b/src/test/resources/COMPRESS-219.zip diff --git a/src/test/resources/COMPRESS-227.zip b/src/test/resources/COMPRESS-227.zip Binary files differnew file mode 100644 index 000000000..2f54ca636 --- /dev/null +++ b/src/test/resources/COMPRESS-227.zip diff --git a/src/test/resources/COMPRESS-228.zip b/src/test/resources/COMPRESS-228.zip Binary files differnew file mode 100644 index 000000000..25ecb12d2 --- /dev/null +++ b/src/test/resources/COMPRESS-228.zip diff --git a/src/test/resources/COMPRESS-245.tar.gz b/src/test/resources/COMPRESS-245.tar.gz Binary files differnew file mode 100644 index 000000000..913db860a --- /dev/null +++ b/src/test/resources/COMPRESS-245.tar.gz diff --git a/src/test/resources/COMPRESS-256.7z b/src/test/resources/COMPRESS-256.7z Binary files differnew file mode 100644 index 000000000..1c6a2e787 --- /dev/null +++ 
b/src/test/resources/COMPRESS-256.7z diff --git a/src/test/resources/COMPRESS-264.zip b/src/test/resources/COMPRESS-264.zip Binary files differnew file mode 100644 index 000000000..75f2be717 --- /dev/null +++ b/src/test/resources/COMPRESS-264.zip diff --git a/src/test/resources/COMPRESS-279.tar b/src/test/resources/COMPRESS-279.tar Binary files differnew file mode 100644 index 000000000..eeb0932f7 --- /dev/null +++ b/src/test/resources/COMPRESS-279.tar diff --git a/src/test/resources/COMPRESS-320/BZip2-solid.7z b/src/test/resources/COMPRESS-320/BZip2-solid.7z Binary files differnew file mode 100644 index 000000000..a1ff11b5a --- /dev/null +++ b/src/test/resources/COMPRESS-320/BZip2-solid.7z diff --git a/src/test/resources/COMPRESS-320/BZip2.7z b/src/test/resources/COMPRESS-320/BZip2.7z Binary files differnew file mode 100644 index 000000000..3272ecdfe --- /dev/null +++ b/src/test/resources/COMPRESS-320/BZip2.7z diff --git a/src/test/resources/COMPRESS-320/Copy-solid.7z b/src/test/resources/COMPRESS-320/Copy-solid.7z Binary files differnew file mode 100644 index 000000000..008564df2 --- /dev/null +++ b/src/test/resources/COMPRESS-320/Copy-solid.7z diff --git a/src/test/resources/COMPRESS-320/Copy.7z b/src/test/resources/COMPRESS-320/Copy.7z Binary files differnew file mode 100644 index 000000000..958cd8f31 --- /dev/null +++ b/src/test/resources/COMPRESS-320/Copy.7z diff --git a/src/test/resources/COMPRESS-320/Deflate-solid.7z b/src/test/resources/COMPRESS-320/Deflate-solid.7z Binary files differnew file mode 100644 index 000000000..00eb84dc4 --- /dev/null +++ b/src/test/resources/COMPRESS-320/Deflate-solid.7z diff --git a/src/test/resources/COMPRESS-320/Deflate.7z b/src/test/resources/COMPRESS-320/Deflate.7z Binary files differnew file mode 100644 index 000000000..b5e35706e --- /dev/null +++ b/src/test/resources/COMPRESS-320/Deflate.7z diff --git a/src/test/resources/COMPRESS-320/LZMA-solid.7z b/src/test/resources/COMPRESS-320/LZMA-solid.7z Binary files differnew file mode 100644 index 000000000..5f559936c --- /dev/null +++ b/src/test/resources/COMPRESS-320/LZMA-solid.7z diff --git a/src/test/resources/COMPRESS-320/LZMA.7z b/src/test/resources/COMPRESS-320/LZMA.7z Binary files differnew file mode 100644 index 000000000..3416d11a7 --- /dev/null +++ b/src/test/resources/COMPRESS-320/LZMA.7z diff --git a/src/test/resources/COMPRESS-320/LZMA2-solid.7z b/src/test/resources/COMPRESS-320/LZMA2-solid.7z Binary files differnew file mode 100644 index 000000000..5a9f80769 --- /dev/null +++ b/src/test/resources/COMPRESS-320/LZMA2-solid.7z diff --git a/src/test/resources/COMPRESS-320/LZMA2.7z b/src/test/resources/COMPRESS-320/LZMA2.7z Binary files differnew file mode 100644 index 000000000..c6c83477e --- /dev/null +++ b/src/test/resources/COMPRESS-320/LZMA2.7z diff --git a/src/test/resources/COMPRESS-320/PPMd-solid.7z b/src/test/resources/COMPRESS-320/PPMd-solid.7z Binary files differnew file mode 100644 index 000000000..5a3ee0abf --- /dev/null +++ b/src/test/resources/COMPRESS-320/PPMd-solid.7z diff --git a/src/test/resources/COMPRESS-320/PPMd.7z b/src/test/resources/COMPRESS-320/PPMd.7z Binary files differnew file mode 100644 index 000000000..237396ab6 --- /dev/null +++ b/src/test/resources/COMPRESS-320/PPMd.7z diff --git a/src/test/resources/COMPRESS-320/recreate.sh b/src/test/resources/COMPRESS-320/recreate.sh new file mode 100644 index 000000000..09bd0dc45 --- /dev/null +++ b/src/test/resources/COMPRESS-320/recreate.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +rm *.7z +for COMPRESSION in "LZMA" "LZMA2" 
"PPMd" "BZip2" "Deflate" "Copy"; do + # New solid block every 10 files. + 7za a -m0=$COMPRESSION -ms10f $COMPRESSION-solid.7z ../../../../src/main/java/org/apache/commons/compress/compressors + # Each file in isolation + 7za a -m0=$COMPRESSION -ms=off $COMPRESSION.7z ../../../../src/main/java/org/apache/commons/compress/compressors +done diff --git a/src/test/resources/COMPRESS-324.tar b/src/test/resources/COMPRESS-324.tar Binary files differnew file mode 100644 index 000000000..137b56e4d --- /dev/null +++ b/src/test/resources/COMPRESS-324.tar diff --git a/src/test/resources/COMPRESS-335.tar b/src/test/resources/COMPRESS-335.tar Binary files differnew file mode 100644 index 000000000..0266b6318 --- /dev/null +++ b/src/test/resources/COMPRESS-335.tar diff --git a/src/test/resources/COMPRESS-348.7z b/src/test/resources/COMPRESS-348.7z Binary files differnew file mode 100644 index 000000000..8fb26c2b0 --- /dev/null +++ b/src/test/resources/COMPRESS-348.7z diff --git a/src/test/resources/COMPRESS-351.zip b/src/test/resources/COMPRESS-351.zip Binary files differnew file mode 100644 index 000000000..9e4c331df --- /dev/null +++ b/src/test/resources/COMPRESS-351.zip diff --git a/src/test/resources/COMPRESS-355.tar b/src/test/resources/COMPRESS-355.tar Binary files differnew file mode 100644 index 000000000..6eb94f278 --- /dev/null +++ b/src/test/resources/COMPRESS-355.tar diff --git a/src/test/resources/COMPRESS-356.tar b/src/test/resources/COMPRESS-356.tar Binary files differnew file mode 100644 index 000000000..4dd6be9ff --- /dev/null +++ b/src/test/resources/COMPRESS-356.tar diff --git a/src/test/resources/COMPRESS-358.iwa b/src/test/resources/COMPRESS-358.iwa Binary files differnew file mode 100644 index 000000000..116d79cc3 --- /dev/null +++ b/src/test/resources/COMPRESS-358.iwa diff --git a/src/test/resources/COMPRESS-358.uncompressed b/src/test/resources/COMPRESS-358.uncompressed Binary files differnew file mode 100644 index 000000000..e7cfa3415 --- /dev/null +++ b/src/test/resources/COMPRESS-358.uncompressed diff --git a/src/test/resources/COMPRESS-379.jar b/src/test/resources/COMPRESS-379.jar Binary files differnew file mode 100644 index 000000000..8211f322e --- /dev/null +++ b/src/test/resources/COMPRESS-379.jar diff --git a/src/test/resources/COMPRESS-380/COMPRESS-380-dd.zip b/src/test/resources/COMPRESS-380/COMPRESS-380-dd.zip Binary files differnew file mode 100644 index 000000000..95579963d --- /dev/null +++ b/src/test/resources/COMPRESS-380/COMPRESS-380-dd.zip diff --git a/src/test/resources/COMPRESS-380/COMPRESS-380-input b/src/test/resources/COMPRESS-380/COMPRESS-380-input Binary files differnew file mode 100644 index 000000000..daf1f5610 --- /dev/null +++ b/src/test/resources/COMPRESS-380/COMPRESS-380-input diff --git a/src/test/resources/COMPRESS-380/COMPRESS-380-readbeyondmemory.zip b/src/test/resources/COMPRESS-380/COMPRESS-380-readbeyondmemory.zip Binary files differnew file mode 100644 index 000000000..99f352d1f --- /dev/null +++ b/src/test/resources/COMPRESS-380/COMPRESS-380-readbeyondmemory.zip diff --git a/src/test/resources/COMPRESS-380/COMPRESS-380.zip b/src/test/resources/COMPRESS-380/COMPRESS-380.zip Binary files differnew file mode 100644 index 000000000..d9146bea6 --- /dev/null +++ b/src/test/resources/COMPRESS-380/COMPRESS-380.zip diff --git a/src/test/resources/COMPRESS-382 b/src/test/resources/COMPRESS-382 Binary files differnew file mode 100644 index 000000000..be257f21e --- /dev/null +++ b/src/test/resources/COMPRESS-382 diff --git 
a/src/test/resources/COMPRESS-386 b/src/test/resources/COMPRESS-386 new file mode 100644 index 000000000..36d7f52f0 --- /dev/null +++ b/src/test/resources/COMPRESS-386 @@ -0,0 +1 @@ +B
\ No newline at end of file diff --git a/src/test/resources/COMPRESS-417.tar b/src/test/resources/COMPRESS-417.tar Binary files differnew file mode 100644 index 000000000..807683b02 --- /dev/null +++ b/src/test/resources/COMPRESS-417.tar diff --git a/src/test/resources/COMPRESS-459.cpio b/src/test/resources/COMPRESS-459.cpio Binary files differnew file mode 100644 index 000000000..8ae1662a0 --- /dev/null +++ b/src/test/resources/COMPRESS-459.cpio diff --git a/src/test/resources/META-INF/services/org.apache.commons.compress.archivers.ArchiveStreamProvider b/src/test/resources/META-INF/services/org.apache.commons.compress.archivers.ArchiveStreamProvider new file mode 100644 index 000000000..dea236f4e --- /dev/null +++ b/src/test/resources/META-INF/services/org.apache.commons.compress.archivers.ArchiveStreamProvider @@ -0,0 +1 @@ +org.apache.commons.compress.archivers.TestArchiveStreamProvider diff --git a/src/test/resources/META-INF/services/org.apache.commons.compress.compressors.CompressorStreamProvider b/src/test/resources/META-INF/services/org.apache.commons.compress.compressors.CompressorStreamProvider new file mode 100644 index 000000000..782d5816d --- /dev/null +++ b/src/test/resources/META-INF/services/org.apache.commons.compress.compressors.CompressorStreamProvider @@ -0,0 +1 @@ +org.apache.commons.compress.compressors.TestCompressorStreamProvider diff --git a/src/test/resources/OSX_ArchiveWithNestedArchive.zip b/src/test/resources/OSX_ArchiveWithNestedArchive.zip Binary files differnew file mode 100644 index 000000000..72b5d70f9 --- /dev/null +++ b/src/test/resources/OSX_ArchiveWithNestedArchive.zip diff --git a/src/test/resources/SHRUNK.ZIP b/src/test/resources/SHRUNK.ZIP Binary files differnew file mode 100644 index 000000000..372a8e71e --- /dev/null +++ b/src/test/resources/SHRUNK.ZIP diff --git a/src/test/resources/apache-maven-2.2.1.zip.001 b/src/test/resources/apache-maven-2.2.1.zip.001 Binary files differnew file mode 100644 index 000000000..691388e45 --- /dev/null +++ b/src/test/resources/apache-maven-2.2.1.zip.001 diff --git a/src/test/resources/archive_with_bytes_after_data.zip b/src/test/resources/archive_with_bytes_after_data.zip Binary files differnew file mode 100644 index 000000000..8937ede92 --- /dev/null +++ b/src/test/resources/archive_with_bytes_after_data.zip diff --git a/src/test/resources/archive_with_trailer.dump b/src/test/resources/archive_with_trailer.dump Binary files differnew file mode 100644 index 000000000..eb7c4ae4d --- /dev/null +++ b/src/test/resources/archive_with_trailer.dump diff --git a/src/test/resources/archive_with_trailer.tar b/src/test/resources/archive_with_trailer.tar Binary files differnew file mode 100644 index 000000000..6f889c721 --- /dev/null +++ b/src/test/resources/archive_with_trailer.tar diff --git a/src/test/resources/archive_with_trailer.zip b/src/test/resources/archive_with_trailer.zip Binary files differnew file mode 100644 index 000000000..3e5fb1f53 --- /dev/null +++ b/src/test/resources/archive_with_trailer.zip diff --git a/src/test/resources/archives/FreeBSD.ar b/src/test/resources/archives/FreeBSD.ar new file mode 100644 index 000000000..6bf911610 --- /dev/null +++ b/src/test/resources/archives/FreeBSD.ar @@ -0,0 +1,29 @@ +!<arch> +1/ 1238278221 1721 1721 100664 27 ` +abcdefghijklmnopqrstuvwxyz + +12/ 1238278221 1721 1721 100664 27 ` +abcdefghijklmnopqrstuvwxyz + +123/ 1238278221 1721 1721 100664 27 ` +abcdefghijklmnopqrstuvwxyz + +1234/ 1238278222 1721 1721 100664 27 ` +abcdefghijklmnopqrstuvwxyz + +12345/ 1238278222 
1721 1721 100664 27 ` +abcdefghijklmnopqrstuvwxyz + +123450/ 1238278222 1721 1721 100644 27 ` +abcdefghijklmnopqrstuvwxyz + +123456/ 1238278222 1721 1721 100664 26 ` +bcdefghijklmnopqrstuvwxyz +123457/ 1238278222 1721 1721 100644 25 ` +cdefghijklmnopqrstuvwxyz + +123458/ 1238278222 1721 1721 100644 24 ` +defghijklmnopqrstuvwxyz +123459/ 1238278222 1721 1721 100644 23 ` +efghijklmnopqrstuvwxyz + diff --git a/src/test/resources/archives/FreeBSD.zip b/src/test/resources/archives/FreeBSD.zip Binary files differnew file mode 100644 index 000000000..0f721abb8 --- /dev/null +++ b/src/test/resources/archives/FreeBSD.zip diff --git a/src/test/resources/archives/FreeBSD_.tar b/src/test/resources/archives/FreeBSD_.tar Binary files differnew file mode 100644 index 000000000..fddd1cdc5 --- /dev/null +++ b/src/test/resources/archives/FreeBSD_.tar diff --git a/src/test/resources/archives/FreeBSD_bin.cpio b/src/test/resources/archives/FreeBSD_bin.cpio Binary files differnew file mode 100644 index 000000000..ab996f2e9 --- /dev/null +++ b/src/test/resources/archives/FreeBSD_bin.cpio diff --git a/src/test/resources/archives/FreeBSD_crc.cpio b/src/test/resources/archives/FreeBSD_crc.cpio Binary files differnew file mode 100644 index 000000000..76ba4ca87 --- /dev/null +++ b/src/test/resources/archives/FreeBSD_crc.cpio diff --git a/src/test/resources/archives/FreeBSD_hpbin.cpio b/src/test/resources/archives/FreeBSD_hpbin.cpio Binary files differnew file mode 100644 index 000000000..ab996f2e9 --- /dev/null +++ b/src/test/resources/archives/FreeBSD_hpbin.cpio diff --git a/src/test/resources/archives/FreeBSD_newc.cpio b/src/test/resources/archives/FreeBSD_newc.cpio Binary files differnew file mode 100644 index 000000000..3fd79a6e4 --- /dev/null +++ b/src/test/resources/archives/FreeBSD_newc.cpio diff --git a/src/test/resources/archives/FreeBSD_pax.tar b/src/test/resources/archives/FreeBSD_pax.tar Binary files differnew file mode 100644 index 000000000..a20d87975 --- /dev/null +++ b/src/test/resources/archives/FreeBSD_pax.tar diff --git a/src/test/resources/archives/FreeBSD_ustar.tar b/src/test/resources/archives/FreeBSD_ustar.tar Binary files differnew file mode 100644 index 000000000..fddd1cdc5 --- /dev/null +++ b/src/test/resources/archives/FreeBSD_ustar.tar diff --git a/src/test/resources/archives/SunOS.ar b/src/test/resources/archives/SunOS.ar new file mode 100644 index 000000000..80c363baf --- /dev/null +++ b/src/test/resources/archives/SunOS.ar @@ -0,0 +1,29 @@ +!<arch> +1/ 1238264462 2606 1 100644 27 ` +abcdefghijklmnopqrstuvwxyz + +12/ 1238277260 2606 1 100644 27 ` +abcdefghijklmnopqrstuvwxyz + +123/ 1238277265 2606 1 100644 27 ` +abcdefghijklmnopqrstuvwxyz + +1234/ 1238277275 2606 1 100644 27 ` +abcdefghijklmnopqrstuvwxyz + +12345/ 1238277277 2606 1 100644 27 ` +abcdefghijklmnopqrstuvwxyz + +123450/ 1238277385 2606 1 100644 27 ` +abcdefghijklmnopqrstuvwxyz + +123456/ 1238277620 2606 1 100644 26 ` +bcdefghijklmnopqrstuvwxyz +123457/ 1238277660 2606 1 100644 25 ` +cdefghijklmnopqrstuvwxyz + +123458/ 1238277670 2606 1 100644 24 ` +defghijklmnopqrstuvwxyz +123459/ 1238277679 2606 1 100644 23 ` +efghijklmnopqrstuvwxyz + diff --git a/src/test/resources/archives/SunOS.zip b/src/test/resources/archives/SunOS.zip Binary files differnew file mode 100644 index 000000000..01a863ab2 --- /dev/null +++ b/src/test/resources/archives/SunOS.zip diff --git a/src/test/resources/archives/SunOS_-c.cpio b/src/test/resources/archives/SunOS_-c.cpio Binary files differnew file mode 100644 index 000000000..298a96e3c --- /dev/null 
+++ b/src/test/resources/archives/SunOS_-c.cpio diff --git a/src/test/resources/archives/SunOS_.cpio b/src/test/resources/archives/SunOS_.cpio Binary files differnew file mode 100644 index 000000000..eab908dfa --- /dev/null +++ b/src/test/resources/archives/SunOS_.cpio diff --git a/src/test/resources/archives/SunOS_cAEf.tar b/src/test/resources/archives/SunOS_cAEf.tar Binary files differnew file mode 100644 index 000000000..97022785c --- /dev/null +++ b/src/test/resources/archives/SunOS_cAEf.tar diff --git a/src/test/resources/archives/SunOS_cEf.tar b/src/test/resources/archives/SunOS_cEf.tar Binary files differnew file mode 100644 index 000000000..e1c301d84 --- /dev/null +++ b/src/test/resources/archives/SunOS_cEf.tar diff --git a/src/test/resources/archives/SunOS_cf.tar b/src/test/resources/archives/SunOS_cf.tar Binary files differnew file mode 100644 index 000000000..d962041a4 --- /dev/null +++ b/src/test/resources/archives/SunOS_cf.tar diff --git a/src/test/resources/archives/SunOS_crc.cpio b/src/test/resources/archives/SunOS_crc.cpio Binary files differnew file mode 100644 index 000000000..e5af1fb64 --- /dev/null +++ b/src/test/resources/archives/SunOS_crc.cpio diff --git a/src/test/resources/archives/SunOS_odc.cpio b/src/test/resources/archives/SunOS_odc.cpio Binary files differnew file mode 100644 index 000000000..bb44e01e8 --- /dev/null +++ b/src/test/resources/archives/SunOS_odc.cpio diff --git a/src/test/resources/archives/files.txt b/src/test/resources/archives/files.txt new file mode 100644 index 000000000..4b403592d --- /dev/null +++ b/src/test/resources/archives/files.txt @@ -0,0 +1,11 @@ +# Sizes and filenames in the archives +27 1 +27 12 +27 123 +27 1234 +27 12345 +27 123450 +26 123456 +25 123457 +24 123458 +23 123459 diff --git a/src/test/resources/bla.7z b/src/test/resources/bla.7z Binary files differnew file mode 100644 index 000000000..623d74bfb --- /dev/null +++ b/src/test/resources/bla.7z diff --git a/src/test/resources/bla.ar b/src/test/resources/bla.ar new file mode 100644 index 000000000..c98e9b789 --- /dev/null +++ b/src/test/resources/bla.ar @@ -0,0 +1,27 @@ +!<arch> +test1.xml 1201445869 501 501 100644 610 ` +<?xml version = '1.0'?>
+<!DOCTYPE connections>
+<connections>
+<<<<<<< HEAD:testdata/test.xml +======= + as
+>>>>>>> 75cb63ff7005344589b57d17338b64783f8f430c:testdata/test.xml + <connection>
+ <JDBC_PORT>1521</JDBC_PORT>
+ <HOSTNAME>10.248.40.111</HOSTNAME>
+ <ConnectionType>JDBC</ConnectionType>
+ <DeployPassword>false</DeployPassword>
+ <user>appsrv</user>
+ <ConnectionName>Dev-DB</ConnectionName>
+ <SID>O10gIN1</SID>
+ <JdbcDriver>oracle.jdbc.driver.OracleDriver</JdbcDriver>
+ <ORACLE_JDBC_TYPE>thin</ORACLE_JDBC_TYPE>
+ </connection>
+</connections>
+test2.xml 1201445869 501 501 100644 82 ` +<?xml version = '1.0'?>
+<!DOCTYPE connections>
+<meinxml>
+ <leer />
+</meinxml> diff --git a/src/test/resources/bla.arj b/src/test/resources/bla.arj Binary files differnew file mode 100644 index 000000000..347b92373 --- /dev/null +++ b/src/test/resources/bla.arj diff --git a/src/test/resources/bla.cpio b/src/test/resources/bla.cpio Binary files differnew file mode 100644 index 000000000..47da4202d --- /dev/null +++ b/src/test/resources/bla.cpio diff --git a/src/test/resources/bla.deflate.7z b/src/test/resources/bla.deflate.7z Binary files differnew file mode 100644 index 000000000..696c3e437 --- /dev/null +++ b/src/test/resources/bla.deflate.7z diff --git a/src/test/resources/bla.deflate64.7z b/src/test/resources/bla.deflate64.7z Binary files differnew file mode 100644 index 000000000..94f60bd0d --- /dev/null +++ b/src/test/resources/bla.deflate64.7z diff --git a/src/test/resources/bla.dump b/src/test/resources/bla.dump Binary files differnew file mode 100644 index 000000000..51f96a392 --- /dev/null +++ b/src/test/resources/bla.dump diff --git a/src/test/resources/bla.dump.lz4 b/src/test/resources/bla.dump.lz4 Binary files differnew file mode 100644 index 000000000..d2a813f45 --- /dev/null +++ b/src/test/resources/bla.dump.lz4 diff --git a/src/test/resources/bla.encrypted.7z b/src/test/resources/bla.encrypted.7z Binary files differnew file mode 100644 index 000000000..2627f4153 --- /dev/null +++ b/src/test/resources/bla.encrypted.7z diff --git a/src/test/resources/bla.jar b/src/test/resources/bla.jar Binary files differnew file mode 100644 index 000000000..ad3ed8256 --- /dev/null +++ b/src/test/resources/bla.jar diff --git a/src/test/resources/bla.pack b/src/test/resources/bla.pack Binary files differnew file mode 100644 index 000000000..1332ca4dd --- /dev/null +++ b/src/test/resources/bla.pack diff --git a/src/test/resources/bla.tar b/src/test/resources/bla.tar Binary files differnew file mode 100644 index 000000000..c7af5379a --- /dev/null +++ b/src/test/resources/bla.tar diff --git a/src/test/resources/bla.tar.Z b/src/test/resources/bla.tar.Z Binary files differnew file mode 100644 index 000000000..e45d62d25 --- /dev/null +++ b/src/test/resources/bla.tar.Z diff --git a/src/test/resources/bla.tar.block_lz4 b/src/test/resources/bla.tar.block_lz4 Binary files differnew file mode 100644 index 000000000..1ceb997d8 --- /dev/null +++ b/src/test/resources/bla.tar.block_lz4 diff --git a/src/test/resources/bla.tar.br b/src/test/resources/bla.tar.br Binary files differnew file mode 100644 index 000000000..35b1f99ed --- /dev/null +++ b/src/test/resources/bla.tar.br diff --git a/src/test/resources/bla.tar.bz2 b/src/test/resources/bla.tar.bz2 Binary files differnew file mode 100644 index 000000000..fe3885887 --- /dev/null +++ b/src/test/resources/bla.tar.bz2 diff --git a/src/test/resources/bla.tar.deflate b/src/test/resources/bla.tar.deflate Binary files differnew file mode 100644 index 000000000..c83898fad --- /dev/null +++ b/src/test/resources/bla.tar.deflate diff --git a/src/test/resources/bla.tar.deflatez b/src/test/resources/bla.tar.deflatez Binary files differnew file mode 100644 index 000000000..32ebe7769 --- /dev/null +++ b/src/test/resources/bla.tar.deflatez diff --git a/src/test/resources/bla.tar.lz4 b/src/test/resources/bla.tar.lz4 Binary files differnew file mode 100644 index 000000000..cbf57fa8b --- /dev/null +++ b/src/test/resources/bla.tar.lz4 diff --git a/src/test/resources/bla.tar.lzma b/src/test/resources/bla.tar.lzma Binary files differnew file mode 100644 index 000000000..10968fdac --- /dev/null +++ b/src/test/resources/bla.tar.lzma diff --git 
a/src/test/resources/bla.tar.sz b/src/test/resources/bla.tar.sz Binary files differnew file mode 100644 index 000000000..fd74b4e0f --- /dev/null +++ b/src/test/resources/bla.tar.sz diff --git a/src/test/resources/bla.tar.xz b/src/test/resources/bla.tar.xz Binary files differnew file mode 100644 index 000000000..777c4ab37 --- /dev/null +++ b/src/test/resources/bla.tar.xz diff --git a/src/test/resources/bla.tgz b/src/test/resources/bla.tgz Binary files differnew file mode 100644 index 000000000..d741f1ee1 --- /dev/null +++ b/src/test/resources/bla.tgz diff --git a/src/test/resources/bla.txt.bz2 b/src/test/resources/bla.txt.bz2 Binary files differnew file mode 100644 index 000000000..87309da8f --- /dev/null +++ b/src/test/resources/bla.txt.bz2 diff --git a/src/test/resources/bla.unix.arj b/src/test/resources/bla.unix.arj Binary files differnew file mode 100644 index 000000000..4c9bc4d56 --- /dev/null +++ b/src/test/resources/bla.unix.arj diff --git a/src/test/resources/bla.xml.bz2 b/src/test/resources/bla.xml.bz2 Binary files differnew file mode 100644 index 000000000..a2bbcfcf3 --- /dev/null +++ b/src/test/resources/bla.xml.bz2 diff --git a/src/test/resources/bla.z.dump b/src/test/resources/bla.z.dump Binary files differnew file mode 100644 index 000000000..967a95e07 --- /dev/null +++ b/src/test/resources/bla.z.dump diff --git a/src/test/resources/bla.zip b/src/test/resources/bla.zip Binary files differnew file mode 100644 index 000000000..160eedc6b --- /dev/null +++ b/src/test/resources/bla.zip diff --git a/src/test/resources/brotli.testdata.compressed b/src/test/resources/brotli.testdata.compressed Binary files differnew file mode 100644 index 000000000..3769516d9 --- /dev/null +++ b/src/test/resources/brotli.testdata.compressed diff --git a/src/test/resources/brotli.testdata.uncompressed b/src/test/resources/brotli.testdata.uncompressed new file mode 100644 index 000000000..3f9cf8651 --- /dev/null +++ b/src/test/resources/brotli.testdata.uncompressed @@ -0,0 +1 @@ +XXXXXXXXXXYYYYYYYYYY
\ No newline at end of file diff --git a/src/test/resources/bzip2-zip.zip b/src/test/resources/bzip2-zip.zip Binary files differnew file mode 100644 index 000000000..e1a85c7a6 --- /dev/null +++ b/src/test/resources/bzip2-zip.zip diff --git a/src/test/resources/imploding-4Kdict-2trees.zip b/src/test/resources/imploding-4Kdict-2trees.zip Binary files differnew file mode 100644 index 000000000..106e41023 --- /dev/null +++ b/src/test/resources/imploding-4Kdict-2trees.zip diff --git a/src/test/resources/imploding-8Kdict-3trees.zip b/src/test/resources/imploding-8Kdict-3trees.zip Binary files differnew file mode 100644 index 000000000..7b966e0cc --- /dev/null +++ b/src/test/resources/imploding-8Kdict-3trees.zip diff --git a/src/test/resources/invalid-zip.zip b/src/test/resources/invalid-zip.zip new file mode 100644 index 000000000..ff6f1ec3d --- /dev/null +++ b/src/test/resources/invalid-zip.zip @@ -0,0 +1,2 @@ +This is not really a valid zip file even though it has the zip extension. +ZipArchiveInputStream.getNextZipEntry() should throw an exception. diff --git a/src/test/resources/longfile_bsd.ar b/src/test/resources/longfile_bsd.ar new file mode 100644 index 000000000..f7c3682d0 --- /dev/null +++ b/src/test/resources/longfile_bsd.ar @@ -0,0 +1,5 @@ +!<arch> +#1/28 1311256511 1000 1000 100644 42 ` +this_is_a_long_file_name.txtHello, world! +#1/36 1454694016 1000 1000 100664 40 ` +this_is_a_long_file_name_as_well.txtBye diff --git a/src/test/resources/longfile_gnu.ar b/src/test/resources/longfile_gnu.ar new file mode 100644 index 000000000..12799a033 --- /dev/null +++ b/src/test/resources/longfile_gnu.ar @@ -0,0 +1,8 @@ +!<arch> +// 68 ` +this_is_a_long_file_name.txt/ +this_is_a_long_file_name_as_well.txt/ +/0 1454693980 1000 1000 100664 14 ` +Hello, world! +/30 1454694016 1000 1000 100664 4 ` +Bye diff --git a/src/test/resources/longpath/cygwin_gnu.tar b/src/test/resources/longpath/cygwin_gnu.tar Binary files differnew file mode 100644 index 000000000..a992f45fa --- /dev/null +++ b/src/test/resources/longpath/cygwin_gnu.tar diff --git a/src/test/resources/longpath/cygwin_oldgnu.tar b/src/test/resources/longpath/cygwin_oldgnu.tar Binary files differnew file mode 100644 index 000000000..de8824a4c --- /dev/null +++ b/src/test/resources/longpath/cygwin_oldgnu.tar diff --git a/src/test/resources/longpath/cygwin_pax.tar b/src/test/resources/longpath/cygwin_pax.tar Binary files differnew file mode 100644 index 000000000..ef384b2c3 --- /dev/null +++ b/src/test/resources/longpath/cygwin_pax.tar diff --git a/src/test/resources/longpath/cygwin_posix.tar b/src/test/resources/longpath/cygwin_posix.tar Binary files differnew file mode 100644 index 000000000..92f4cb940 --- /dev/null +++ b/src/test/resources/longpath/cygwin_posix.tar diff --git a/src/test/resources/longpath/cygwin_ustar.tar b/src/test/resources/longpath/cygwin_ustar.tar Binary files differnew file mode 100644 index 000000000..f0ae82fbd --- /dev/null +++ b/src/test/resources/longpath/cygwin_ustar.tar diff --git a/src/test/resources/longpath/files.txt b/src/test/resources/longpath/files.txt new file mode 100644 index 000000000..eba6b65a4 --- /dev/null +++ b/src/test/resources/longpath/files.txt @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# File prefixes correspond to the following OSes: +# cygwin - CYGWIN_NT-5.1 1.7.1(0.218/5/3) +# hudson - SunOS 5.10 +# minotaur - FreeBSD 8.0-STABLE +# vmgump - Linux 2.6.24-27-server +# winXP_ant - Ant 1.7.1 on Windows/XP +# +compress-test/ +compress-test/dir1/ +compress-test/dir1/dir2/ +compress-test/dir1/dir2/dir3/ +compress-test/dir1/dir2/dir3/dir4/ +compress-test/dir1/dir2/dir3/dir4/dir5/ +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/ +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/ +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/ +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/ +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dir10/ +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dir10/file12345678901234567890.txt +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dir10/file1234567890123456789012345678901234567890123456789012345678901234567890.txt +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dir10/file123456789012345678901234567890123456789012345678901234567890.txt +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dir10/file.txt +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dir10/file123456789012345678901234567890.txt +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dir10/file1234567890.txt +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dir10/file1234567890123456789012345678901234567890.txt +compress-test/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dir10/file12345678901234567890123456789012345678901234567890.txt
\ No newline at end of file diff --git a/src/test/resources/longpath/hudson-E.tar b/src/test/resources/longpath/hudson-E.tar Binary files differnew file mode 100644 index 000000000..afcc870e7 --- /dev/null +++ b/src/test/resources/longpath/hudson-E.tar diff --git a/src/test/resources/longpath/hudson.tar b/src/test/resources/longpath/hudson.tar Binary files differnew file mode 100644 index 000000000..4fa733080 --- /dev/null +++ b/src/test/resources/longpath/hudson.tar diff --git a/src/test/resources/longpath/minotaur-0.jar b/src/test/resources/longpath/minotaur-0.jar Binary files differnew file mode 100644 index 000000000..53346ca9e --- /dev/null +++ b/src/test/resources/longpath/minotaur-0.jar diff --git a/src/test/resources/longpath/minotaur-M.jar b/src/test/resources/longpath/minotaur-M.jar Binary files differnew file mode 100644 index 000000000..815724a83 --- /dev/null +++ b/src/test/resources/longpath/minotaur-M.jar diff --git a/src/test/resources/longpath/minotaur-c.zip b/src/test/resources/longpath/minotaur-c.zip Binary files differnew file mode 100644 index 000000000..803b700b1 --- /dev/null +++ b/src/test/resources/longpath/minotaur-c.zip diff --git a/src/test/resources/longpath/minotaur-z.zip b/src/test/resources/longpath/minotaur-z.zip Binary files differnew file mode 100644 index 000000000..1ab9b2ee9 --- /dev/null +++ b/src/test/resources/longpath/minotaur-z.zip diff --git a/src/test/resources/longpath/minotaur.ar b/src/test/resources/longpath/minotaur.ar new file mode 100644 index 000000000..b26532b3a --- /dev/null +++ b/src/test/resources/longpath/minotaur.ar @@ -0,0 +1,17 @@ +!<arch> +// 350 ` +file12345678901234567890.txt/ +file1234567890123456789012345678901234567890123456789012345678901234567890.txt/ +file123456789012345678901234567890123456789012345678901234567890.txt/ +file123456789012345678901234567890.txt/ +file1234567890.txt/ +file1234567890123456789012345678901234567890.txt/ +file12345678901234567890123456789012345678901234567890.txt/ +/0 1273319811 1721 1721 100664 0 ` +/30 1273319842 1721 1721 100664 0 ` +/110 1273319835 1721 1721 100664 0 ` +file.txt/ 1273319777 1721 1721 100664 0 ` +/180 1273319817 1721 1721 100664 0 ` +/220 1273319794 1721 1721 100664 0 ` +/240 1273319823 1721 1721 100664 0 ` +/290 1273319828 1721 1721 100664 0 ` diff --git a/src/test/resources/longpath/minotaur.cpio b/src/test/resources/longpath/minotaur.cpio Binary files differnew file mode 100644 index 000000000..0445dc6af --- /dev/null +++ b/src/test/resources/longpath/minotaur.cpio diff --git a/src/test/resources/longpath/minotaur.jar b/src/test/resources/longpath/minotaur.jar Binary files differnew file mode 100644 index 000000000..d80d1b967 --- /dev/null +++ b/src/test/resources/longpath/minotaur.jar diff --git a/src/test/resources/longpath/minotaur.zip b/src/test/resources/longpath/minotaur.zip Binary files differnew file mode 100644 index 000000000..82a67901a --- /dev/null +++ b/src/test/resources/longpath/minotaur.zip diff --git a/src/test/resources/longpath/minotaur_pax.tar b/src/test/resources/longpath/minotaur_pax.tar Binary files differnew file mode 100644 index 000000000..4d448be02 --- /dev/null +++ b/src/test/resources/longpath/minotaur_pax.tar diff --git a/src/test/resources/longpath/minotaur_posix.tar b/src/test/resources/longpath/minotaur_posix.tar Binary files differnew file mode 100644 index 000000000..e83ce7cd0 --- /dev/null +++ b/src/test/resources/longpath/minotaur_posix.tar diff --git a/src/test/resources/longpath/minotaur_ustar.tar 
b/src/test/resources/longpath/minotaur_ustar.tar Binary files differnew file mode 100644 index 000000000..f7930ca3e --- /dev/null +++ b/src/test/resources/longpath/minotaur_ustar.tar diff --git a/src/test/resources/longpath/vmgump_gnu.tar b/src/test/resources/longpath/vmgump_gnu.tar Binary files differnew file mode 100644 index 000000000..255b4bed6 --- /dev/null +++ b/src/test/resources/longpath/vmgump_gnu.tar diff --git a/src/test/resources/longpath/vmgump_oldgnu.tar b/src/test/resources/longpath/vmgump_oldgnu.tar Binary files differnew file mode 100644 index 000000000..20089d2a3 --- /dev/null +++ b/src/test/resources/longpath/vmgump_oldgnu.tar diff --git a/src/test/resources/longpath/vmgump_pax.tar b/src/test/resources/longpath/vmgump_pax.tar Binary files differnew file mode 100644 index 000000000..2374f8244 --- /dev/null +++ b/src/test/resources/longpath/vmgump_pax.tar diff --git a/src/test/resources/longpath/vmgump_posix.tar b/src/test/resources/longpath/vmgump_posix.tar Binary files differnew file mode 100644 index 000000000..d1ac2aa12 --- /dev/null +++ b/src/test/resources/longpath/vmgump_posix.tar diff --git a/src/test/resources/longpath/vmgump_ustar.tar b/src/test/resources/longpath/vmgump_ustar.tar Binary files differnew file mode 100644 index 000000000..8d394add4 --- /dev/null +++ b/src/test/resources/longpath/vmgump_ustar.tar diff --git a/src/test/resources/longpath/winXP_antgnu.tar b/src/test/resources/longpath/winXP_antgnu.tar Binary files differnew file mode 100644 index 000000000..6321c1347 --- /dev/null +++ b/src/test/resources/longpath/winXP_antgnu.tar diff --git a/src/test/resources/longsymlink/files.txt b/src/test/resources/longsymlink/files.txt new file mode 100644 index 000000000..2af0afd78 --- /dev/null +++ b/src/test/resources/longsymlink/files.txt @@ -0,0 +1 @@ +0xxxxxxxxx10xxxxxxxx20xxxxxxxx30xxxxxxxx40xxxxxxxx50xxxxxxxx60xxxxxxxx70xxxxxxxx80xxxxxxxx90xxxxxxxx100xxxxxxx110xxxxxxx120xxxxxxx130xxxxxxx -> 0yyyyyyyyy10yyyyyyyy20yyyyyyyy30yyyyyyyy40yyyyyyyy50yyyyyyyy60yyyyyyyy70yyyyyyyy80yyyyyyyy90yyyyyyyy100yyyyyyy110yyyyyyy120yyyyyyy130yyyyyyy diff --git a/src/test/resources/longsymlink/gnu.tar b/src/test/resources/longsymlink/gnu.tar Binary files differnew file mode 100644 index 000000000..b747f11ec --- /dev/null +++ b/src/test/resources/longsymlink/gnu.tar diff --git a/src/test/resources/lorem-ipsum.txt.gz b/src/test/resources/lorem-ipsum.txt.gz Binary files differnew file mode 100644 index 000000000..5b42d8e43 --- /dev/null +++ b/src/test/resources/lorem-ipsum.txt.gz diff --git a/src/test/resources/lorem-ipsum.txt.sz b/src/test/resources/lorem-ipsum.txt.sz Binary files differnew file mode 100644 index 000000000..9a6b1fbb0 --- /dev/null +++ b/src/test/resources/lorem-ipsum.txt.sz diff --git a/src/test/resources/mixed.txt.sz b/src/test/resources/mixed.txt.sz Binary files differnew file mode 100644 index 000000000..4d21d3963 --- /dev/null +++ b/src/test/resources/mixed.txt.sz diff --git a/src/test/resources/mixed.zip b/src/test/resources/mixed.zip Binary files differnew file mode 100644 index 000000000..a36f2af6d --- /dev/null +++ b/src/test/resources/mixed.zip diff --git a/src/test/resources/moby-imploded.zip b/src/test/resources/moby-imploded.zip Binary files differnew file mode 100644 index 000000000..71dac4319 --- /dev/null +++ b/src/test/resources/moby-imploded.zip diff --git a/src/test/resources/moby.zip b/src/test/resources/moby.zip Binary files differnew file mode 100644 index 000000000..ae3f9ee6a --- /dev/null +++ b/src/test/resources/moby.zip diff --git 
a/src/test/resources/multiple.bz2 b/src/test/resources/multiple.bz2 Binary files differnew file mode 100644 index 000000000..26dc3a750 --- /dev/null +++ b/src/test/resources/multiple.bz2 diff --git a/src/test/resources/multiple.gz b/src/test/resources/multiple.gz Binary files differnew file mode 100644 index 000000000..f5fd0675e --- /dev/null +++ b/src/test/resources/multiple.gz diff --git a/src/test/resources/multiple.xz b/src/test/resources/multiple.xz Binary files differnew file mode 100644 index 000000000..5d2256bc2 --- /dev/null +++ b/src/test/resources/multiple.xz diff --git a/src/test/resources/oldgnu_sparse.tar b/src/test/resources/oldgnu_sparse.tar Binary files differnew file mode 100644 index 000000000..b9953438e --- /dev/null +++ b/src/test/resources/oldgnu_sparse.tar diff --git a/src/test/resources/ordertest.zip b/src/test/resources/ordertest.zip Binary files differnew file mode 100644 index 000000000..521158aca --- /dev/null +++ b/src/test/resources/ordertest.zip diff --git a/src/test/resources/password-encrypted.zip b/src/test/resources/password-encrypted.zip Binary files differnew file mode 100644 index 000000000..939029cd6 --- /dev/null +++ b/src/test/resources/password-encrypted.zip diff --git a/src/test/resources/pax_gnu_sparse.tar b/src/test/resources/pax_gnu_sparse.tar Binary files differnew file mode 100644 index 000000000..11fb4ddba --- /dev/null +++ b/src/test/resources/pax_gnu_sparse.tar diff --git a/src/test/resources/posix00_sparse.tar b/src/test/resources/posix00_sparse.tar Binary files differnew file mode 100644 index 000000000..70199f4b0 --- /dev/null +++ b/src/test/resources/posix00_sparse.tar diff --git a/src/test/resources/posix01_sparse.tar b/src/test/resources/posix01_sparse.tar Binary files differnew file mode 100644 index 000000000..8b43732f0 --- /dev/null +++ b/src/test/resources/posix01_sparse.tar diff --git a/src/test/resources/posix10_sparse.tar b/src/test/resources/posix10_sparse.tar Binary files differnew file mode 100644 index 000000000..e7ac38db4 --- /dev/null +++ b/src/test/resources/posix10_sparse.tar diff --git a/src/test/resources/preepoch-posix.tar b/src/test/resources/preepoch-posix.tar Binary files differnew file mode 100644 index 000000000..e9a89c3a2 --- /dev/null +++ b/src/test/resources/preepoch-posix.tar diff --git a/src/test/resources/preepoch-star.tar b/src/test/resources/preepoch-star.tar Binary files differnew file mode 100644 index 000000000..499ec8e1d --- /dev/null +++ b/src/test/resources/preepoch-star.tar diff --git a/src/test/resources/redline.cpio b/src/test/resources/redline.cpio Binary files differnew file mode 100644 index 000000000..f754d73ab --- /dev/null +++ b/src/test/resources/redline.cpio diff --git a/src/test/resources/simple-aix-native-tar.tar b/src/test/resources/simple-aix-native-tar.tar Binary files differnew file mode 100644 index 000000000..d8b1d11f9 --- /dev/null +++ b/src/test/resources/simple-aix-native-tar.tar diff --git a/src/test/resources/test with spaces.txt b/src/test/resources/test with spaces.txt new file mode 100644 index 000000000..b15c6a452 --- /dev/null +++ b/src/test/resources/test with spaces.txt @@ -0,0 +1,11 @@ +TEST WITH SPACES IN FILENAME +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 
+111111111111111111111111111000101011
\ No newline at end of file diff --git a/src/test/resources/test-winzip.zip b/src/test/resources/test-winzip.zip Binary files differnew file mode 100644 index 000000000..41e39e78c --- /dev/null +++ b/src/test/resources/test-winzip.zip diff --git a/src/test/resources/test.txt b/src/test/resources/test.txt new file mode 100644 index 000000000..beecb7519 --- /dev/null +++ b/src/test/resources/test.txt @@ -0,0 +1,10 @@ +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011 +111111111111111111111111111000101011
\ No newline at end of file diff --git a/src/test/resources/test1.xml b/src/test/resources/test1.xml new file mode 100644 index 000000000..3690c19f1 --- /dev/null +++ b/src/test/resources/test1.xml @@ -0,0 +1,4 @@ +<?xml version = '1.0'?> +<!DOCTYPE connections> +<connections> +</connections> diff --git a/src/test/resources/test2.xml b/src/test/resources/test2.xml new file mode 100644 index 000000000..dc9ddd021 --- /dev/null +++ b/src/test/resources/test2.xml @@ -0,0 +1,5 @@ +<?xml version = '1.0'?> +<!DOCTYPE connections> +<meinxml> + <leer /> +</meinxml> diff --git a/src/test/resources/test3.xml b/src/test/resources/test3.xml new file mode 100644 index 000000000..234c5b73f --- /dev/null +++ b/src/test/resources/test3.xml @@ -0,0 +1,10 @@ +<?xml version = '1.0'?> +<!DOCTYPE connections> +<text> +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, +sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, +sed diam voluptua. +At vero eos et accusam et justo duo dolores et ea rebum. +Stet clita kasd gubergren, no sea takimata sanctus est +Lorem ipsum dolor sit amet. +</text> diff --git a/src/test/resources/test4.xml b/src/test/resources/test4.xml new file mode 100644 index 000000000..132c9d1c4 --- /dev/null +++ b/src/test/resources/test4.xml @@ -0,0 +1,6 @@ +<?xml version = '1.0'?> +<!DOCTYPE connections> +<connections> +German Umlauts: ÜÄÖß +Stored as UTF-8 (Mac OSX 10.4.x) +</connections> diff --git a/src/test/resources/testAIFF.aif b/src/test/resources/testAIFF.aif Binary files differnew file mode 100644 index 000000000..97eac1d8e --- /dev/null +++ b/src/test/resources/testAIFF.aif diff --git a/src/test/resources/testCompress209.doc b/src/test/resources/testCompress209.doc Binary files differnew file mode 100644 index 000000000..871d30df3 --- /dev/null +++ b/src/test/resources/testCompress209.doc diff --git a/src/test/resources/testNumbersNew.numbers b/src/test/resources/testNumbersNew.numbers Binary files differnew file mode 100644 index 000000000..3f9a01302 --- /dev/null +++ b/src/test/resources/testNumbersNew.numbers diff --git a/src/test/resources/utf8-7zip-test.zip b/src/test/resources/utf8-7zip-test.zip Binary files differnew file mode 100644 index 000000000..fc86773c9 --- /dev/null +++ b/src/test/resources/utf8-7zip-test.zip diff --git a/src/test/resources/utf8-winzip-test.zip b/src/test/resources/utf8-winzip-test.zip Binary files differnew file mode 100644 index 000000000..32610048d --- /dev/null +++ b/src/test/resources/utf8-winzip-test.zip diff --git a/src/test/resources/zip64support.tar.bz2 b/src/test/resources/zip64support.tar.bz2 Binary files differnew file mode 100644 index 000000000..dbc590754 --- /dev/null +++ b/src/test/resources/zip64support.tar.bz2 diff --git a/src/test/resources/zipbomb.xlsx b/src/test/resources/zipbomb.xlsx Binary files differnew file mode 100644 index 000000000..46a4211cd --- /dev/null +++ b/src/test/resources/zipbomb.xlsx diff --git a/src/test/resources/zstd-tests.tar b/src/test/resources/zstd-tests.tar Binary files differnew file mode 100644 index 000000000..d3067fdc4 --- /dev/null +++ b/src/test/resources/zstd-tests.tar |