diff options
Diffstat (limited to 'src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java')
-rw-r--r-- | src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java | 408 |
1 files changed, 408 insertions, 0 deletions
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java new file mode 100644 index 000000000..da9bb24d7 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java @@ -0,0 +1,408 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.util.Enumeration; +import java.util.zip.CRC32; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.utils.CharsetNames; +import org.junit.Test; + +public class UTF8ZipFilesTest extends AbstractTestCase { + + private static final String CP437 = "cp437"; + private static final String ASCII_TXT = "ascii.txt"; + private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt"; + private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt"; + + @Test + public void testUtf8FileRoundtripExplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.UTF_8, true, true); + } + + @Test + public void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.UTF_8, false, true); + } + + @Test + public void testCP437FileRoundtripExplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CP437, false, true); + } + + @Test + public void testASCIIFileRoundtripExplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.US_ASCII, false, true); + } + + @Test + public void testUtf8FileRoundtripImplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.UTF_8, true, false); + } + + @Test + public void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.UTF_8, false, false); + } + + @Test + public void testCP437FileRoundtripImplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CP437, false, false); + } + + @Test + public void testASCIIFileRoundtripImplicitUnicodeExtra() + throws IOException { + testFileRoundtrip(CharsetNames.US_ASCII, false, false); + } + + /* + * 7-ZIP created archive, uses EFS to signal UTF-8 filenames. + * + * 7-ZIP doesn't use EFS for strings that can be encoded in CP437 + * - which is true for OIL_BARREL_TXT. + */ + @Test + public void testRead7ZipArchive() throws IOException { + final File archive = getFile("utf8-7zip-test.zip"); + ZipFile zf = null; + try { + zf = new ZipFile(archive, CP437, false); + assertNotNull(zf.getEntry(ASCII_TXT)); + assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT)); + assertNotNull(zf.getEntry(OIL_BARREL_TXT)); + } finally { + ZipFile.closeQuietly(zf); + } + } + + @Test + public void testRead7ZipArchiveForStream() throws IOException { + final FileInputStream archive = + new FileInputStream(getFile("utf8-7zip-test.zip")); + ZipArchiveInputStream zi = null; + try { + zi = new ZipArchiveInputStream(archive, CP437, false); + assertEquals(ASCII_TXT, zi.getNextEntry().getName()); + assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName()); + assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName()); + } finally { + if (zi != null) { + zi.close(); + } + } + } + + /* + * WinZIP created archive, uses Unicode Extra Fields but only in + * the central directory. + */ + @Test + public void testReadWinZipArchive() throws IOException { + final File archive = getFile("utf8-winzip-test.zip"); + ZipFile zf = null; + try { + zf = new ZipFile(archive, null, true); + assertCanRead(zf, ASCII_TXT); + assertCanRead(zf, EURO_FOR_DOLLAR_TXT); + assertCanRead(zf, OIL_BARREL_TXT); + } finally { + ZipFile.closeQuietly(zf); + } + } + + private void assertCanRead(final ZipFile zf, final String fileName) throws IOException { + final ZipArchiveEntry entry = zf.getEntry(fileName); + assertNotNull("Entry doesn't exist", entry); + final InputStream is = zf.getInputStream(entry); + assertNotNull("InputStream is null", is); + try { + is.read(); + } finally { + is.close(); + } + } + + @Test + public void testReadWinZipArchiveForStream() throws IOException { + final FileInputStream archive = + new FileInputStream(getFile("utf8-winzip-test.zip")); + ZipArchiveInputStream zi = null; + try { + zi = new ZipArchiveInputStream(archive, null, true); + assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName()); + assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName()); + assertEquals(ASCII_TXT, zi.getNextEntry().getName()); + } finally { + if (zi != null) { + zi.close(); + } + } + } + + @Test + public void testZipFileReadsUnicodeFields() throws IOException { + final File file = File.createTempFile("unicode-test", ".zip"); + file.deleteOnExit(); + ZipArchiveInputStream zi = null; + try { + createTestFile(file, CharsetNames.US_ASCII, false, true); + final FileInputStream archive = new FileInputStream(file); + zi = new ZipArchiveInputStream(archive, CharsetNames.US_ASCII, true); + assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName()); + assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName()); + assertEquals(ASCII_TXT, zi.getNextEntry().getName()); + } finally { + if (zi != null) { + zi.close(); + } + tryHardToDelete(file); + } + } + + @Test + public void testZipArchiveInputStreamReadsUnicodeFields() + throws IOException { + final File file = File.createTempFile("unicode-test", ".zip"); + file.deleteOnExit(); + ZipFile zf = null; + try { + createTestFile(file, CharsetNames.US_ASCII, false, true); + zf = new ZipFile(file, CharsetNames.US_ASCII, true); + assertNotNull(zf.getEntry(ASCII_TXT)); + assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT)); + assertNotNull(zf.getEntry(OIL_BARREL_TXT)); + } finally { + ZipFile.closeQuietly(zf); + tryHardToDelete(file); + } + } + + @Test + public void testRawNameReadFromZipFile() + throws IOException { + final File archive = getFile("utf8-7zip-test.zip"); + ZipFile zf = null; + try { + zf = new ZipFile(archive, CP437, false); + assertRawNameOfAcsiiTxt(zf.getEntry(ASCII_TXT)); + } finally { + ZipFile.closeQuietly(zf); + } + } + + @Test + public void testRawNameReadFromStream() + throws IOException { + final FileInputStream archive = + new FileInputStream(getFile("utf8-7zip-test.zip")); + ZipArchiveInputStream zi = null; + try { + zi = new ZipArchiveInputStream(archive, CP437, false); + assertRawNameOfAcsiiTxt((ZipArchiveEntry) zi.getNextEntry()); + } finally { + if (zi != null) { + zi.close(); + } + } + } + + private static void testFileRoundtrip(final String encoding, final boolean withEFS, + final boolean withExplicitUnicodeExtra) + throws IOException { + + final File file = File.createTempFile(encoding + "-test", ".zip"); + file.deleteOnExit(); + try { + createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra); + testFile(file, encoding); + } finally { + tryHardToDelete(file); + } + } + + private static void createTestFile(final File file, final String encoding, + final boolean withEFS, + final boolean withExplicitUnicodeExtra) + throws UnsupportedEncodingException, IOException { + + final ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); + + ZipArchiveOutputStream zos = null; + try { + zos = new ZipArchiveOutputStream(file); + zos.setEncoding(encoding); + zos.setUseLanguageEncodingFlag(withEFS); + zos.setCreateUnicodeExtraFields(withExplicitUnicodeExtra ? + ZipArchiveOutputStream + .UnicodeExtraFieldPolicy.NEVER + : ZipArchiveOutputStream + .UnicodeExtraFieldPolicy.ALWAYS); + + ZipArchiveEntry ze = new ZipArchiveEntry(OIL_BARREL_TXT); + if (withExplicitUnicodeExtra + && !zipEncoding.canEncode(ze.getName())) { + + final ByteBuffer en = zipEncoding.encode(ze.getName()); + + ze.addExtraField(new UnicodePathExtraField(ze.getName(), + en.array(), + en.arrayOffset(), + en.limit() + - en.position())); + } + + zos.putArchiveEntry(ze); + zos.write("Hello, world!".getBytes(CharsetNames.US_ASCII)); + zos.closeArchiveEntry(); + + ze = new ZipArchiveEntry(EURO_FOR_DOLLAR_TXT); + if (withExplicitUnicodeExtra + && !zipEncoding.canEncode(ze.getName())) { + + final ByteBuffer en = zipEncoding.encode(ze.getName()); + + ze.addExtraField(new UnicodePathExtraField(ze.getName(), + en.array(), + en.arrayOffset(), + en.limit() + - en.position())); + } + + zos.putArchiveEntry(ze); + zos.write("Give me your money!".getBytes(CharsetNames.US_ASCII)); + zos.closeArchiveEntry(); + + ze = new ZipArchiveEntry(ASCII_TXT); + + if (withExplicitUnicodeExtra + && !zipEncoding.canEncode(ze.getName())) { + + final ByteBuffer en = zipEncoding.encode(ze.getName()); + + ze.addExtraField(new UnicodePathExtraField(ze.getName(), + en.array(), + en.arrayOffset(), + en.limit() + - en.position())); + } + + zos.putArchiveEntry(ze); + zos.write("ascii".getBytes(CharsetNames.US_ASCII)); + zos.closeArchiveEntry(); + + zos.finish(); + } finally { + if (zos != null) { + try { + zos.close(); + } catch (final IOException e) { /* swallow */ } + } + } + } + + private static void testFile(final File file, final String encoding) + throws IOException { + ZipFile zf = null; + try { + zf = new ZipFile(file, encoding, false); + + final Enumeration<ZipArchiveEntry> e = zf.getEntries(); + while (e.hasMoreElements()) { + final ZipArchiveEntry ze = e.nextElement(); + + if (ze.getName().endsWith("sser.txt")) { + assertUnicodeName(ze, OIL_BARREL_TXT, encoding); + + } else if (ze.getName().endsWith("_for_Dollar.txt")) { + assertUnicodeName(ze, EURO_FOR_DOLLAR_TXT, encoding); + } else if (!ze.getName().equals(ASCII_TXT)) { + throw new AssertionError("Unrecognized ZIP entry with name [" + + ze.getName() + "] found."); + } + } + } finally { + ZipFile.closeQuietly(zf); + } + } + + private static UnicodePathExtraField findUniCodePath(final ZipArchiveEntry ze) { + return (UnicodePathExtraField) + ze.getExtraField(UnicodePathExtraField.UPATH_ID); + } + + private static void assertUnicodeName(final ZipArchiveEntry ze, + final String expectedName, + final String encoding) + throws IOException { + if (!expectedName.equals(ze.getName())) { + final UnicodePathExtraField ucpf = findUniCodePath(ze); + assertNotNull(ucpf); + + final ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding); + final ByteBuffer ne = enc.encode(ze.getName()); + + final CRC32 crc = new CRC32(); + crc.update(ne.array(), ne.arrayOffset(), + ne.limit() - ne.position()); + + assertEquals(crc.getValue(), ucpf.getNameCRC32()); + assertEquals(expectedName, new String(ucpf.getUnicodeName(), + CharsetNames.UTF_8)); + } + } + + @Test + public void testUtf8Interoperability() throws IOException { + final File file1 = getFile("utf8-7zip-test.zip"); + final File file2 = getFile("utf8-winzip-test.zip"); + + testFile(file1,CP437); + testFile(file2,CP437); + + } + + private static void assertRawNameOfAcsiiTxt(final ZipArchiveEntry ze) { + final byte[] b = ze.getRawName(); + assertNotNull(b); + final int len = ASCII_TXT.length(); + assertEquals(len, b.length); + for (int i = 0; i < len; i++) { + assertEquals("Byte " + i, (byte) ASCII_TXT.charAt(i), b[i]); + } + assertNotSame(b, ze.getRawName()); + } +} + |