aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java')
-rw-r--r-- src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java 216
1 files changed, 216 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
new file mode 100644
index 000000000..0a7581acf
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.commons.compress.archivers.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+/**
+ * A ZipEncoding, which uses a java.nio {@link
+ * java.nio.charset.Charset Charset} to encode names.
+ * <p>The methods of this class are reentrant.</p>
+ * @Immutable
+ */
+class NioZipEncoding implements ZipEncoding, CharsetAccessor {
+
+ private final Charset charset;
+ private final boolean useReplacement;
+ private static final char REPLACEMENT = '?';
+ private static final byte[] REPLACEMENT_BYTES = { (byte) REPLACEMENT };
+ private static final String REPLACEMENT_STRING = String.valueOf(REPLACEMENT);
+ private static final char[] HEX_CHARS = new char[] {
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
+ };
+
+
+ /**
+ * Construct an NioZipEncoding using the given charset.
+ * @param charset The character set to use.
+ * @param useReplacement should invalid characters be replaced, or reported.
+ */
+ NioZipEncoding(final Charset charset, boolean useReplacement) {
+ this.charset = charset;
+ this.useReplacement = useReplacement;
+ }
+
+ @Override
+ public Charset getCharset() {
+ return charset;
+ }
+
+ /**
+ * @see ZipEncoding#canEncode(java.lang.String)
+ */
+ @Override
+ public boolean canEncode(final String name) {
+ final CharsetEncoder enc = newEncoder();
+
+ return enc.canEncode(name);
+ }
+
+ /**
+ * @see ZipEncoding#encode(java.lang.String)
+ */
+ @Override
+ public ByteBuffer encode(final String name) {
+ final CharsetEncoder enc = newEncoder();
+
+ final CharBuffer cb = CharBuffer.wrap(name);
+ CharBuffer tmp = null;
+ ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining()));
+
+ while (cb.remaining() > 0) {
+ final CoderResult res = enc.encode(cb, out, false);
+
+ if (res.isUnmappable() || res.isMalformed()) {
+
+ // write the unmappable characters in utf-16
+ // pseudo-URL encoding style to ByteBuffer.
+
+ int spaceForSurrogate = estimateIncrementalEncodingSize(enc, 6 * res.length());
+ if (spaceForSurrogate > out.remaining()) {
+ // if the destination buffer isn't over sized, assume that the presence of one
+ // unmappable character makes it likely that there will be more. Find all the
+ // un-encoded characters and allocate space based on those estimates.
+ int charCount = 0;
+ for (int i = cb.position() ; i < cb.limit(); i++) {
+ charCount += !enc.canEncode(cb.get(i)) ? 6 : 1;
+ }
+ int totalExtraSpace = estimateIncrementalEncodingSize(enc, charCount);
+ out = ZipEncodingHelper.growBufferBy(out, totalExtraSpace - out.remaining());
+ }
+ if (tmp == null) {
+ tmp = CharBuffer.allocate(6);
+ }
+ for (int i = 0; i < res.length(); ++i) {
+ out = encodeFully(enc, encodeSurrogate(tmp, cb.get()), out);
+ }
+
+ } else if (res.isOverflow()) {
+ int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
+ out = ZipEncodingHelper.growBufferBy(out, increment);
+ }
+ }
+ // tell the encoder we are done
+ enc.encode(cb, out, true);
+ // may have caused underflow, but that's been ignored traditionally
+
+ out.limit(out.position());
+ out.rewind();
+ return out;
+ }
+
+ /**
+ * @see
+ * ZipEncoding#decode(byte[])
+ */
+ @Override
+ public String decode(final byte[] data) throws IOException {
+ return newDecoder()
+ .decode(ByteBuffer.wrap(data)).toString();
+ }
+
+ private static ByteBuffer encodeFully(CharsetEncoder enc, CharBuffer cb, ByteBuffer out) {
+ ByteBuffer o = out;
+ while (cb.hasRemaining()) {
+ CoderResult result = enc.encode(cb, o, false);
+ if (result.isOverflow()) {
+ int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
+ o = ZipEncodingHelper.growBufferBy(o, increment);
+ }
+ }
+ return o;
+ }
+
+ private static CharBuffer encodeSurrogate(CharBuffer cb, char c) {
+ cb.position(0).limit(6);
+ cb.put('%');
+ cb.put('U');
+
+ cb.put(HEX_CHARS[(c >> 12) & 0x0f]);
+ cb.put(HEX_CHARS[(c >> 8) & 0x0f]);
+ cb.put(HEX_CHARS[(c >> 4) & 0x0f]);
+ cb.put(HEX_CHARS[c & 0x0f]);
+ cb.flip();
+ return cb;
+ }
+
+ private CharsetEncoder newEncoder() {
+ if (useReplacement) {
+ return charset.newEncoder()
+ .onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .replaceWith(REPLACEMENT_BYTES);
+ } else {
+ return charset.newEncoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+ }
+
+ private CharsetDecoder newDecoder() {
+ if (!useReplacement) {
+ return this.charset.newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ } else {
+ return charset.newDecoder()
+ .onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .replaceWith(REPLACEMENT_STRING);
+ }
+ }
+
+ /**
+ * Estimate the initial encoded size (in bytes) for a character buffer.
+ * <p>
+ * The estimate assumes that one character consumes uses the maximum length encoding,
+ * whilst the rest use an average size encoding. This accounts for any BOM for UTF-16, at
+ * the expense of a couple of extra bytes for UTF-8 encoded ASCII.
+ * </p>
+ *
+ * @param enc encoder to use for estimates
+ * @param charChount number of characters in string
+ * @return estimated size in bytes.
+ */
+ private static int estimateInitialBufferSize(CharsetEncoder enc, int charChount) {
+ float first = enc.maxBytesPerChar();
+ float rest = (charChount - 1) * enc.averageBytesPerChar();
+ return (int) Math.ceil(first + rest);
+ }
+
+ /**
+ * Estimate the size needed for remaining characters
+ *
+ * @param enc encoder to use for estimates
+ * @param charCount number of characters remaining
+ * @return estimated size in bytes.
+ */
+ private static int estimateIncrementalEncodingSize(CharsetEncoder enc, int charCount) {
+ return (int) Math.ceil(charCount * enc.averageBytesPerChar());
+ }
+
+}