1 files changed, 216 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
new file mode 100644
index 000000000..0a7581acf
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.commons.compress.archivers.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+/**
+ * A ZipEncoding, which uses a java.nio {@link
+ * java.nio.charset.Charset Charset} to encode names.
+ * <p>The methods of this class are reentrant.</p>
+ * @Immutable
+ */
+class NioZipEncoding implements ZipEncoding, CharsetAccessor {
+
+    /** Charset the encoders and decoders of this instance are created from. */
+    private final Charset charset;
+    /** Whether invalid input is replaced by {@link #REPLACEMENT} rather than reported. */
+    private final boolean useReplacement;
+    private static final char REPLACEMENT = '?';
+    private static final byte[] REPLACEMENT_BYTES = { (byte) REPLACEMENT };
+    private static final String REPLACEMENT_STRING = String.valueOf(REPLACEMENT);
+    private static final char[] HEX_CHARS = "0123456789ABCDEF".toCharArray();
+
+
+    /**
+     * Creates an encoding backed by the given charset.
+     * @param charset the charset the encoders and decoders are created from
+     * @param useReplacement whether invalid input is replaced ({@code true}) or reported
+     */
+    NioZipEncoding(final Charset charset, boolean useReplacement) {
+        this.useReplacement = useReplacement;
+        this.charset = charset;
+    }
+
+    @Override
+    public Charset getCharset() {
+        return this.charset; // fixed at construction time (the field is final)
+    }
+
+    /**
+     * Checks the name against a newly created encoder for this charset.
+     * @see  ZipEncoding#canEncode(java.lang.String)
+     */
+    @Override
+    public boolean canEncode(final String name) {
+        // a new encoder is obtained for every call
+        return newEncoder().canEncode(name);
+    }
+
+    /**
+     * Encodes the name; input the encoder reports as unmappable or malformed is written
+     * as {@code %Uxxxx} escapes (one per UTF-16 code unit, four hex digits) instead.
+     * @param name the name to encode
+     * @return buffer positioned at 0 whose limit marks the end of the encoded bytes
+     * @see ZipEncoding#encode(java.lang.String)
+     */
+    @Override
+    public ByteBuffer encode(final String name) {
+        final CharsetEncoder enc = newEncoder();
+        final CharBuffer cb = CharBuffer.wrap(name);
+        CharBuffer tmp = null;
+        ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining()));
+        while (cb.remaining() > 0) {
+            final CoderResult res = enc.encode(cb, out, false);
+            if (res.isUnmappable() || res.isMalformed()) {
+                // write the unmappable characters in utf-16 pseudo-URL encoding style to ByteBuffer.
+                int spaceForSurrogate = estimateIncrementalEncodingSize(enc, 6 * res.length());
+                if (spaceForSurrogate > out.remaining()) {
+                    // if the destination buffer isn't over sized, assume that the presence of one
+                    // unmappable character makes it likely that there will be more. Find all the
+                    // un-encoded characters and allocate space based on those estimates.
+                    // canEncode must not be invoked on an encoder that is mid-operation: use a fresh one
+                    final CharsetEncoder probe = enc.charset().newEncoder();
+                    int charCount = 0;
+                    for (int i = cb.position(); i < cb.limit(); i++) {
+                        charCount += probe.canEncode(cb.get(i)) ? 1 : 6;
+                    }
+                    int totalExtraSpace = estimateIncrementalEncodingSize(enc, charCount);
+                    out = ZipEncodingHelper.growBufferBy(out, totalExtraSpace - out.remaining());
+                }
+                if (tmp == null) {
+                    tmp = CharBuffer.allocate(6);
+                }
+                for (int i = 0; i < res.length(); ++i) {
+                    out = encodeFully(enc, encodeSurrogate(tmp, cb.get()), out);
+                }
+            } else if (res.isOverflow()) {
+                int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
+                out = ZipEncodingHelper.growBufferBy(out, increment);
+            } else {
+                // underflow with input left over (e.g. a trailing unpaired high surrogate): another
+                // pass with endOfInput == false can never make progress, so stop instead of spinning
+                break;
+            }
+        }
+        enc.encode(cb, out, true); // tell the encoder we are done; result traditionally ignored
+        out.limit(out.position());
+        out.rewind();
+        return out;
+    }
+
+    /**
+     * Decodes the raw bytes with a newly created decoder for this charset.
+     * @see ZipEncoding#decode(byte[])
+     */
+    @Override
+    public String decode(final byte[] data) throws IOException {
+        final CharBuffer decoded = newDecoder().decode(ByteBuffer.wrap(data));
+        return decoded.toString();
+    }
+
+    /** Encodes all of {@code cb}, swapping in a grown buffer on overflow; returns the buffer in use. */
+    private static ByteBuffer encodeFully(CharsetEncoder enc, CharBuffer cb, ByteBuffer out) {
+        ByteBuffer target = out;
+        while (cb.remaining() > 0) {
+            if (enc.encode(cb, target, false).isOverflow()) {
+                final int extra = estimateIncrementalEncodingSize(enc, cb.remaining());
+                target = ZipEncodingHelper.growBufferBy(target, extra);
+            }
+        }
+        return target;
+    }
+
+    /** Refills {@code cb} with {@code %U} and the four hex digits of {@code c}; returns it flipped. */
+    private static CharBuffer encodeSurrogate(CharBuffer cb, char c) {
+        cb.position(0);
+        cb.limit(6);
+        cb.put('%').put('U');
+        // most significant nibble first
+        for (int shift = 12; shift >= 0; shift -= 4) {
+            cb.put(HEX_CHARS[(c >> shift) & 0x0f]);
+        }
+        cb.flip();
+        return cb;
+    }
+
+    /** Creates a new encoder set to replace or to report bad input, per {@code useReplacement}. */
+    private CharsetEncoder newEncoder() {
+        final CodingErrorAction action =
+            useReplacement ? CodingErrorAction.REPLACE : CodingErrorAction.REPORT;
+        final CharsetEncoder fresh = charset.newEncoder()
+            .onMalformedInput(action)
+            .onUnmappableCharacter(action);
+        if (useReplacement) {
+            fresh.replaceWith(REPLACEMENT_BYTES);
+        }
+        return fresh;
+    }
+
+    /** Creates a new decoder set to replace or to report bad input, per {@code useReplacement}. */
+    private CharsetDecoder newDecoder() {
+        final CodingErrorAction action =
+            useReplacement ? CodingErrorAction.REPLACE : CodingErrorAction.REPORT;
+        final CharsetDecoder fresh = charset.newDecoder()
+            .onMalformedInput(action)
+            .onUnmappableCharacter(action);
+        if (useReplacement) {
+            fresh.replaceWith(REPLACEMENT_STRING);
+        }
+        return fresh;
+    }
+
+    /**
+     * Guess how many bytes the encoded form of a character sequence will initially need.
+     * <p>
+     * One character is budgeted at the encoder's maximum bytes-per-char and every other
+     * character at its average bytes-per-char. This accounts for any BOM for UTF-16, at
+     * the expense of a couple of extra bytes for UTF-8 encoded ASCII.
+     * </p>
+     *
+     * @param enc       encoder supplying the per-character estimates
+     * @param charCount number of characters in the string
+     * @return estimated size in bytes.
+     */
+    private static int estimateInitialBufferSize(CharsetEncoder enc, int charCount) {
+        final float worstCaseFirst = enc.maxBytesPerChar();
+        final float averageRest = enc.averageBytesPerChar() * (charCount - 1);
+        return (int) Math.ceil(worstCaseFirst + averageRest);
+    }
+
+    /**
+     * Estimate the bytes needed for a number of characters at the encoder's average rate.
+     * @param enc       encoder supplying the average bytes-per-char
+     * @param charCount number of characters remaining
+     * @return estimated size in bytes, rounded up.
+     */
+    private static int estimateIncrementalEncodingSize(CharsetEncoder enc, int charCount) {
+        final float expected = enc.averageBytesPerChar() * charCount;
+        return (int) Math.ceil(expected);
+    }
+
+}