aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/org/yaml/snakeyaml/reader/UnicodeReader.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/org/yaml/snakeyaml/reader/UnicodeReader.java')
-rw-r--r--src/main/java/org/yaml/snakeyaml/reader/UnicodeReader.java187
1 files changed, 91 insertions, 96 deletions
diff --git a/src/main/java/org/yaml/snakeyaml/reader/UnicodeReader.java b/src/main/java/org/yaml/snakeyaml/reader/UnicodeReader.java
index dd9dc39b..4c9c9039 100644
--- a/src/main/java/org/yaml/snakeyaml/reader/UnicodeReader.java
+++ b/src/main/java/org/yaml/snakeyaml/reader/UnicodeReader.java
@@ -1,40 +1,29 @@
/**
- * Copyright (c) 2008, http://www.snakeyaml.org
+ * Copyright (c) 2008, SnakeYAML
*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
*/
package org.yaml.snakeyaml.reader;
/**
- version: 1.1 / 2007-01-25
- - changed BOM recognition ordering (longer boms first)
-
- Original pseudocode : Thomas Weidenfeller
- Implementation tweaked: Aki Nieminen
- Implementation changed: Andrey Somov
- * UTF-32 removed because it is not supported by YAML
- * no default encoding
-
- http://www.unicode.org/unicode/faq/utf_bom.html
- BOMs:
- 00 00 FE FF = UTF-32, big-endian
- FF FE 00 00 = UTF-32, little-endian
- EF BB BF = UTF-8,
- FE FF = UTF-16, big-endian
- FF FE = UTF-16, little-endian
-
- Win2k Notepad:
- Unicode format = UTF-16LE
+ * version: 1.1 / 2007-01-25 - changed BOM recognition ordering (longer boms first)
+ *
+ * Original pseudocode : Thomas Weidenfeller Implementation tweaked: Aki Nieminen Implementation
+ * changed: Andrey Somov UTF-32 removed because it is not supported by YAML no default encoding
+ *
+ * http://www.unicode.org/unicode/faq/utf_bom.html BOMs: 00 00 FE FF = UTF-32, big-endian FF FE 00
+ * 00 = UTF-32, little-endian EF BB BF = UTF-8, FE FF = UTF-16, big-endian FF FE = UTF-16,
+ * little-endian
+ *
+ * Win2k Notepad: Unicode format = UTF-16LE
***/
import java.io.IOException;
@@ -45,81 +34,87 @@ import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
/**
- * Generic unicode textreader, which will use BOM mark to identify the encoding
- * to be used. If BOM is not found then use a given default or system encoding.
+ * Generic unicode textreader, which will use BOM mark to identify the encoding to be used. If BOM
+ * is not found then use a given default or system encoding.
*/
public class UnicodeReader extends Reader {
- private static final Charset UTF8 = Charset.forName("UTF-8");
- private static final Charset UTF16BE = Charset.forName("UTF-16BE");
- private static final Charset UTF16LE = Charset.forName("UTF-16LE");
-
- PushbackInputStream internalIn;
- InputStreamReader internalIn2 = null;
-
- private static final int BOM_SIZE = 3;
- /**
- * @param in
- * InputStream to be read
- */
- public UnicodeReader(InputStream in) {
- internalIn = new PushbackInputStream(in, BOM_SIZE);
+ private static final Charset UTF8 = StandardCharsets.UTF_8;
+ private static final Charset UTF16BE = StandardCharsets.UTF_16BE;
+ private static final Charset UTF16LE = StandardCharsets.UTF_16LE;
+
+ PushbackInputStream internalIn;
+ InputStreamReader internalIn2 = null;
+
+ private static final int BOM_SIZE = 3;
+
+ /**
+ * @param in InputStream to be read
+ */
+ public UnicodeReader(InputStream in) {
+ internalIn = new PushbackInputStream(in, BOM_SIZE);
+ }
+
+ /**
+ * Get stream encoding or NULL if stream is uninitialized. Call init() or read() method to
+ * initialize it.
+ *
+ * @return the name of the character encoding being used by this stream.
+ */
+ public String getEncoding() {
+ return internalIn2.getEncoding();
+ }
+
+ /**
+ * Read-ahead four bytes and check for BOM marks. Extra bytes are unread back to the stream, only
+ * BOM bytes are skipped.
+ *
+ * @throws IOException if InputStream cannot be created
+ */
+ protected void init() throws IOException {
+ if (internalIn2 != null) {
+ return;
}
- /**
- * Get stream encoding or NULL if stream is uninitialized. Call init() or
- * read() method to initialize it.
- */
- public String getEncoding() {
- return internalIn2.getEncoding();
+ Charset encoding;
+ byte[] bom = new byte[BOM_SIZE];
+ int n, unread;
+ n = internalIn.read(bom, 0, bom.length);
+
+ if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
+ encoding = UTF8;
+ unread = n - 3;
+ } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
+ encoding = UTF16BE;
+ unread = n - 2;
+ } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
+ encoding = UTF16LE;
+ unread = n - 2;
+ } else {
+ // Unicode BOM mark not found, unread all bytes
+ encoding = UTF8;
+ unread = n;
}
- /**
- * Read-ahead four bytes and check for BOM marks. Extra bytes are unread
- * back to the stream, only BOM bytes are skipped.
- */
- protected void init() throws IOException {
- if (internalIn2 != null)
- return;
-
- Charset encoding;
- byte bom[] = new byte[BOM_SIZE];
- int n, unread;
- n = internalIn.read(bom, 0, bom.length);
-
- if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
- encoding = UTF8;
- unread = n - 3;
- } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
- encoding = UTF16BE;
- unread = n - 2;
- } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
- encoding = UTF16LE;
- unread = n - 2;
- } else {
- // Unicode BOM mark not found, unread all bytes
- encoding = UTF8;
- unread = n;
- }
-
- if (unread > 0)
- internalIn.unread(bom, (n - unread), unread);
-
- // Use given encoding
- CharsetDecoder decoder = encoding.newDecoder().onUnmappableCharacter(
- CodingErrorAction.REPORT);
- internalIn2 = new InputStreamReader(internalIn, decoder);
+ if (unread > 0) {
+ internalIn.unread(bom, (n - unread), unread);
}
- public void close() throws IOException {
- init();
- internalIn2.close();
- }
-
- public int read(char[] cbuf, int off, int len) throws IOException {
- init();
- return internalIn2.read(cbuf, off, len);
- }
-} \ No newline at end of file
+ // Use given encoding
+ CharsetDecoder decoder = encoding.newDecoder().onUnmappableCharacter(CodingErrorAction.REPORT);
+ internalIn2 = new InputStreamReader(internalIn, decoder);
+ }
+
+ public void close() throws IOException {
+ init();
+ internalIn2.close();
+ }
+
+ public int read(char[] cbuf, int off, int len) throws IOException {
+ init();
+ return internalIn2.read(cbuf, off, len);
+ }
+}