aboutsummaryrefslogtreecommitdiff
path: root/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java')
-rw-r--r-- src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java | 553
1 files changed, 553 insertions, 0 deletions
diff --git a/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java b/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
new file mode 100644
index 00000000..312577c7
--- /dev/null
+++ b/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
@@ -0,0 +1,553 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.net.URL;
+import java.net.URLConnection;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.text.MessageFormat;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Test;
+import org.junitpioneer.jupiter.DefaultLocale;
+
+public class XmlStreamReaderTest {
+
+ // Charset names used as stream encodings, prolog encodings and expected results in the tests below.
+ private static final String ISO_8859_1 = StandardCharsets.ISO_8859_1.name();
+ private static final String US_ASCII = StandardCharsets.US_ASCII.name();
+ private static final String UTF_16 = StandardCharsets.UTF_16.name();
+ private static final String UTF_16LE = StandardCharsets.UTF_16LE.name();
+ private static final String UTF_16BE = StandardCharsets.UTF_16BE.name();
+ // StandardCharsets has no UTF-32 constants, so these names are spelled out.
+ private static final String UTF_32 = "UTF-32";
+ private static final String UTF_32LE = "UTF-32LE";
+ private static final String UTF_32BE = "UTF-32BE";
+ private static final String UTF_8 = StandardCharsets.UTF_8.name();
+ // Keys of the XMLs map; each one selects a document template.
+ private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes";
+ private static final String XML4 = "xml-prolog-encoding-single-quotes";
+ private static final String XML3 = "xml-prolog-encoding-double-quotes";
+ private static final String XML2 = "xml-prolog";
+ private static final String XML1 = "xml";
+
+ // Document fragment whose XML declaration carries no encoding, but whose content contains an
+ // element attribute named "encoding" (encoding='base64'). Used by testEncodingAttributeXML,
+ // which expects UTF-8 to be reported for it.
+ private static final String ENCODING_ATTRIBUTE_XML = "<?xml version=\"1.0\" ?> \n"
+ + "<atom:feed xmlns:atom=\"http://www.w3.org/2005/Atom\">\n"
+ + "\n"
+ + " <atom:entry>\n"
+ + " <atom:title encoding='base64'><![CDATA\n"
+ + "aW5nTGluZSIgLz4";
+
+ // Byte-order-mark byte sequences, written verbatim at the start of a generated stream.
+ private static final int[] NO_BOM_BYTES = {};
+
+ private static final int[] UTF_16BE_BOM_BYTES = {0xFE, 0xFF};
+
+ private static final int[] UTF_16LE_BOM_BYTES = {0xFF, 0XFE};
+
+ private static final int[] UTF_32BE_BOM_BYTES = {0x00, 0x00, 0xFE, 0xFF};
+
+ private static final int[] UTF_32LE_BOM_BYTES = {0xFF, 0XFE, 0x00, 0x00};
+
+ private static final int[] UTF_8_BOM_BYTES = {0xEF, 0xBB, 0xBF};
+
+ // Maps a BOM type key (as passed to getXmlInputStream) to the bytes to write first.
+ private static final Map<String, int[]> BOMs = new HashMap<>();
+
+ static {
+ BOMs.put("no-bom", NO_BOM_BYTES);
+ BOMs.put("UTF-16BE-bom", UTF_16BE_BOM_BYTES);
+ BOMs.put("UTF-16LE-bom", UTF_16LE_BOM_BYTES);
+ BOMs.put("UTF-32BE-bom", UTF_32BE_BOM_BYTES);
+ BOMs.put("UTF-32LE-bom", UTF_32LE_BOM_BYTES);
+ BOMs.put("UTF-16-bom", NO_BOM_BYTES); // it's added by the writer
+ BOMs.put("UTF-8-bom", UTF_8_BOM_BYTES);
+ }
+
+ // Document templates. getXML formats them with {0} = stream encoding, {1} = prolog encoding
+ // and {2} = informational text; none of the templates references {0}.
+ private static final MessageFormat XML = new MessageFormat(
+ "<root>{2}</root>");
+
+ private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
+ "<?xml version=\"1.0\"?>\n<root>{2}</root>");
+
+ private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat(
+ "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
+
+ // In a MessageFormat pattern, '' produces a single literal quote character.
+ private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES = new MessageFormat(
+ "<?xml version=\"1.0\" encoding=''{1}''?>\n<root>{2}</root>");
+
+ // Same as the single-quote template, with mixed whitespace around the '=' sign.
+ private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES = new MessageFormat(
+ "<?xml version=\"1.0\" encoding = \t \n \r''{1}''?>\n<root>{2}</root>");
+
+ // Informational text placed inside <root>; it has four placeholders, {0} to {3}.
+ private static final MessageFormat INFO = new MessageFormat(
+ "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
+
+ // Maps a template key (XML1..XML5) to its document template.
+ private static final Map<String, MessageFormat> XMLs = new HashMap<>();
+
+ static {
+ XMLs.put(XML1, XML);
+ XMLs.put(XML2, XML_WITH_PROLOG);
+ XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES);
+ XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES);
+ XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES);
+ }
+
+    /**
+     * Creates the XML document text for one test scenario.
+     *
+     * @param bomType BOM type key, only echoed into the informational text
+     * @param xmlType key of the document template in {@code XMLs}
+     * @param streamEnc name of the encoding the document will be written with
+     * @param prologEnc encoding name to place in the XML prolog; may be {@code null}
+     * @return the formatted document
+     */
+    private String getXML(final String bomType, final String xmlType,
+        final String streamEnc, final String prologEnc) {
+        final MessageFormat xml = XMLs.get(xmlType);
+        // INFO declares four placeholders (BOM, Doc, Stream Enc, Prolog Enc). All four values
+        // must be supplied; otherwise "Stream Enc" shows the prolog encoding and "{3}" is
+        // emitted verbatim for "Prolog Enc".
+        final String info = INFO.format(new Object[] {bomType, xmlType, streamEnc, prologEnc});
+        return xml.format(new Object[] {streamEnc, prologEnc, info});
+    }
+
+    /**
+     * Builds an in-memory XML stream: the BOM bytes registered for {@code bomType} (if any),
+     * followed by the document for {@code xmlType} and a long run of padding elements, all
+     * encoded with {@code streamEnc}.
+     *
+     * @param bomType key into the BOM table (for example no-bom, UTF-8-bom, UTF-16BE-bom);
+     *        an unknown key writes no BOM bytes
+     * @param xmlType key of the document template
+     * @param streamEnc encoding of the stream
+     * @param prologEnc encoding named in the prolog
+     * @return XML stream
+     * @throws IOException If an I/O error occurs
+     */
+    protected InputStream getXmlInputStream(final String bomType, final String xmlType,
+        final String streamEnc, final String prologEnc) throws IOException {
+        final ByteArrayOutputStream buffer = new ByteArrayOutputStream(1024);
+        final int[] bomBytes = BOMs.get(bomType);
+        if (bomBytes != null) {
+            for (final int bomByte : bomBytes) {
+                buffer.write(bomByte);
+            }
+        }
+        try (Writer out = new OutputStreamWriter(buffer, streamEnc)) {
+            out.write(getXML(bomType, xmlType, streamEnc, prologEnc));
+
+            // PADDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE
+            out.write("<da>\n");
+            int remaining = 10000;
+            while (remaining > 0) {
+                out.write("<do/>\n");
+                remaining--;
+            }
+            out.write("</da>\n");
+        }
+        return new ByteArrayInputStream(buffer.toByteArray());
+    }
+
+    /**
+     * Opens a strict reader with an alternate default encoding and checks both the reported
+     * default encoding and the detected encoding.
+     *
+     * @param cT HTTP content type
+     * @param bomEnc BOM type key
+     * @param streamEnc encoding the stream is written with
+     * @param prologEnc encoding named in the prolog, or {@code null} for a document without prolog
+     * @param alternateEnc default encoding handed to the reader; may be {@code null}
+     * @throws Exception if stream creation or reading fails
+     */
+    public void testAlternateDefaultEncoding(final String cT, final String bomEnc, final String streamEnc, final String prologEnc, final String alternateEnc)
+        throws Exception {
+        try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML1 : XML3, streamEnc, prologEnc);
+            XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false, alternateEnc)) {
+            // JUnit 5 signature is assertEquals(expected, actual); keeping that order makes
+            // failure messages report the right value as "expected".
+            assertEquals(alternateEnc, xmlReader.getDefaultEncoding());
+            if (!streamEnc.equals(UTF_16)) {
+                // (niallp 2010-10-06 - I re-instated the check below - the tests(6) passed)
+                final String enc = alternateEnc != null ? alternateEnc : streamEnc;
+                assertEquals(enc, xmlReader.getEncoding());
+            } else {
+                // For UTF-16 only the leading part of the reported name is compared.
+                assertEquals(streamEnc, xmlReader.getEncoding().substring(0, streamEnc.length()));
+            }
+        }
+    }
+
+    /** Opens a reader on a {@link File} and closes it again; no further checks. */
+    @Test
+    protected void testConstructorFileInput() throws IOException {
+        final File pom = new File("pom.xml");
+        new XmlStreamReader(pom).close();
+    }
+
+    /** A {@code null} {@link File} must be rejected with a {@link NullPointerException}. */
+    @Test
+    protected void testConstructorFileInputNull() {
+        final File missing = null;
+        assertThrows(NullPointerException.class, () -> new XmlStreamReader(missing));
+    }
+
+    /**
+     * Opens a reader on an {@link InputStream} and closes it again; no further checks.
+     *
+     * @throws IOException if the file cannot be opened or read
+     */
+    @Test
+    protected void testConstructorInputStreamInput() throws IOException {
+        // The stream is declared as its own resource so it is closed even when the
+        // XmlStreamReader constructor throws.
+        try (InputStream in = Files.newInputStream(Paths.get("pom.xml"));
+            XmlStreamReader reader = new XmlStreamReader(in)) {
+            // do nothing
+        }
+    }
+
+    /** A {@code null} {@link InputStream} must be rejected with a {@link NullPointerException}. */
+    @Test
+    protected void testConstructorInputStreamInputNull() {
+        final InputStream missing = null;
+        assertThrows(NullPointerException.class, () -> new XmlStreamReader(missing));
+    }
+
+    /**
+     * Opens a reader on a {@link Path} and closes it again; no further checks.
+     *
+     * @throws IOException if the file cannot be opened or read
+     */
+    @Test // without this annotation JUnit never runs the method
+    protected void testConstructorPathInput() throws IOException {
+        try (XmlStreamReader reader = new XmlStreamReader(Paths.get("pom.xml"))) {
+            // do nothing
+        }
+    }
+
+    /** A {@code null} {@link Path} must be rejected with a {@link NullPointerException}. */
+    @Test
+    protected void testConstructorPathInputNull() {
+        final Path missing = null;
+        assertThrows(NullPointerException.class, () -> new XmlStreamReader(missing));
+    }
+
+    /** Opens a reader on a {@link URLConnection} with a default encoding and closes it again. */
+    @Test
+    protected void testConstructorURLConnectionInput() throws IOException {
+        final URLConnection connection = new URL("https://www.apache.org/").openConnection();
+        new XmlStreamReader(connection, UTF_8).close();
+    }
+
+    /** A {@code null} {@link URLConnection} must be rejected with a {@link NullPointerException}. */
+    @Test
+    protected void testConstructorURLConnectionInputNull() {
+        final URLConnection missing = null;
+        assertThrows(NullPointerException.class, () -> new XmlStreamReader(missing, US_ASCII));
+    }
+
+    /** Opens a reader on a {@link URL} and closes it again; no further checks. */
+    @Test
+    protected void testConstructorURLInput() throws IOException {
+        final URL site = new URL("https://www.apache.org/");
+        new XmlStreamReader(site).close();
+    }
+
+    /** A {@code null} {@link URL} must be rejected with a {@link NullPointerException}. */
+    @Test
+    protected void testConstructorURLInputNull() throws IOException {
+        final URL missing = null;
+        assertThrows(NullPointerException.class, () -> new XmlStreamReader(missing));
+    }
+
+    /**
+     * Reads a document that contains an element attribute named {@code encoding} but no
+     * encoding in its XML declaration, and expects UTF-8 to be reported.
+     *
+     * @throws Exception if reading fails
+     */
+    @Test
+    public void testEncodingAttributeXML() throws Exception {
+        try (InputStream is = new ByteArrayInputStream(ENCODING_ATTRIBUTE_XML.getBytes(StandardCharsets.UTF_8));
+            XmlStreamReader xmlReader = new XmlStreamReader(is, "", true)) {
+            // assertEquals(expected, actual)
+            assertEquals(UTF_8, xmlReader.getEncoding());
+        }
+    }
+
+ // XML Stream generator
+
+ /**
+ * Runs the HTTP content-type scenarios through the helper methods of this class:
+ * testHttpValid (reader opened with {@code false} as third argument, encoding must equal the
+ * stream encoding), testHttpInvalid (same constructor, an IOException containing
+ * "Invalid encoding," is expected), testAlternateDefaultEncoding, and testHttpLenient
+ * (reader opened with {@code true} as third argument, encoding must equal the last argument).
+ */
+ @Test
+ public void testHttp() throws Exception {
+ // niallp 2010-10-06 - remove following 2 tests - I reinstated
+ // checks for non-UTF-16 encodings (18 tests) and these failed
+ // _testHttpValid("application/xml", "no-bom", "US-ASCII", null);
+ // _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
+ // application/xml: combinations that must be accepted
+ testHttpValid("application/xml", "UTF-8-bom", UTF_8, null);
+ testHttpValid("application/xml", "UTF-8-bom", UTF_8, UTF_8);
+ testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", UTF_8, null);
+ testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", UTF_8, null);
+ testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", UTF_8, null);
+ testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", UTF_8, UTF_8);
+ testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, null);
+ testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16);
+ testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16BE);
+
+ // application/xml: combinations that must be rejected
+ testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, null);
+ testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16);
+ testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16BE);
+
+ testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, null);
+ testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32);
+ testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32BE);
+
+ // NOTE(review): the UTF_16LE / UTF_32LE values passed as BOM type below are not keys of
+ // the BOMs map, so getXmlInputStream writes no BOM bytes for them - confirm this is intended.
+ testHttpInvalid("application/xml", "UTF-8-bom", US_ASCII, US_ASCII);
+ testHttpInvalid("application/xml;charset=UTF-16", UTF_16LE, UTF_8, UTF_8);
+ testHttpInvalid("application/xml;charset=UTF-16", "no-bom", UTF_16BE, UTF_16BE);
+ testHttpInvalid("application/xml;charset=UTF-32", UTF_32LE, UTF_8, UTF_8);
+ testHttpInvalid("application/xml;charset=UTF-32", "no-bom", UTF_32BE, UTF_32BE);
+
+ // text/xml: combinations that must be accepted
+ testHttpValid("text/xml", "no-bom", US_ASCII, null);
+ testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, UTF_8);
+ testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, null);
+ testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, null);
+ testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16);
+ testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16BE);
+ testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, null);
+ testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32);
+ testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32BE);
+ testHttpValid("text/xml", "UTF-8-bom", US_ASCII, null);
+
+ // alternate default encoding handed to the reader (last argument)
+ testAlternateDefaultEncoding("application/xml", "UTF-8-bom", UTF_8, null, null);
+ testAlternateDefaultEncoding("application/xml", "no-bom", US_ASCII, null, US_ASCII);
+ testAlternateDefaultEncoding("application/xml", "UTF-8-bom", UTF_8, null, UTF_8);
+ testAlternateDefaultEncoding("text/xml", "no-bom", US_ASCII, null, null);
+ testAlternateDefaultEncoding("text/xml", "no-bom", US_ASCII, null, US_ASCII);
+ testAlternateDefaultEncoding("text/xml", "no-bom", US_ASCII, null, UTF_8);
+
+ // text/xml: combinations that must be rejected
+ testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, null);
+ testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16);
+ testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16BE);
+ testHttpInvalid("text/xml;charset=UTF-16", "no-bom", UTF_16BE, UTF_16BE);
+ testHttpInvalid("text/xml;charset=UTF-16", "no-bom", UTF_16BE, null);
+
+ testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, null);
+ testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32);
+ testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32BE);
+ testHttpInvalid("text/xml;charset=UTF-32", "no-bom", UTF_32BE, UTF_32BE);
+ testHttpInvalid("text/xml;charset=UTF-32", "no-bom", UTF_32BE, null);
+
+ // the valid text/xml combinations again, through testHttpLenient with the expected result
+ testHttpLenient("text/xml", "no-bom", US_ASCII, null, US_ASCII);
+ testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, UTF_8, UTF_8);
+ testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, null, UTF_8);
+ testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, null, UTF_16BE);
+ testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16, UTF_16);
+ testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16BE, UTF_16BE);
+ testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, null, UTF_32BE);
+ testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32, UTF_32);
+ testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32BE, UTF_32BE);
+ testHttpLenient("text/xml", "UTF-8-bom", US_ASCII, null, US_ASCII);
+
+ // the rejected text/xml combinations again, through testHttpLenient with the expected result
+ testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, null, UTF_16BE);
+ testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16, UTF_16);
+ testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16BE, UTF_16BE);
+ testHttpLenient("text/xml;charset=UTF-16", "no-bom", UTF_16BE, UTF_16BE, UTF_16BE);
+ testHttpLenient("text/xml;charset=UTF-16", "no-bom", UTF_16BE, null, UTF_16);
+
+ testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, null, UTF_32BE);
+ testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32, UTF_32);
+ testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32BE, UTF_32BE);
+ testHttpLenient("text/xml;charset=UTF-32", "no-bom", UTF_32BE, UTF_32BE, UTF_32BE);
+ testHttpLenient("text/xml;charset=UTF-32", "no-bom", UTF_32BE, null, UTF_32);
+
+ // text/html content types, through testHttpLenient
+ testHttpLenient("text/html", "no-bom", US_ASCII, US_ASCII, US_ASCII);
+ testHttpLenient("text/html", "no-bom", US_ASCII, null, US_ASCII);
+ testHttpLenient("text/html;charset=UTF-8", "no-bom", US_ASCII, UTF_8, UTF_8);
+ testHttpLenient("text/html;charset=UTF-16BE", "no-bom", US_ASCII, UTF_8, UTF_8);
+ testHttpLenient("text/html;charset=UTF-32BE", "no-bom", US_ASCII, UTF_8, UTF_8);
+ }
+
+    /**
+     * Reads a UTF-8 document with a matching prolog encoding and checks both the detected
+     * encoding and that the content read back equals the original text.
+     *
+     * @throws Exception if reading fails
+     */
+    @Test
+    public void testHttpContent() throws Exception {
+        final String encoding = UTF_8;
+        final String xml = getXML("no-bom", XML3, encoding, encoding);
+        try (XmlStreamReader xmlReader = new XmlStreamReader(new StringInputStream(xml, encoding))) {
+            // Both assertions use assertEquals(expected, actual, message).
+            assertEquals(encoding, xmlReader.getEncoding(), "Check encoding");
+            assertEquals(xml, IOUtils.toString(xmlReader), "Check content");
+        }
+    }
+
+    /**
+     * Expects the three-argument constructor (third argument {@code false}) to throw an
+     * {@link IOException} whose message contains "Invalid encoding," for the given combination.
+     *
+     * @param cT HTTP content type
+     * @param bomEnc BOM type key
+     * @param streamEnc encoding the stream is written with
+     * @param prologEnc encoding named in the prolog, or {@code null} for a prolog without encoding
+     * @throws Exception if stream creation fails
+     */
+    protected void testHttpInvalid(final String cT, final String bomEnc, final String streamEnc,
+        final String prologEnc) throws Exception {
+        final String xmlType = prologEnc == null ? XML2 : XML3;
+        try (InputStream is = getXmlInputStream(bomEnc, xmlType, streamEnc, prologEnc)) {
+            try {
+                final XmlStreamReader unexpected = new XmlStreamReader(is, cT, false);
+                unexpected.close();
+                fail("It should have failed for HTTP Content-type " + cT + ", BOM " + bomEnc + ", streamEnc " + streamEnc + " and prologEnc " + prologEnc);
+            } catch (final IOException expected) {
+                final String message = expected.getMessage();
+                assertTrue(message.contains("Invalid encoding,"));
+            }
+        }
+    }
+
+    /**
+     * Opens a reader with {@code true} as third constructor argument and checks the detected
+     * encoding.
+     *
+     * @param cT HTTP content type
+     * @param bomEnc BOM type key
+     * @param streamEnc encoding the stream is written with
+     * @param prologEnc encoding named in the prolog, or {@code null} for a prolog without encoding
+     * @param shouldBe the encoding the reader is expected to report
+     * @throws Exception if stream creation or reading fails
+     */
+    protected void testHttpLenient(final String cT, final String bomEnc, final String streamEnc,
+        final String prologEnc, final String shouldBe) throws Exception {
+        try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML2 : XML3, streamEnc, prologEnc);
+            XmlStreamReader xmlReader = new XmlStreamReader(is, cT, true)) {
+            // assertEquals(expected, actual)
+            assertEquals(shouldBe, xmlReader.getEncoding());
+        }
+    }
+
+    /**
+     * Opens a reader with {@code false} as third constructor argument and checks that the
+     * detected encoding matches the stream encoding.
+     *
+     * @param cT HTTP content type
+     * @param bomEnc BOM type key
+     * @param streamEnc encoding the stream is written with; also the expected result
+     * @param prologEnc encoding named in the prolog, or {@code null} for a document without prolog
+     * @throws Exception if stream creation or reading fails
+     */
+    public void testHttpValid(final String cT, final String bomEnc, final String streamEnc,
+        final String prologEnc) throws Exception {
+        try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML1 : XML3, streamEnc, prologEnc);
+            XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false)) {
+            // JUnit 5 signature is assertEquals(expected, actual).
+            if (!streamEnc.equals(UTF_16)) {
+                // (niallp 2010-10-06 - I re-instated the check below and removed the 2 tests that failed)
+                assertEquals(streamEnc, xmlReader.getEncoding());
+            } else {
+                // For UTF-16 only the leading part of the reported name is compared.
+                assertEquals(streamEnc, xmlReader.getEncoding().substring(0, streamEnc.length()));
+            }
+        }
+    }
+
+    /**
+     * Runs with a Turkish default locale, where case conversion of the dotted and dotless
+     * "i" follows language-specific rules, and checks lower-case encoding names.
+     */
+    @Test
+    @DefaultLocale(language = "tr")
+    public void testLowerCaseEncodingWithTurkishLocale_IO_557() throws Exception {
+        for (final String encoding : new String[] {"iso8859-1", "us-ascii", "utf-8"}) {
+            final String xml = getXML("no-bom", XML3, encoding, encoding);
+            final byte[] encoded = xml.getBytes(encoding);
+            try (ByteArrayInputStream is = new ByteArrayInputStream(encoded);
+                XmlStreamReader xmlReader = new XmlStreamReader(is)) {
+                final boolean sameName = encoding.equalsIgnoreCase(xmlReader.getEncoding());
+                assertTrue(sameName, "Check encoding : " + encoding);
+                final String content = IOUtils.toString(xmlReader);
+                assertEquals(xml, content, "Check content");
+            }
+        }
+    }
+
+    /**
+     * Expects the two-argument constructor (second argument {@code false}) to throw an
+     * {@link IOException} whose message contains "Invalid encoding," for the given combination.
+     *
+     * @param bomEnc BOM type key
+     * @param streamEnc encoding the stream is written with
+     * @param prologEnc encoding named in the prolog
+     * @throws Exception if stream creation fails
+     */
+    protected void testRawBomInvalid(final String bomEnc, final String streamEnc,
+        final String prologEnc) throws Exception {
+        // try-with-resources closes the input stream on the expected (exception) path and the
+        // reader on the unexpected path, including when fail() throws.
+        try (InputStream is = getXmlInputStream(bomEnc, XML3, streamEnc, prologEnc)) {
+            try (XmlStreamReader xmlReader = new XmlStreamReader(is, false)) {
+                final String foundEnc = xmlReader.getEncoding();
+                fail("Expected IOException for BOM " + bomEnc + ", streamEnc " + streamEnc + " and prologEnc " + prologEnc
+                    + ": found " + foundEnc);
+            } catch (final IOException ex) {
+                assertTrue(ex.getMessage().contains("Invalid encoding,"));
+            }
+        }
+    }
+
+    /** UTF-16 family with BOM: matching prolog encodings pass, mismatching ones are rejected. */
+    @Test
+    public void testRawBomUtf16() throws Exception {
+        for (final String encoding : new String[] {UTF_16BE, UTF_16LE, UTF_16}) {
+            testRawBomValid(encoding);
+        }
+
+        // {BOM type, stream encoding, prolog encoding}
+        final String[][] mismatches = {
+            {"UTF-16BE-bom", UTF_16BE, UTF_16LE},
+            {"UTF-16LE-bom", UTF_16LE, UTF_16BE},
+            {"UTF-16LE-bom", UTF_16LE, UTF_8},
+        };
+        for (final String[] scenario : mismatches) {
+            testRawBomInvalid(scenario[0], scenario[1], scenario[2]);
+        }
+    }
+
+    /** UTF-32 family with BOM: matching prolog encodings pass, mismatching ones are rejected. */
+    @Test
+    public void testRawBomUtf32() throws Exception {
+        for (final String encoding : new String[] {UTF_32BE, UTF_32LE, UTF_32}) {
+            testRawBomValid(encoding);
+        }
+
+        // {BOM type, stream encoding, prolog encoding}
+        final String[][] mismatches = {
+            {"UTF-32BE-bom", UTF_32BE, UTF_32LE},
+            {"UTF-32LE-bom", UTF_32LE, UTF_32BE},
+            {"UTF-32LE-bom", UTF_32LE, UTF_8},
+        };
+        for (final String[] scenario : mismatches) {
+            testRawBomInvalid(scenario[0], scenario[1], scenario[2]);
+        }
+    }
+
+    /**
+     * UTF-8 with BOM: the matching prolog encoding passes; a list of BOM / stream / prolog
+     * mismatches (including UTF-16 and UTF-32 ones) is rejected.
+     */
+    @Test
+    public void testRawBomUtf8() throws Exception {
+        testRawBomValid(UTF_8);
+
+        // {BOM type, stream encoding, prolog encoding}
+        final String[][] mismatches = {
+            {"UTF-8-bom", US_ASCII, US_ASCII},
+            {"UTF-8-bom", ISO_8859_1, ISO_8859_1},
+            {"UTF-8-bom", UTF_8, UTF_16},
+            {"UTF-8-bom", UTF_8, UTF_16BE},
+            {"UTF-8-bom", UTF_8, UTF_16LE},
+            {"UTF-16BE-bom", UTF_16BE, UTF_16LE},
+            {"UTF-16LE-bom", UTF_16LE, UTF_16BE},
+            {"UTF-16LE-bom", UTF_16LE, UTF_8},
+            {"UTF-32BE-bom", UTF_32BE, UTF_32LE},
+            {"UTF-32LE-bom", UTF_32LE, UTF_32BE},
+            {"UTF-32LE-bom", UTF_32LE, UTF_8},
+        };
+        for (final String[] scenario : mismatches) {
+            testRawBomInvalid(scenario[0], scenario[1], scenario[2]);
+        }
+    }
+
+    /**
+     * Writes a document with the BOM, stream encoding and prolog encoding all derived from
+     * {@code encoding} and checks that the reader reports that encoding.
+     *
+     * @param encoding the encoding name; {@code encoding + "-bom"} is used as BOM type key
+     * @throws Exception if stream creation or reading fails
+     */
+    protected void testRawBomValid(final String encoding) throws Exception {
+        try (InputStream is = getXmlInputStream(encoding + "-bom", XML3, encoding, encoding);
+            XmlStreamReader xmlReader = new XmlStreamReader(is, false)) {
+            // JUnit 5 signature is assertEquals(expected, actual).
+            if (!encoding.equals(UTF_16) && !encoding.equals(UTF_32)) {
+                assertEquals(encoding, xmlReader.getEncoding());
+            } else {
+                // For UTF-16 and UTF-32 only the leading part of the reported name is compared.
+                assertEquals(encoding, xmlReader.getEncoding().substring(0, encoding.length()));
+            }
+        }
+    }
+
+    /**
+     * Reads a UTF-8 document with a matching prolog encoding and checks both the detected
+     * encoding and that the content read back equals the original text.
+     *
+     * @throws Exception if reading fails
+     */
+    @Test
+    public void testRawContent() throws Exception {
+        final String encoding = UTF_8;
+        final String xml = getXML("no-bom", XML3, encoding, encoding);
+        try (XmlStreamReader xmlReader = new XmlStreamReader(new StringInputStream(xml, encoding))) {
+            // Both assertions use assertEquals(expected, actual, message).
+            assertEquals(encoding, xmlReader.getEncoding(), "Check encoding");
+            assertEquals(xml, IOUtils.toString(xmlReader), "Check content");
+        }
+    }
+
+    /** BOM-less detection for a stream written with CP1047. */
+    @Test
+    public void testRawNoBomCp1047() throws Exception {
+        final String charsetName = "CP1047";
+        testRawNoBomValid(charsetName);
+    }
+
+    /**
+     * Expects the two-argument constructor (second argument {@code false}) to throw an
+     * {@link IOException} whose message contains "Invalid encoding," for a BOM-less document
+     * written and declared with {@code encoding}.
+     *
+     * @param encoding stream and prolog encoding
+     * @throws Exception if stream creation fails
+     */
+    protected void testRawNoBomInvalid(final String encoding) throws Exception {
+        try (final InputStream is = getXmlInputStream("no-bom", XML3, encoding, encoding)) {
+            try {
+                final XmlStreamReader unexpected = new XmlStreamReader(is, false);
+                unexpected.close();
+                fail("It should have failed");
+            } catch (final IOException expected) {
+                final String message = expected.getMessage();
+                assertTrue(message.contains("Invalid encoding,"));
+            }
+        }
+    }
+
+    /** BOM-less detection for a stream written with ISO-8859-1. */
+    @Test
+    public void testRawNoBomIso8859_1() throws Exception {
+        final String charsetName = ISO_8859_1;
+        testRawNoBomValid(charsetName);
+    }
+
+    /** BOM-less detection for a stream written with US-ASCII. */
+    @Test
+    public void testRawNoBomUsAscii() throws Exception {
+        final String charsetName = US_ASCII;
+        testRawNoBomValid(charsetName);
+    }
+
+    /** BOM-less detection for a stream written with UTF-16BE. */
+    @Test
+    public void testRawNoBomUtf16BE() throws Exception {
+        final String charsetName = UTF_16BE;
+        testRawNoBomValid(charsetName);
+    }
+
+    /** BOM-less detection for a stream written with UTF-16LE. */
+    @Test
+    public void testRawNoBomUtf16LE() throws Exception {
+        final String charsetName = UTF_16LE;
+        testRawNoBomValid(charsetName);
+    }
+
+    /** BOM-less detection for a stream written with UTF-32BE. */
+    @Test
+    public void testRawNoBomUtf32BE() throws Exception {
+        final String charsetName = UTF_32BE;
+        testRawNoBomValid(charsetName);
+    }
+
+    /** BOM-less detection for a stream written with UTF-32LE. */
+    @Test
+    public void testRawNoBomUtf32LE() throws Exception {
+        final String charsetName = UTF_32LE;
+        testRawNoBomValid(charsetName);
+    }
+
+    /** BOM-less detection for a stream written with UTF-8. */
+    @Test
+    public void testRawNoBomUtf8() throws Exception {
+        final String charsetName = UTF_8;
+        testRawNoBomValid(charsetName);
+    }
+
+    /**
+     * Checks BOM-less detection for every document template: without an encoding in the
+     * prolog (XML1, XML2) UTF-8 is expected, with one (XML3, XML4, XML5) the declared
+     * encoding is expected.
+     *
+     * @param encoding stream encoding, also used as prolog encoding
+     * @throws Exception if stream creation or reading fails
+     */
+    protected void testRawNoBomValid(final String encoding) throws Exception {
+        // Only this first case uses the two-argument constructor with 'false'.
+        try (InputStream is = getXmlInputStream("no-bom", XML1, encoding, encoding);
+            XmlStreamReader xmlReader = new XmlStreamReader(is, false)) {
+            assertEquals(UTF_8, xmlReader.getEncoding());
+        }
+
+        assertRawNoBomEncoding(XML2, encoding, UTF_8);
+        assertRawNoBomEncoding(XML3, encoding, encoding);
+        assertRawNoBomEncoding(XML4, encoding, encoding);
+        assertRawNoBomEncoding(XML5, encoding, encoding);
+    }
+
+    /**
+     * Reads one BOM-less document through the single-argument constructor and asserts the
+     * detected encoding; stream and reader are closed even when the assertion fails.
+     */
+    private void assertRawNoBomEncoding(final String xmlType, final String encoding,
+        final String expected) throws IOException {
+        try (InputStream is = getXmlInputStream("no-bom", xmlType, encoding, encoding);
+            XmlStreamReader xmlReader = new XmlStreamReader(is)) {
+            assertEquals(expected, xmlReader.getEncoding());
+        }
+    }
+}