aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Brett Vickers <brett@beevik.com> 2023-05-08 07:38:28 -0700
committer Brett Vickers <brett@beevik.com> 2023-05-08 07:38:28 -0700
commit d50c5837e553b5f7acaaba3b1034d9c82c819cfd (patch)
tree d9c51a6dd27061f77c54b16184d94e104831537c
parent 211cdce3bc3744f79f009e4e4763b121da43c635 (diff)
download go-etree-d50c5837e553b5f7acaaba3b1034d9c82c819cfd.tar.gz
Remove CDATA preservation on read
This feature, introduced in v1.1.3, was implemented in a way that broke the ability to read XML documents encoded in non-UTF-8 character sets.
-rw-r--r-- etree.go 5
-rw-r--r-- etree_test.go 23
-rw-r--r-- helpers.go 45
3 files changed, 8 insertions, 65 deletions
diff --git a/etree.go b/etree.go
index 5d272a3..ac9dc74 100644
--- a/etree.go
+++ b/etree.go
@@ -775,7 +775,6 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
var stack stack
stack.push(e)
for {
- xr.ResetPeek(dec.InputOffset())
t, err := dec.RawToken()
switch {
case err == io.EOF:
@@ -806,9 +805,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
case xml.CharData:
data := string(t)
var flags charDataFlags
- if xr.PeekContainsCdata() {
- flags = cdataFlag
- } else if isWhitespace(data) {
+ if isWhitespace(data) {
flags = whitespaceFlag
}
newCharData(data, flags, top)
diff --git a/etree_test.go b/etree_test.go
index 180c1a8..07eee08 100644
--- a/etree_test.go
+++ b/etree_test.go
@@ -892,8 +892,8 @@ func TestIndentPreserveWhitespace(t *testing.T) {
{"<test> </test>", "<test> </test>"},
{"<test>\t</test>", "<test>\t</test>"},
{"<test>\t\n \t</test>", "<test>\t\n \t</test>"},
- {"<test><![CDATA[ ]]></test>", "<test><![CDATA[ ]]></test>"},
- {"<test> <![CDATA[ ]]> </test>", "<test><![CDATA[ ]]></test>"},
+ {"<test><![CDATA[ ]]></test>", "<test> </test>"},
+ {"<test> <![CDATA[ ]]> </test>", "<test/>"},
{"<outer> <inner> </inner> </outer>", "<outer>\n <inner> </inner>\n</outer>"},
}
@@ -1278,22 +1278,3 @@ func TestWhitespace(t *testing.T) {
cd.SetData("")
checkBoolEq(t, cd.IsWhitespace(), true)
}
-
-func TestPreserveCDATA(t *testing.T) {
- s := `<name><![CDATA[My]] <b>name</b> <![CDATA[is]]></name>`
-
- doc := NewDocument()
- err := doc.ReadFromString(s)
- if err != nil {
- t.Fatalf("etree: failed to ReadFromString: %v", err)
- }
-
- result, err := doc.WriteToString()
- if err != nil {
- t.Fatalf("etree: failed to WriteToString: %v", err)
- }
-
- if result != s {
- t.Errorf("etree: wanted %q, got %q", s, result)
- }
-}
diff --git a/helpers.go b/helpers.go
index 2fddc28..39a31ee 100644
--- a/helpers.go
+++ b/helpers.go
@@ -6,7 +6,6 @@ package etree
import (
"bufio"
- "bytes"
"io"
"strings"
"unicode/utf8"
@@ -88,54 +87,20 @@ func (f *fifo) grow() {
// bytes read from its encapsulated reader and detects when a CDATA
// prefix has been parsed.
type xmlReader struct {
- r io.ByteReader
+ r io.Reader
bytes int64
- peek []byte
- last byte
}
var cdataPrefix = []byte("<![CDATA[")
func newXmlReader(r io.Reader) *xmlReader {
- return &xmlReader{
- r: bufio.NewReader(r),
- bytes: 0,
- peek: make([]byte, 0, len(cdataPrefix)),
- last: 0,
- }
+ return &xmlReader{r, 0}
}
func (xr *xmlReader) Read(p []byte) (n int, err error) {
- // Since xmlReader implements the io.ByteReader interface, the XML decoder
- // bypasses Read in favor of ReadByte.
- return 0, nil
-}
-
-func (xr *xmlReader) ReadByte() (b byte, err error) {
- b, err = xr.r.ReadByte()
- if err == nil {
- xr.last = b
- xr.bytes += 1
- if len(xr.peek) < len(cdataPrefix) {
- xr.peek = append(xr.peek, b)
- }
- }
- return b, err
-}
-
-func (xr *xmlReader) ResetPeek(decoderOffset int64) {
- xr.peek = xr.peek[0:0]
-
- // If the decoder offset doesn't match the number of bytes read so far,
- // then the decoder performed an "unget" on the last byte read. Return
- // this byte to the front of the peek buffer.
- if decoderOffset != xr.bytes {
- xr.peek = append(xr.peek, xr.last)
- }
-}
-
-func (xr *xmlReader) PeekContainsCdata() bool {
- return bytes.Equal(xr.peek, cdataPrefix)
+ n, err = xr.r.Read(p)
+ xr.bytes += int64(n)
+ return n, err
}
// xmlWriter implements a proxy writer that counts the number of