diff options
author | Brett Vickers <brett@beevik.com> | 2023-05-08 07:38:28 -0700 |
---|---|---|
committer | Brett Vickers <brett@beevik.com> | 2023-05-08 07:38:28 -0700 |
commit | d50c5837e553b5f7acaaba3b1034d9c82c819cfd (patch) | |
tree | d9c51a6dd27061f77c54b16184d94e104831537c | |
parent | 211cdce3bc3744f79f009e4e4763b121da43c635 (diff) | |
download | go-etree-d50c5837e553b5f7acaaba3b1034d9c82c819cfd.tar.gz |
Remove CDATA preservation on read
This feature, introduced in v1.1.3, was implemented in such a way
that it broke the ability to read XML documents encoded in non-UTF-8
character sets.
-rw-r--r-- | etree.go | 5 | ||||
-rw-r--r-- | etree_test.go | 23 | ||||
-rw-r--r-- | helpers.go | 45 |
3 files changed, 8 insertions, 65 deletions
@@ -775,7 +775,6 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er var stack stack stack.push(e) for { - xr.ResetPeek(dec.InputOffset()) t, err := dec.RawToken() switch { case err == io.EOF: @@ -806,9 +805,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er case xml.CharData: data := string(t) var flags charDataFlags - if xr.PeekContainsCdata() { - flags = cdataFlag - } else if isWhitespace(data) { + if isWhitespace(data) { flags = whitespaceFlag } newCharData(data, flags, top) diff --git a/etree_test.go b/etree_test.go index 180c1a8..07eee08 100644 --- a/etree_test.go +++ b/etree_test.go @@ -892,8 +892,8 @@ func TestIndentPreserveWhitespace(t *testing.T) { {"<test> </test>", "<test> </test>"}, {"<test>\t</test>", "<test>\t</test>"}, {"<test>\t\n \t</test>", "<test>\t\n \t</test>"}, - {"<test><![CDATA[ ]]></test>", "<test><![CDATA[ ]]></test>"}, - {"<test> <![CDATA[ ]]> </test>", "<test><![CDATA[ ]]></test>"}, + {"<test><![CDATA[ ]]></test>", "<test> </test>"}, + {"<test> <![CDATA[ ]]> </test>", "<test/>"}, {"<outer> <inner> </inner> </outer>", "<outer>\n <inner> </inner>\n</outer>"}, } @@ -1278,22 +1278,3 @@ func TestWhitespace(t *testing.T) { cd.SetData("") checkBoolEq(t, cd.IsWhitespace(), true) } - -func TestPreserveCDATA(t *testing.T) { - s := `<name><![CDATA[My]] <b>name</b> <![CDATA[is]]></name>` - - doc := NewDocument() - err := doc.ReadFromString(s) - if err != nil { - t.Fatalf("etree: failed to ReadFromString: %v", err) - } - - result, err := doc.WriteToString() - if err != nil { - t.Fatalf("etree: failed to WriteToString: %v", err) - } - - if result != s { - t.Errorf("etree: wanted %q, got %q", s, result) - } -} @@ -6,7 +6,6 @@ package etree import ( "bufio" - "bytes" "io" "strings" "unicode/utf8" @@ -88,54 +87,20 @@ func (f *fifo) grow() { // bytes read from its encapsulated reader and detects when a CDATA // prefix has been parsed. 
type xmlReader struct { - r io.ByteReader + r io.Reader bytes int64 - peek []byte - last byte } var cdataPrefix = []byte("<![CDATA[") func newXmlReader(r io.Reader) *xmlReader { - return &xmlReader{ - r: bufio.NewReader(r), - bytes: 0, - peek: make([]byte, 0, len(cdataPrefix)), - last: 0, - } + return &xmlReader{r, 0} } func (xr *xmlReader) Read(p []byte) (n int, err error) { - // Since xmlReader implements the io.ByteReader interface, the XML decoder - // bypasses Read in favor of ReadByte. - return 0, nil -} - -func (xr *xmlReader) ReadByte() (b byte, err error) { - b, err = xr.r.ReadByte() - if err == nil { - xr.last = b - xr.bytes += 1 - if len(xr.peek) < len(cdataPrefix) { - xr.peek = append(xr.peek, b) - } - } - return b, err -} - -func (xr *xmlReader) ResetPeek(decoderOffset int64) { - xr.peek = xr.peek[0:0] - - // If the decoder offset doesn't match the number of bytes read so far, - // then the decoder performed an "unget" on the last byte read. Return - // this byte to the front of the peek buffer. - if decoderOffset != xr.bytes { - xr.peek = append(xr.peek, xr.last) - } -} - -func (xr *xmlReader) PeekContainsCdata() bool { - return bytes.Equal(xr.peek, cdataPrefix) + n, err = xr.r.Read(p) + xr.bytes += int64(n) + return n, err } // xmlWriter implements a proxy writer that counts the number of |