about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brett Vickers <brett@beevik.com> 2023-05-11 10:58:40 -0700
committer: Brett Vickers <brett@beevik.com> 2023-05-11 11:04:17 -0700
commit: ecaabe808842728a9fd6cf40f412238ac9a84434 (patch)
tree: 013f94ec4db11b49597233196df597fa436af0e7
parent: a291eec23391d27aa821b5d32b0ea835b8330374 (diff)
download: go-etree-ecaabe808842728a9fd6cf40f412238ac9a84434.tar.gz
Token WriteTo and indentation modifications
Rename etree.XMLWriter to etree.Writer. Add unit tests for the new WriteTo feature. Add Element.IndentWithSettings to make it possible to re-indent an element before calling its WriteTo function. Rename the IndentSettings property SuppressTrailingNewline to SuppressTrailingWhitespace, which is more accurate terminology. NewIndentSettings now returns *IndentSettings, and Document.IndentWithSettings now takes its settings by pointer.
-rw-r--r-- etree.go 218
-rw-r--r-- etree_test.go 40
-rw-r--r-- helpers.go 2
3 files changed, 156 insertions, 104 deletions
diff --git a/etree.go b/etree.go
index 1ec8094..83df8b2 100644
--- a/etree.go
+++ b/etree.go
@@ -107,14 +107,6 @@ type WriteSettings struct {
UseCRLF bool
}
-// XMLWriter is a Writer that also has convenience methods for writing
-// strings an single bytes.
-type XMLWriter interface {
- io.StringWriter
- io.ByteWriter
- io.Writer
-}
-
// newWriteSettings creates a default WriteSettings record.
func newWriteSettings() WriteSettings {
return WriteSettings{
@@ -152,29 +144,58 @@ type IndentSettings struct {
// false.
PreserveLeafWhitespace bool
- // SuppressTrailingNewline suppresses the generation of a trailing newline
- // character at the end of the indented document. Default: false.
- SuppressTrailingNewline bool
+ // SuppressTrailingWhitespace suppresses the generation of trailing
+ // whitespace characters (such as newlines) at the end of the indented
+ // document. Default: false.
+ SuppressTrailingWhitespace bool
}
// NewIndentSettings creates a default IndentSettings record.
-func NewIndentSettings() IndentSettings {
- return IndentSettings{
- Spaces: 4,
- UseTabs: false,
- UseCRLF: false,
- PreserveLeafWhitespace: false,
- SuppressTrailingNewline: false,
+func NewIndentSettings() *IndentSettings {
+ return &IndentSettings{
+ Spaces: 4,
+ UseTabs: false,
+ UseCRLF: false,
+ PreserveLeafWhitespace: false,
+ SuppressTrailingWhitespace: false,
+ }
+}
+
+type indentFunc func(depth int) string
+
+func getIndentFunc(s *IndentSettings) indentFunc {
+ if s.UseTabs {
+ if s.UseCRLF {
+ return func(depth int) string { return indentCRLF(depth, indentTabs) }
+ } else {
+ return func(depth int) string { return indentLF(depth, indentTabs) }
+ }
+ } else {
+ if s.Spaces < 0 {
+ return func(depth int) string { return "" }
+ } else if s.UseCRLF {
+ return func(depth int) string { return indentCRLF(depth*s.Spaces, indentSpaces) }
+ } else {
+ return func(depth int) string { return indentLF(depth*s.Spaces, indentSpaces) }
+ }
}
}
+// Writer is the interface that wraps the Write* methods called by each token
+// type's WriteTo function.
+type Writer interface {
+ io.StringWriter
+ io.ByteWriter
+ io.Writer
+}
+
// A Token is an interface type used to represent XML elements, character
// data, CDATA sections, XML comments, XML directives, and XML processing
// instructions.
type Token interface {
Parent() *Element
Index() int
- WriteTo(w XMLWriter, s *WriteSettings)
+ WriteTo(w Writer, s *WriteSettings)
dup(parent *Element) Token
setParent(parent *Element)
setIndex(index int)
@@ -390,8 +411,6 @@ func (d *Document) WriteToString() (s string, err error) {
return string(b), nil
}
-type indentFunc func(depth int) string
-
// Indent modifies the document's element tree by inserting character data
// tokens containing newlines and spaces for indentation. The amount of
// indentation per depth level is given by the 'spaces' parameter. Other than
@@ -415,37 +434,17 @@ func (d *Document) IndentTabs() {
// IndentWithSettings modifies the document's element tree by inserting
// character data tokens containing newlines and indentation. The behavior
// of the indentation algorithm is configured by the indent settings.
-func (d *Document) IndentWithSettings(s IndentSettings) {
+func (d *Document) IndentWithSettings(s *IndentSettings) {
// WriteSettings.UseCRLF is deprecated. Until removed from the package, it
// overrides IndentSettings.UseCRLF when true.
if d.WriteSettings.UseCRLF {
s.UseCRLF = true
}
- var indent indentFunc
- if s.UseTabs {
- if s.UseCRLF {
- indent = func(depth int) string { return indentCRLF(depth, indentTabs) }
- } else {
- indent = func(depth int) string { return indentLF(depth, indentTabs) }
- }
- } else {
- if s.Spaces < 0 {
- indent = func(depth int) string { return "" }
- } else if s.UseCRLF {
- indent = func(depth int) string { return indentCRLF(depth*s.Spaces, indentSpaces) }
- } else {
- indent = func(depth int) string { return indentLF(depth*s.Spaces, indentSpaces) }
- }
- }
+ d.Element.indent(0, getIndentFunc(s), s)
- d.Element.indent(0, indent, &s)
-
- if s.SuppressTrailingNewline && len(d.Element.Child) > 0 {
- n := len(d.Element.Child) - 1
- if cd, ok := d.Element.Child[n].(*CharData); ok && (cd.flags&whitespaceFlag) != 0 {
- d.Element.Child = d.Element.Child[:n]
- }
+ if s.SuppressTrailingWhitespace {
+ d.Element.stripTrailingWhitespace()
}
}
@@ -1047,6 +1046,16 @@ func (e *Element) GetRelativePath(source *Element) string {
return strings.Join(parts, "/")
}
+// IndentWithSettings modifies the element and its child tree by inserting
+// character data tokens containing newlines and indentation. The behavior of
+// the indentation algorithm is configured by the indent settings. Because
+// this function indents the element as if it were at the root of a document,
+// it is most useful when called just before writing the element as an XML
+// fragment using WriteTo.
+func (e *Element) IndentWithSettings(s *IndentSettings) {
+ e.indent(1, getIndentFunc(s), s)
+}
+
// indent recursively inserts proper indentation between an XML element's
// child tokens.
func (e *Element) indent(depth int, indent indentFunc, s *IndentSettings) {
@@ -1123,6 +1132,17 @@ func (e *Element) stripIndent(s *IndentSettings) {
e.Child = newChild
}
+// stripTrailingWhitespace removes any trailing whitespace CharData tokens
+// from the element's children.
+func (e *Element) stripTrailingWhitespace() {
+ for i := len(e.Child) - 1; i >= 0; i-- {
+ if cd, ok := e.Child[i].(*CharData); !ok || !cd.IsWhitespace() {
+ e.Child = e.Child[:i+1]
+ return
+ }
+ }
+}
+
// dup duplicates the element.
func (e *Element) dup(parent *Element) Token {
ne := &Element{
@@ -1153,18 +1173,8 @@ func (e *Element) Index() int {
return e.index
}
-// setParent replaces this element token's parent.
-func (e *Element) setParent(parent *Element) {
- e.parent = parent
-}
-
-// setIndex sets this element token's index within its parent's Child slice.
-func (e *Element) setIndex(index int) {
- e.index = index
-}
-
// WriteTo serializes the element to the writer w.
-func (e *Element) WriteTo(w XMLWriter, s *WriteSettings) {
+func (e *Element) WriteTo(w Writer, s *WriteSettings) {
w.WriteByte('<')
w.WriteString(e.FullTag())
for _, a := range e.Attr {
@@ -1190,6 +1200,16 @@ func (e *Element) WriteTo(w XMLWriter, s *WriteSettings) {
}
}
+// setParent replaces this element token's parent.
+func (e *Element) setParent(parent *Element) {
+ e.parent = parent
+}
+
+// setIndex sets this element token's index within its parent's Child slice.
+func (e *Element) setIndex(index int) {
+ e.index = index
+}
+
// addChild adds a child token to the element e.
func (e *Element) addChild(t Token) {
t.setParent(e)
@@ -1292,7 +1312,7 @@ func (a *Attr) NamespaceURI() string {
}
// WriteTo serializes the attribute to the writer.
-func (a *Attr) WriteTo(w XMLWriter, s *WriteSettings) {
+func (a *Attr) WriteTo(w Writer, s *WriteSettings) {
w.WriteString(a.FullKey())
if s.AttrSingleQuote {
w.WriteString(`='`)
@@ -1407,6 +1427,23 @@ func (c *CharData) Index() int {
return c.index
}
+// WriteTo serializes character data to the writer.
+func (c *CharData) WriteTo(w Writer, s *WriteSettings) {
+ if c.IsCData() {
+ w.WriteString(`<![CDATA[`)
+ w.WriteString(c.Data)
+ w.WriteString(`]]>`)
+ } else {
+ var m escapeMode
+ if s.CanonicalText {
+ m = escapeCanonicalText
+ } else {
+ m = escapeNormal
+ }
+ escapeString(w, c.Data, m)
+ }
+}
+
// dup duplicates the character data.
func (c *CharData) dup(parent *Element) Token {
return &CharData{
@@ -1428,23 +1465,6 @@ func (c *CharData) setIndex(index int) {
c.index = index
}
-// WriteTo serializes character data to the writer.
-func (c *CharData) WriteTo(w XMLWriter, s *WriteSettings) {
- if c.IsCData() {
- w.WriteString(`<![CDATA[`)
- w.WriteString(c.Data)
- w.WriteString(`]]>`)
- } else {
- var m escapeMode
- if s.CanonicalText {
- m = escapeCanonicalText
- } else {
- m = escapeNormal
- }
- escapeString(w, c.Data, m)
- }
-}
-
// NewComment creates an unparented comment token.
func NewComment(comment string) *Comment {
return newComment(comment, nil)
@@ -1490,6 +1510,13 @@ func (c *Comment) Index() int {
return c.index
}
+// WriteTo serializes the comment to the writer.
+func (c *Comment) WriteTo(w Writer, s *WriteSettings) {
+ w.WriteString("<!--")
+ w.WriteString(c.Data)
+ w.WriteString("-->")
+}
+
// setParent replaces the comment token's parent.
func (c *Comment) setParent(parent *Element) {
c.parent = parent
@@ -1501,13 +1528,6 @@ func (c *Comment) setIndex(index int) {
c.index = index
}
-// WriteTo serialies the comment to the writer.
-func (c *Comment) WriteTo(w XMLWriter, s *WriteSettings) {
- w.WriteString("<!--")
- w.WriteString(c.Data)
- w.WriteString("-->")
-}
-
// NewDirective creates an unparented XML directive token.
func NewDirective(data string) *Directive {
return newDirective(data, nil)
@@ -1555,6 +1575,13 @@ func (d *Directive) Index() int {
return d.index
}
+// WriteTo serializes the XML directive to the writer.
+func (d *Directive) WriteTo(w Writer, s *WriteSettings) {
+ w.WriteString("<!")
+ w.WriteString(d.Data)
+ w.WriteString(">")
+}
+
// setParent replaces the directive token's parent.
func (d *Directive) setParent(parent *Element) {
d.parent = parent
@@ -1566,13 +1593,6 @@ func (d *Directive) setIndex(index int) {
d.index = index
}
-// WriteTo serializes the XML directive to the writer.
-func (d *Directive) WriteTo(w XMLWriter, s *WriteSettings) {
- w.WriteString("<!")
- w.WriteString(d.Data)
- w.WriteString(">")
-}
-
// NewProcInst creates an unparented XML processing instruction.
func NewProcInst(target, inst string) *ProcInst {
return newProcInst(target, inst, nil)
@@ -1623,6 +1643,17 @@ func (p *ProcInst) Index() int {
return p.index
}
+// WriteTo serializes the processing instruction to the writer.
+func (p *ProcInst) WriteTo(w Writer, s *WriteSettings) {
+ w.WriteString("<?")
+ w.WriteString(p.Target)
+ if p.Inst != "" {
+ w.WriteByte(' ')
+ w.WriteString(p.Inst)
+ }
+ w.WriteString("?>")
+}
+
// setParent replaces the processing instruction token's parent.
func (p *ProcInst) setParent(parent *Element) {
p.parent = parent
@@ -1633,14 +1664,3 @@ func (p *ProcInst) setParent(parent *Element) {
func (p *ProcInst) setIndex(index int) {
p.index = index
}
-
-// WriteTo serializes the processing instruction to the writer.
-func (p *ProcInst) WriteTo(w XMLWriter, s *WriteSettings) {
- w.WriteString("<?")
- w.WriteString(p.Target)
- if p.Inst != "" {
- w.WriteByte(' ')
- w.WriteString(p.Inst)
- }
- w.WriteString("?>")
-}
diff --git a/etree_test.go b/etree_test.go
index 861feff..9f1df3e 100644
--- a/etree_test.go
+++ b/etree_test.go
@@ -5,6 +5,7 @@
package etree
import (
+ "bytes"
"encoding/xml"
"io"
"strings"
@@ -808,7 +809,8 @@ func TestIndentWithDefaultSettings(t *testing.T) {
t.Error("etree: failed to read string")
}
- doc.IndentWithSettings(NewIndentSettings())
+ settings := NewIndentSettings()
+ doc.IndentWithSettings(settings)
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: failed to serialize document")
@@ -907,7 +909,7 @@ func TestIndentPreserveWhitespace(t *testing.T) {
s := NewIndentSettings()
s.Spaces = 2
s.PreserveLeafWhitespace = true
- s.SuppressTrailingNewline = true
+ s.SuppressTrailingWhitespace = true
doc.IndentWithSettings(s)
output, err := doc.WriteToString()
@@ -949,7 +951,7 @@ func TestPreserveCData(t *testing.T) {
t.Error("etree: failed to read string")
}
- output, err := doc.WriteToString()
+ output, _ := doc.WriteToString()
checkStrEq(t, output, test.expectedWithPreserve)
}
@@ -961,7 +963,7 @@ func TestPreserveCData(t *testing.T) {
t.Error("etree: failed to read string")
}
- output, err := doc.WriteToString()
+ output, _ := doc.WriteToString()
checkStrEq(t, output, test.expectedWithoutPreserve)
}
}
@@ -1326,3 +1328,33 @@ func TestWhitespace(t *testing.T) {
cd.SetData("")
checkBoolEq(t, cd.IsWhitespace(), true)
}
+
+func TestTokenWriteTo(t *testing.T) {
+ s := `<store>
+ <!-- comment -->
+ <book>
+ <title>Great Expectations</title>
+ </book>
+</store>`
+ doc := newDocumentFromString(t, s)
+
+ writeSettings := WriteSettings{}
+ indentSettings := IndentSettings{UseTabs: true}
+
+ tests := []struct {
+ path string
+ expected string
+ }{
+ {"//store", "<store>\n\t<!-- comment -->\n\t<book>\n\t\t<title>Great Expectations</title>\n\t</book>\n</store>"},
+ {"//store/book", "<book>\n\t<title>Great Expectations</title>\n</book>"},
+ {"//store/book/title", "<title>Great Expectations</title>"},
+ }
+ for _, test := range tests {
+ var buffer bytes.Buffer
+
+ c := doc.FindElement(test.path)
+ c.IndentWithSettings(&indentSettings)
+ c.WriteTo(&buffer, &writeSettings)
+ checkStrEq(t, buffer.String(), test.expected)
+ }
+}
diff --git a/helpers.go b/helpers.go
index 9ab29b6..b31fd75 100644
--- a/helpers.go
+++ b/helpers.go
@@ -329,7 +329,7 @@ const (
)
// escapeString writes an escaped version of a string to the writer.
-func escapeString(w XMLWriter, s string, m escapeMode) {
+func escapeString(w Writer, s string, m escapeMode) {
var esc []byte
last := 0
for i := 0; i < len(s); {